/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao <haihao.xiang@intel.com>
 *
 */

#include <linux/log2.h>
#include <drm/drmP.h>
#include "i915_drv.h"
#include <drm/i915_drm.h>
#include "i915_trace.h"
#include "intel_drv.h"

/* Rough estimate of the typical request size, performing a flush,
 * set-context and then emitting the batch.
 */
#define LEGACY_REQUEST_SIZE 200

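/*
 * Free space is measured from tail forward to head, wrapping around the
 * ring; a small reserve (I915_RING_FREE_SPACE) is held back so the tail
 * never quite catches up with the head, which the hardware would read
 * as an empty ring.
 */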
static int __intel_ring_space(int head, int tail, int size)
{
	int space = head - tail;
	if (space <= 0)
		space += size;
	return space - I915_RING_FREE_SPACE;
}

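/*
 * Pull the software copy of HEAD forward to the last retired request
 * before recomputing how much of the ring is free.
 */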
void intel_ring_update_space(struct intel_ring *ring)
{
	if (ring->last_retired_head != -1) {
		ring->head = ring->last_retired_head;
		ring->last_retired_head = -1;
	}

	ring->space = __intel_ring_space(ring->head & HEAD_ADDR,
					 ring->tail, ring->size);
}

static int
gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
	u32 cmd, *cs;

	cmd = MI_FLUSH;

	if (mode & EMIT_INVALIDATE)
		cmd |= MI_READ_FLUSH;

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}

static int
gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
	u32 cmd, *cs;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_EXE_FLUSH;
		if (IS_G4X(req->i915) || IS_GEN5(req->i915))
			cmd |= MI_INVALIDATE_ISP;
	}

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}

8d315287
JB
133/**
134 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
135 * implementing two workarounds on gen6. From section 1.4.7.1
136 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
137 *
138 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
139 * produced by non-pipelined state commands), software needs to first
140 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
141 * 0.
142 *
143 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
144 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
145 *
146 * And the workaround for these two requires this workaround first:
147 *
148 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
149 * BEFORE the pipe-control with a post-sync op and no write-cache
150 * flushes.
151 *
152 * And this last workaround is tricky because of the requirements on
153 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
154 * volume 2 part 1:
155 *
156 * "1 of the following must also be set:
157 * - Render Target Cache Flush Enable ([12] of DW1)
158 * - Depth Cache Flush Enable ([0] of DW1)
159 * - Stall at Pixel Scoreboard ([1] of DW1)
160 * - Depth Stall ([13] of DW1)
161 * - Post-Sync Operation ([13] of DW1)
162 * - Notify Enable ([8] of DW1)"
163 *
164 * The cache flushes require the workaround flush that triggered this
165 * one, so we can't use it. Depth stall would trigger the same.
166 * Post-sync nonzero is what triggered this second workaround, so we
167 * can't use that one either. Notify enable is IRQs, which aren't
168 * really our business. That leaves only stall at scoreboard.
169 */
170static int
f2cf1fcc 171intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
8d315287 172{
b5321f30 173 u32 scratch_addr =
bde13ebd 174 i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
73dec95e
TU
175 u32 *cs;
176
177 cs = intel_ring_begin(req, 6);
178 if (IS_ERR(cs))
179 return PTR_ERR(cs);
180
181 *cs++ = GFX_OP_PIPE_CONTROL(5);
182 *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
183 *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
184 *cs++ = 0; /* low dword */
185 *cs++ = 0; /* high dword */
186 *cs++ = MI_NOOP;
187 intel_ring_advance(req, cs);
188
189 cs = intel_ring_begin(req, 6);
190 if (IS_ERR(cs))
191 return PTR_ERR(cs);
192
193 *cs++ = GFX_OP_PIPE_CONTROL(5);
194 *cs++ = PIPE_CONTROL_QW_WRITE;
195 *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
196 *cs++ = 0;
197 *cs++ = 0;
198 *cs++ = MI_NOOP;
199 intel_ring_advance(req, cs);
8d315287
JB
200
201 return 0;
202}
203
204static int
7c9cf4e3 205gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
8d315287 206{
b5321f30 207 u32 scratch_addr =
bde13ebd 208 i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
73dec95e 209 u32 *cs, flags = 0;
8d315287
JB
210 int ret;
211
b3111509 212 /* Force SNB workarounds for PIPE_CONTROL flushes */
f2cf1fcc 213 ret = intel_emit_post_sync_nonzero_flush(req);
b3111509
PZ
214 if (ret)
215 return ret;
216
8d315287
JB
217 /* Just flush everything. Experiments have shown that reducing the
218 * number of bits based on the write domains has little performance
219 * impact.
220 */
7c9cf4e3 221 if (mode & EMIT_FLUSH) {
7d54a904
CW
222 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
223 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
224 /*
225 * Ensure that any following seqno writes only happen
226 * when the render cache is indeed flushed.
227 */
97f209bc 228 flags |= PIPE_CONTROL_CS_STALL;
7d54a904 229 }
7c9cf4e3 230 if (mode & EMIT_INVALIDATE) {
7d54a904
CW
231 flags |= PIPE_CONTROL_TLB_INVALIDATE;
232 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
233 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
234 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
235 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
236 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
237 /*
238 * TLB invalidate requires a post-sync write.
239 */
3ac78313 240 flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
7d54a904 241 }
8d315287 242
73dec95e
TU
243 cs = intel_ring_begin(req, 4);
244 if (IS_ERR(cs))
245 return PTR_ERR(cs);
8d315287 246
73dec95e
TU
247 *cs++ = GFX_OP_PIPE_CONTROL(4);
248 *cs++ = flags;
249 *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
250 *cs++ = 0;
251 intel_ring_advance(req, cs);
8d315287
JB
252
253 return 0;
254}
255
f3987631 256static int
f2cf1fcc 257gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
f3987631 258{
73dec95e 259 u32 *cs;
f3987631 260
73dec95e
TU
261 cs = intel_ring_begin(req, 4);
262 if (IS_ERR(cs))
263 return PTR_ERR(cs);
f3987631 264
73dec95e
TU
265 *cs++ = GFX_OP_PIPE_CONTROL(4);
266 *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
267 *cs++ = 0;
268 *cs++ = 0;
269 intel_ring_advance(req, cs);
f3987631
PZ
270
271 return 0;
272}
273
4772eaeb 274static int
7c9cf4e3 275gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
4772eaeb 276{
b5321f30 277 u32 scratch_addr =
bde13ebd 278 i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
73dec95e 279 u32 *cs, flags = 0;
4772eaeb 280
f3987631
PZ
281 /*
282 * Ensure that any following seqno writes only happen when the render
283 * cache is indeed flushed.
284 *
285 * Workaround: 4th PIPE_CONTROL command (except the ones with only
286 * read-cache invalidate bits set) must have the CS_STALL bit set. We
287 * don't try to be clever and just set it unconditionally.
288 */
289 flags |= PIPE_CONTROL_CS_STALL;
290
4772eaeb
PZ
291 /* Just flush everything. Experiments have shown that reducing the
292 * number of bits based on the write domains has little performance
293 * impact.
294 */
7c9cf4e3 295 if (mode & EMIT_FLUSH) {
4772eaeb
PZ
296 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
297 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
965fd602 298 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
40a24488 299 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4772eaeb 300 }
7c9cf4e3 301 if (mode & EMIT_INVALIDATE) {
4772eaeb
PZ
302 flags |= PIPE_CONTROL_TLB_INVALIDATE;
303 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
304 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
305 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
306 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
307 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
148b83d0 308 flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
4772eaeb
PZ
309 /*
310 * TLB invalidate requires a post-sync write.
311 */
312 flags |= PIPE_CONTROL_QW_WRITE;
b9e1faa7 313 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
f3987631 314
add284a3
CW
315 flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
316
f3987631
PZ
317 /* Workaround: we must issue a pipe_control with CS-stall bit
318 * set before a pipe_control command that has the state cache
319 * invalidate bit set. */
f2cf1fcc 320 gen7_render_ring_cs_stall_wa(req);
4772eaeb
PZ
321 }
322
73dec95e
TU
323 cs = intel_ring_begin(req, 4);
324 if (IS_ERR(cs))
325 return PTR_ERR(cs);
4772eaeb 326
73dec95e
TU
327 *cs++ = GFX_OP_PIPE_CONTROL(4);
328 *cs++ = flags;
329 *cs++ = scratch_addr;
330 *cs++ = 0;
331 intel_ring_advance(req, cs);
4772eaeb
PZ
332
333 return 0;
334}
335
884ceace 336static int
9f235dfa 337gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
884ceace 338{
9f235dfa 339 u32 flags;
73dec95e 340 u32 *cs;
884ceace 341
9f235dfa 342 cs = intel_ring_begin(req, mode & EMIT_INVALIDATE ? 12 : 6);
73dec95e
TU
343 if (IS_ERR(cs))
344 return PTR_ERR(cs);
884ceace 345
9f235dfa 346 flags = PIPE_CONTROL_CS_STALL;
a5f3d68e 347
7c9cf4e3 348 if (mode & EMIT_FLUSH) {
a5f3d68e
BW
349 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
350 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
965fd602 351 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
40a24488 352 flags |= PIPE_CONTROL_FLUSH_ENABLE;
a5f3d68e 353 }
7c9cf4e3 354 if (mode & EMIT_INVALIDATE) {
a5f3d68e
BW
355 flags |= PIPE_CONTROL_TLB_INVALIDATE;
356 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
357 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
358 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
359 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
360 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
361 flags |= PIPE_CONTROL_QW_WRITE;
362 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
02c9f7e3
KG
363
364 /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
9f235dfa
TU
365 cs = gen8_emit_pipe_control(cs,
366 PIPE_CONTROL_CS_STALL |
367 PIPE_CONTROL_STALL_AT_SCOREBOARD,
368 0);
a5f3d68e
BW
369 }
370
9f235dfa
TU
371 cs = gen8_emit_pipe_control(cs, flags,
372 i915_ggtt_offset(req->engine->scratch) +
373 2 * CACHELINE_BYTES);
374
375 intel_ring_advance(req, cs);
376
377 return 0;
a5f3d68e
BW
378}
379
0bc40be8 380static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
035dc1e0 381{
c033666a 382 struct drm_i915_private *dev_priv = engine->i915;
035dc1e0
DV
383 u32 addr;
384
385 addr = dev_priv->status_page_dmah->busaddr;
c033666a 386 if (INTEL_GEN(dev_priv) >= 4)
035dc1e0
DV
387 addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
388 I915_WRITE(HWS_PGA, addr);
389}
390
0bc40be8 391static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
af75f269 392{
c033666a 393 struct drm_i915_private *dev_priv = engine->i915;
f0f59a00 394 i915_reg_t mmio;
af75f269
DL
395
396 /* The ring status page addresses are no longer next to the rest of
397 * the ring registers as of gen7.
398 */
c033666a 399 if (IS_GEN7(dev_priv)) {
0bc40be8 400 switch (engine->id) {
af75f269
DL
401 case RCS:
402 mmio = RENDER_HWS_PGA_GEN7;
403 break;
404 case BCS:
405 mmio = BLT_HWS_PGA_GEN7;
406 break;
	/*
	 * VCS2 doesn't actually exist on Gen7; the case is only here to
	 * silence gcc's switch-check warning.
	 */
411 case VCS2:
412 case VCS:
413 mmio = BSD_HWS_PGA_GEN7;
414 break;
415 case VECS:
416 mmio = VEBOX_HWS_PGA_GEN7;
417 break;
418 }
c033666a 419 } else if (IS_GEN6(dev_priv)) {
0bc40be8 420 mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
af75f269
DL
421 } else {
422 /* XXX: gen8 returns to sanity */
0bc40be8 423 mmio = RING_HWS_PGA(engine->mmio_base);
af75f269
DL
424 }
425
57e88531 426 I915_WRITE(mmio, engine->status_page.ggtt_offset);
af75f269
DL
427 POSTING_READ(mmio);
428
429 /*
430 * Flush the TLB for this page
431 *
432 * FIXME: These two bits have disappeared on gen8, so a question
433 * arises: do we still need this and if so how should we go about
434 * invalidating the TLB?
435 */
ac657f64 436 if (IS_GEN(dev_priv, 6, 7)) {
0bc40be8 437 i915_reg_t reg = RING_INSTPM(engine->mmio_base);
af75f269
DL
438
		/* ring should be idle before issuing a sync flush */
0bc40be8 440 WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0);
af75f269
DL
441
442 I915_WRITE(reg,
443 _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
444 INSTPM_SYNC_FLUSH));
25ab57f4
CW
445 if (intel_wait_for_register(dev_priv,
446 reg, INSTPM_SYNC_FLUSH, 0,
447 1000))
af75f269 448 DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
0bc40be8 449 engine->name);
af75f269
DL
450 }
451}
452
0bc40be8 453static bool stop_ring(struct intel_engine_cs *engine)
8187a2b7 454{
c033666a 455 struct drm_i915_private *dev_priv = engine->i915;
8187a2b7 456
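	/*
	 * Ask the command streamer to stop (STOP_RING) and wait for it to
	 * report idle before clearing the ring registers below.
	 */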
21a2c58a 457 if (INTEL_GEN(dev_priv) > 2) {
0bc40be8 458 I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING));
3d808eb1
CW
459 if (intel_wait_for_register(dev_priv,
460 RING_MI_MODE(engine->mmio_base),
461 MODE_IDLE,
462 MODE_IDLE,
463 1000)) {
0bc40be8
TU
464 DRM_ERROR("%s : timed out trying to stop ring\n",
465 engine->name);
9bec9b13
CW
466 /* Sometimes we observe that the idle flag is not
467 * set even though the ring is empty. So double
468 * check before giving up.
469 */
0bc40be8 470 if (I915_READ_HEAD(engine) != I915_READ_TAIL(engine))
9bec9b13 471 return false;
9991ae78
CW
472 }
473 }
b7884eb4 474
0bc40be8
TU
475 I915_WRITE_CTL(engine, 0);
476 I915_WRITE_HEAD(engine, 0);
c5efa1ad 477 I915_WRITE_TAIL(engine, 0);
8187a2b7 478
21a2c58a 479 if (INTEL_GEN(dev_priv) > 2) {
0bc40be8
TU
480 (void)I915_READ_CTL(engine);
481 I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING));
9991ae78 482 }
a51435a3 483
0bc40be8 484 return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0;
9991ae78 485}
8187a2b7 486
0bc40be8 487static int init_ring_common(struct intel_engine_cs *engine)
9991ae78 488{
c033666a 489 struct drm_i915_private *dev_priv = engine->i915;
7e37f889 490 struct intel_ring *ring = engine->buffer;
9991ae78
CW
491 int ret = 0;
492
59bad947 493 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
9991ae78 494
0bc40be8 495 if (!stop_ring(engine)) {
9991ae78 496 /* G45 ring initialization often fails to reset head to zero */
6fd0d56e
CW
497 DRM_DEBUG_KMS("%s head not reset to zero "
498 "ctl %08x head %08x tail %08x start %08x\n",
0bc40be8
TU
499 engine->name,
500 I915_READ_CTL(engine),
501 I915_READ_HEAD(engine),
502 I915_READ_TAIL(engine),
503 I915_READ_START(engine));
8187a2b7 504
0bc40be8 505 if (!stop_ring(engine)) {
6fd0d56e
CW
506 DRM_ERROR("failed to set %s head to zero "
507 "ctl %08x head %08x tail %08x start %08x\n",
0bc40be8
TU
508 engine->name,
509 I915_READ_CTL(engine),
510 I915_READ_HEAD(engine),
511 I915_READ_TAIL(engine),
512 I915_READ_START(engine));
9991ae78
CW
513 ret = -EIO;
514 goto out;
6fd0d56e 515 }
8187a2b7
ZN
516 }
517
3177659a 518 if (HWS_NEEDS_PHYSICAL(dev_priv))
0bc40be8 519 ring_setup_phys_status_page(engine);
3177659a
CS
520 else
521 intel_ring_setup_status_page(engine);
9991ae78 522
ad07dfcd 523 intel_engine_reset_breadcrumbs(engine);
821ed7df 524
ece4a17d 525 /* Enforce ordering by reading HEAD register back */
0bc40be8 526 I915_READ_HEAD(engine);
ece4a17d 527
0d8957c8
DV
528 /* Initialize the ring. This must happen _after_ we've cleared the ring
529 * registers with the above sequence (the readback of the HEAD registers
530 * also enforces ordering), otherwise the hw might lose the new ring
531 * register values. */
bde13ebd 532 I915_WRITE_START(engine, i915_ggtt_offset(ring->vma));
95468892
CW
533
534 /* WaClearRingBufHeadRegAtInit:ctg,elk */
0bc40be8 535 if (I915_READ_HEAD(engine))
95468892 536 DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
0bc40be8 537 engine->name, I915_READ_HEAD(engine));
821ed7df
CW
538
539 intel_ring_update_space(ring);
540 I915_WRITE_HEAD(engine, ring->head);
541 I915_WRITE_TAIL(engine, ring->tail);
542 (void)I915_READ_TAIL(engine);
95468892 543
62ae14b1 544 I915_WRITE_CTL(engine, RING_CTL_SIZE(ring->size) | RING_VALID);
8187a2b7 545
8187a2b7 546 /* If the head is still not zero, the ring is dead */
821ed7df
CW
547 if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base),
548 RING_VALID, RING_VALID,
549 50)) {
e74cfed5 550 DRM_ERROR("%s initialization failed "
821ed7df 551 "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
0bc40be8
TU
552 engine->name,
553 I915_READ_CTL(engine),
554 I915_READ_CTL(engine) & RING_VALID,
821ed7df
CW
555 I915_READ_HEAD(engine), ring->head,
556 I915_READ_TAIL(engine), ring->tail,
0bc40be8 557 I915_READ_START(engine),
bde13ebd 558 i915_ggtt_offset(ring->vma));
b7884eb4
DV
559 ret = -EIO;
560 goto out;
8187a2b7
ZN
561 }
562
fc0768ce 563 intel_engine_init_hangcheck(engine);
50f018df 564
b7884eb4 565out:
59bad947 566 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
b7884eb4
DV
567
568 return ret;
8187a2b7
ZN
569}
570
821ed7df
CW
571static void reset_ring_common(struct intel_engine_cs *engine,
572 struct drm_i915_gem_request *request)
573{
c0dcb203
CW
574 /* Try to restore the logical GPU state to match the continuation
575 * of the request queue. If we skip the context/PD restore, then
576 * the next request may try to execute assuming that its context
577 * is valid and loaded on the GPU and so may try to access invalid
578 * memory, prompting repeated GPU hangs.
579 *
580 * If the request was guilty, we still restore the logical state
581 * in case the next request requires it (e.g. the aliasing ppgtt),
582 * but skip over the hung batch.
583 *
584 * If the request was innocent, we try to replay the request with
585 * the restored context.
586 */
587 if (request) {
588 struct drm_i915_private *dev_priv = request->i915;
589 struct intel_context *ce = &request->ctx->engine[engine->id];
590 struct i915_hw_ppgtt *ppgtt;
591
592 /* FIXME consider gen8 reset */
593
594 if (ce->state) {
595 I915_WRITE(CCID,
596 i915_ggtt_offset(ce->state) |
597 BIT(8) /* must be set! */ |
598 CCID_EXTENDED_STATE_SAVE |
599 CCID_EXTENDED_STATE_RESTORE |
600 CCID_EN);
601 }
602
603 ppgtt = request->ctx->ppgtt ?: engine->i915->mm.aliasing_ppgtt;
604 if (ppgtt) {
605 u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10;
606
607 I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
608 I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset);
609
610 /* Wait for the PD reload to complete */
611 if (intel_wait_for_register(dev_priv,
612 RING_PP_DIR_BASE(engine),
613 BIT(0), 0,
614 10))
615 DRM_ERROR("Wait for reload of ppgtt page-directory timed out\n");
821ed7df 616
c0dcb203
CW
617 ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
618 }
619
620 /* If the rq hung, jump to its breadcrumb and skip the batch */
621 if (request->fence.error == -EIO) {
622 struct intel_ring *ring = request->ring;
623
624 ring->head = request->postfix;
625 ring->last_retired_head = -1;
626 }
627 } else {
628 engine->legacy_active_context = NULL;
629 }
821ed7df
CW
630}
631
8753181e 632static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
8f0e2b9d
DV
633{
634 int ret;
635
e2be4faf 636 ret = intel_ring_workarounds_emit(req);
8f0e2b9d
DV
637 if (ret != 0)
638 return ret;
639
4e50f082 640 ret = i915_gem_render_state_emit(req);
8f0e2b9d 641 if (ret)
e26e1b97 642 return ret;
8f0e2b9d 643
e26e1b97 644 return 0;
8f0e2b9d
DV
645}
646
0bc40be8 647static int init_render_ring(struct intel_engine_cs *engine)
8187a2b7 648{
c033666a 649 struct drm_i915_private *dev_priv = engine->i915;
0bc40be8 650 int ret = init_ring_common(engine);
9c33baa6
KZ
651 if (ret)
652 return ret;
a69ffdbf 653
61a563a2 654 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
ac657f64 655 if (IS_GEN(dev_priv, 4, 6))
6b26c86d 656 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
1c8c38c5
CW
657
658 /* We need to disable the AsyncFlip performance optimisations in order
659 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
660 * programmed to '1' on all products.
8693a824 661 *
2441f877 662 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
1c8c38c5 663 */
ac657f64 664 if (IS_GEN(dev_priv, 6, 7))
1c8c38c5
CW
665 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
666
f05bb0c7 667 /* Required for the hardware to program scanline values for waiting */
01fa0302 668 /* WaEnableFlushTlbInvalidationMode:snb */
c033666a 669 if (IS_GEN6(dev_priv))
f05bb0c7 670 I915_WRITE(GFX_MODE,
aa83e30d 671 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
f05bb0c7 672
01fa0302 673 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
c033666a 674 if (IS_GEN7(dev_priv))
1c8c38c5 675 I915_WRITE(GFX_MODE_GEN7,
01fa0302 676 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
1c8c38c5 677 _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
78501eac 678
c033666a 679 if (IS_GEN6(dev_priv)) {
3a69ddd6
KG
680 /* From the Sandybridge PRM, volume 1 part 3, page 24:
681 * "If this bit is set, STCunit will have LRA as replacement
682 * policy. [...] This bit must be reset. LRA replacement
683 * policy is not supported."
684 */
685 I915_WRITE(CACHE_MODE_0,
5e13a0c5 686 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
84f9f938
BW
687 }
688
ac657f64 689 if (IS_GEN(dev_priv, 6, 7))
6b26c86d 690 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
84f9f938 691
035ea405
VS
692 if (INTEL_INFO(dev_priv)->gen >= 6)
693 I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
15b9f80e 694
0bc40be8 695 return init_workarounds_ring(engine);
8187a2b7
ZN
696}
697
0bc40be8 698static void render_ring_cleanup(struct intel_engine_cs *engine)
c6df541c 699{
c033666a 700 struct drm_i915_private *dev_priv = engine->i915;
3e78998a 701
19880c4a 702 i915_vma_unpin_and_release(&dev_priv->semaphore);
c6df541c
CW
703}
704
73dec95e 705static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *cs)
3e78998a 706{
ad7bdb2b 707 struct drm_i915_private *dev_priv = req->i915;
3e78998a 708 struct intel_engine_cs *waiter;
c3232b18 709 enum intel_engine_id id;
3e78998a 710
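	/*
	 * Write our seqno into every other engine's semaphore slot (via a
	 * post-sync PIPE_CONTROL) and follow it with MI_SEMAPHORE_SIGNAL so
	 * that any engine waiting upon us is woken.
	 */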
3b3f1650 711 for_each_engine(waiter, dev_priv, id) {
ad7bdb2b 712 u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
3e78998a
BW
713 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
714 continue;
715
73dec95e
TU
716 *cs++ = GFX_OP_PIPE_CONTROL(6);
717 *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE |
718 PIPE_CONTROL_CS_STALL;
719 *cs++ = lower_32_bits(gtt_offset);
720 *cs++ = upper_32_bits(gtt_offset);
721 *cs++ = req->global_seqno;
722 *cs++ = 0;
723 *cs++ = MI_SEMAPHORE_SIGNAL |
724 MI_SEMAPHORE_TARGET(waiter->hw_id);
725 *cs++ = 0;
3e78998a
BW
726 }
727
73dec95e 728 return cs;
3e78998a
BW
729}
730
73dec95e 731static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *cs)
3e78998a 732{
ad7bdb2b 733 struct drm_i915_private *dev_priv = req->i915;
3e78998a 734 struct intel_engine_cs *waiter;
c3232b18 735 enum intel_engine_id id;
3e78998a 736
3b3f1650 737 for_each_engine(waiter, dev_priv, id) {
ad7bdb2b 738 u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
3e78998a
BW
739 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
740 continue;
741
73dec95e
TU
742 *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
743 *cs++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT;
744 *cs++ = upper_32_bits(gtt_offset);
745 *cs++ = req->global_seqno;
746 *cs++ = MI_SEMAPHORE_SIGNAL |
747 MI_SEMAPHORE_TARGET(waiter->hw_id);
748 *cs++ = 0;
3e78998a
BW
749 }
750
73dec95e 751 return cs;
3e78998a
BW
752}
753
73dec95e 754static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs)
1ec14ad3 755{
ad7bdb2b 756 struct drm_i915_private *dev_priv = req->i915;
318f89ca 757 struct intel_engine_cs *engine;
3b3f1650 758 enum intel_engine_id id;
caddfe71 759 int num_rings = 0;
024a43e1 760
3b3f1650 761 for_each_engine(engine, dev_priv, id) {
318f89ca
TU
762 i915_reg_t mbox_reg;
763
764 if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK))
765 continue;
f0f59a00 766
318f89ca 767 mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id];
f0f59a00 768 if (i915_mmio_reg_valid(mbox_reg)) {
73dec95e
TU
769 *cs++ = MI_LOAD_REGISTER_IMM(1);
770 *cs++ = i915_mmio_reg_offset(mbox_reg);
771 *cs++ = req->global_seqno;
caddfe71 772 num_rings++;
78325f2d
BW
773 }
774 }
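	/* Pad with an MI_NOOP if we emitted an odd number of dwords, keeping
	 * the command stream qword-aligned.
	 */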
caddfe71 775 if (num_rings & 1)
73dec95e 776 *cs++ = MI_NOOP;
024a43e1 777
73dec95e 778 return cs;
1ec14ad3
CW
779}
780
b0411e7d
CW
781static void i9xx_submit_request(struct drm_i915_gem_request *request)
782{
783 struct drm_i915_private *dev_priv = request->i915;
784
d55ac5bf
CW
785 i915_gem_request_submit(request);
786
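	/* Writing the TAIL register kicks the hardware into executing
	 * everything emitted up to request->tail.
	 */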
944a36d4 787 GEM_BUG_ON(!IS_ALIGNED(request->tail, 8));
caddfe71 788 I915_WRITE_TAIL(request->engine, request->tail);
b0411e7d
CW
789}
790
73dec95e 791static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
1ec14ad3 792{
73dec95e
TU
793 *cs++ = MI_STORE_DWORD_INDEX;
794 *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
795 *cs++ = req->global_seqno;
796 *cs++ = MI_USER_INTERRUPT;
1ec14ad3 797
73dec95e 798 req->tail = intel_ring_offset(req, cs);
944a36d4 799 GEM_BUG_ON(!IS_ALIGNED(req->tail, 8));
1ec14ad3
CW
800}
801
98f29e8d
CW
802static const int i9xx_emit_breadcrumb_sz = 4;
803
b0411e7d 804/**
9b81d556 805 * gen6_sema_emit_breadcrumb - Update the semaphore mailbox registers
b0411e7d
CW
806 *
 * @req: request to write to the ring
808 *
809 * Update the mailbox registers in the *other* rings with the current seqno.
810 * This acts like a signal in the canonical semaphore.
811 */
73dec95e 812static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
b0411e7d 813{
caddfe71 814 return i9xx_emit_breadcrumb(req,
73dec95e 815 req->engine->semaphore.signal(req, cs));
b0411e7d
CW
816}
817
caddfe71 818static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req,
73dec95e 819 u32 *cs)
a58c01aa
CW
820{
821 struct intel_engine_cs *engine = req->engine;
9242f974 822
caddfe71 823 if (engine->semaphore.signal)
73dec95e
TU
824 cs = engine->semaphore.signal(req, cs);
825
826 *cs++ = GFX_OP_PIPE_CONTROL(6);
827 *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
828 PIPE_CONTROL_QW_WRITE;
829 *cs++ = intel_hws_seqno_address(engine);
830 *cs++ = 0;
831 *cs++ = req->global_seqno;
a58c01aa 832 /* We're thrashing one dword of HWS. */
73dec95e
TU
833 *cs++ = 0;
834 *cs++ = MI_USER_INTERRUPT;
835 *cs++ = MI_NOOP;
a58c01aa 836
73dec95e 837 req->tail = intel_ring_offset(req, cs);
944a36d4 838 GEM_BUG_ON(!IS_ALIGNED(req->tail, 8));
a58c01aa
CW
839}
840
98f29e8d
CW
841static const int gen8_render_emit_breadcrumb_sz = 8;
842
c8c99b0f
BW
/**
 * gen8_ring_sync_to - make one request wait upon another via a semaphore
 *
 * @req: the request that must wait
 * @signal: the request which has signalled, or will signal, completion
 */
5ee426ca
BW
850
851static int
ad7bdb2b
CW
852gen8_ring_sync_to(struct drm_i915_gem_request *req,
853 struct drm_i915_gem_request *signal)
5ee426ca 854{
ad7bdb2b
CW
855 struct drm_i915_private *dev_priv = req->i915;
856 u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id);
6ef48d7f 857 struct i915_hw_ppgtt *ppgtt;
73dec95e 858 u32 *cs;
5ee426ca 859
73dec95e
TU
860 cs = intel_ring_begin(req, 4);
861 if (IS_ERR(cs))
862 return PTR_ERR(cs);
5ee426ca 863
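	/*
	 * MI_SEMAPHORE_WAIT (SAD_GTE): stall this engine until the value at
	 * the signaller's semaphore offset is >= the signalling seqno.
	 */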
73dec95e
TU
864 *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT |
865 MI_SEMAPHORE_SAD_GTE_SDD;
866 *cs++ = signal->global_seqno;
867 *cs++ = lower_32_bits(offset);
868 *cs++ = upper_32_bits(offset);
869 intel_ring_advance(req, cs);
6ef48d7f
CW
870
871 /* When the !RCS engines idle waiting upon a semaphore, they lose their
872 * pagetables and we must reload them before executing the batch.
873 * We do this on the i915_switch_context() following the wait and
874 * before the dispatch.
875 */
ad7bdb2b
CW
876 ppgtt = req->ctx->ppgtt;
877 if (ppgtt && req->engine->id != RCS)
878 ppgtt->pd_dirty_rings |= intel_engine_flag(req->engine);
5ee426ca
BW
879 return 0;
880}
881
c8c99b0f 882static int
ad7bdb2b
CW
883gen6_ring_sync_to(struct drm_i915_gem_request *req,
884 struct drm_i915_gem_request *signal)
1ec14ad3 885{
c8c99b0f
BW
886 u32 dw1 = MI_SEMAPHORE_MBOX |
887 MI_SEMAPHORE_COMPARE |
888 MI_SEMAPHORE_REGISTER;
318f89ca 889 u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id];
73dec95e 890 u32 *cs;
1ec14ad3 891
ebc348b2 892 WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
686cb5f9 893
73dec95e
TU
894 cs = intel_ring_begin(req, 4);
895 if (IS_ERR(cs))
896 return PTR_ERR(cs);
1ec14ad3 897
73dec95e 898 *cs++ = dw1 | wait_mbox;
ddf07be7
CW
899 /* Throughout all of the GEM code, seqno passed implies our current
900 * seqno is >= the last seqno executed. However for hardware the
901 * comparison is strictly greater than.
902 */
73dec95e
TU
903 *cs++ = signal->global_seqno - 1;
904 *cs++ = 0;
905 *cs++ = MI_NOOP;
906 intel_ring_advance(req, cs);
1ec14ad3
CW
907
908 return 0;
909}
910
f8973c21 911static void
38a0f2db 912gen5_seqno_barrier(struct intel_engine_cs *engine)
c6df541c 913{
f8973c21
CW
	/* MI_STORE commands are internally buffered by the GPU and not flushed
915 * either by MI_FLUSH or SyncFlush or any other combination of
916 * MI commands.
c6df541c 917 *
f8973c21
CW
918 * "Only the submission of the store operation is guaranteed.
919 * The write result will be complete (coherent) some time later
920 * (this is practically a finite period but there is no guaranteed
921 * latency)."
922 *
923 * Empirically, we observe that we need a delay of at least 75us to
924 * be sure that the seqno write is visible by the CPU.
c6df541c 925 */
f8973c21 926 usleep_range(125, 250);
c6df541c
CW
927}
928
c04e0f3b
CW
929static void
930gen6_seqno_barrier(struct intel_engine_cs *engine)
4cd53c0c 931{
c033666a 932 struct drm_i915_private *dev_priv = engine->i915;
bcbdb6d0 933
4cd53c0c
DV
934 /* Workaround to force correct ordering between irq and seqno writes on
935 * ivb (and maybe also on snb) by reading from a CS register (like
9b9ed309
CW
936 * ACTHD) before reading the status page.
937 *
938 * Note that this effectively stalls the read by the time it takes to
939 * do a memory transaction, which more or less ensures that the write
940 * from the GPU has sufficient time to invalidate the CPU cacheline.
941 * Alternatively we could delay the interrupt from the CS ring to give
942 * the write time to land, but that would incur a delay after every
943 * batch i.e. much more frequent than a delay when waiting for the
944 * interrupt (with the same net latency).
bcbdb6d0
CW
945 *
946 * Also note that to prevent whole machine hangs on gen7, we have to
947 * take the spinlock to guard against concurrent cacheline access.
9b9ed309 948 */
bcbdb6d0 949 spin_lock_irq(&dev_priv->uncore.lock);
c04e0f3b 950 POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
bcbdb6d0 951 spin_unlock_irq(&dev_priv->uncore.lock);
4cd53c0c
DV
952}
953
31bb59cc
CW
954static void
955gen5_irq_enable(struct intel_engine_cs *engine)
e48d8634 956{
31bb59cc 957 gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask);
e48d8634
DV
958}
959
960static void
31bb59cc 961gen5_irq_disable(struct intel_engine_cs *engine)
e48d8634 962{
31bb59cc 963 gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask);
e48d8634
DV
964}
965
31bb59cc
CW
966static void
967i9xx_irq_enable(struct intel_engine_cs *engine)
62fdfeaf 968{
c033666a 969 struct drm_i915_private *dev_priv = engine->i915;
b13c2b96 970
31bb59cc
CW
971 dev_priv->irq_mask &= ~engine->irq_enable_mask;
972 I915_WRITE(IMR, dev_priv->irq_mask);
973 POSTING_READ_FW(RING_IMR(engine->mmio_base));
62fdfeaf
EA
974}
975
8187a2b7 976static void
31bb59cc 977i9xx_irq_disable(struct intel_engine_cs *engine)
62fdfeaf 978{
c033666a 979 struct drm_i915_private *dev_priv = engine->i915;
62fdfeaf 980
31bb59cc
CW
981 dev_priv->irq_mask |= engine->irq_enable_mask;
982 I915_WRITE(IMR, dev_priv->irq_mask);
62fdfeaf
EA
983}
984
31bb59cc
CW
985static void
986i8xx_irq_enable(struct intel_engine_cs *engine)
c2798b19 987{
c033666a 988 struct drm_i915_private *dev_priv = engine->i915;
c2798b19 989
31bb59cc
CW
990 dev_priv->irq_mask &= ~engine->irq_enable_mask;
991 I915_WRITE16(IMR, dev_priv->irq_mask);
992 POSTING_READ16(RING_IMR(engine->mmio_base));
c2798b19
CW
993}
994
995static void
31bb59cc 996i8xx_irq_disable(struct intel_engine_cs *engine)
c2798b19 997{
c033666a 998 struct drm_i915_private *dev_priv = engine->i915;
c2798b19 999
31bb59cc
CW
1000 dev_priv->irq_mask |= engine->irq_enable_mask;
1001 I915_WRITE16(IMR, dev_priv->irq_mask);
c2798b19
CW
1002}
1003
b72f3acb 1004static int
7c9cf4e3 1005bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
d1b851fc 1006{
73dec95e 1007 u32 *cs;
b72f3acb 1008
73dec95e
TU
1009 cs = intel_ring_begin(req, 2);
1010 if (IS_ERR(cs))
1011 return PTR_ERR(cs);
b72f3acb 1012
73dec95e
TU
1013 *cs++ = MI_FLUSH;
1014 *cs++ = MI_NOOP;
1015 intel_ring_advance(req, cs);
b72f3acb 1016 return 0;
d1b851fc
ZN
1017}
1018
31bb59cc
CW
1019static void
1020gen6_irq_enable(struct intel_engine_cs *engine)
0f46832f 1021{
c033666a 1022 struct drm_i915_private *dev_priv = engine->i915;
0f46832f 1023
61ff75ac
CW
1024 I915_WRITE_IMR(engine,
1025 ~(engine->irq_enable_mask |
1026 engine->irq_keep_mask));
31bb59cc 1027 gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask);
0f46832f
CW
1028}
1029
1030static void
31bb59cc 1031gen6_irq_disable(struct intel_engine_cs *engine)
0f46832f 1032{
c033666a 1033 struct drm_i915_private *dev_priv = engine->i915;
0f46832f 1034
61ff75ac 1035 I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
31bb59cc 1036 gen5_disable_gt_irq(dev_priv, engine->irq_enable_mask);
d1b851fc
ZN
1037}
1038
31bb59cc
CW
1039static void
1040hsw_vebox_irq_enable(struct intel_engine_cs *engine)
a19d2933 1041{
c033666a 1042 struct drm_i915_private *dev_priv = engine->i915;
a19d2933 1043
31bb59cc 1044 I915_WRITE_IMR(engine, ~engine->irq_enable_mask);
f4e9af4f 1045 gen6_unmask_pm_irq(dev_priv, engine->irq_enable_mask);
a19d2933
BW
1046}
1047
1048static void
31bb59cc 1049hsw_vebox_irq_disable(struct intel_engine_cs *engine)
a19d2933 1050{
c033666a 1051 struct drm_i915_private *dev_priv = engine->i915;
a19d2933 1052
31bb59cc 1053 I915_WRITE_IMR(engine, ~0);
f4e9af4f 1054 gen6_mask_pm_irq(dev_priv, engine->irq_enable_mask);
a19d2933
BW
1055}
1056
31bb59cc
CW
1057static void
1058gen8_irq_enable(struct intel_engine_cs *engine)
abd58f01 1059{
c033666a 1060 struct drm_i915_private *dev_priv = engine->i915;
abd58f01 1061
61ff75ac
CW
1062 I915_WRITE_IMR(engine,
1063 ~(engine->irq_enable_mask |
1064 engine->irq_keep_mask));
31bb59cc 1065 POSTING_READ_FW(RING_IMR(engine->mmio_base));
abd58f01
BW
1066}
1067
1068static void
31bb59cc 1069gen8_irq_disable(struct intel_engine_cs *engine)
abd58f01 1070{
c033666a 1071 struct drm_i915_private *dev_priv = engine->i915;
abd58f01 1072
61ff75ac 1073 I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
abd58f01
BW
1074}
1075
d1b851fc 1076static int
803688ba
CW
1077i965_emit_bb_start(struct drm_i915_gem_request *req,
1078 u64 offset, u32 length,
1079 unsigned int dispatch_flags)
d1b851fc 1080{
73dec95e 1081 u32 *cs;
78501eac 1082
73dec95e
TU
1083 cs = intel_ring_begin(req, 2);
1084 if (IS_ERR(cs))
1085 return PTR_ERR(cs);
e1f99ce6 1086
73dec95e
TU
1087 *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags &
1088 I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965);
1089 *cs++ = offset;
1090 intel_ring_advance(req, cs);
78501eac 1091
d1b851fc
ZN
1092 return 0;
1093}
1094
b45305fc
DV
/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
1096#define I830_BATCH_LIMIT (256*1024)
c4d69da1
CW
1097#define I830_TLB_ENTRIES (2)
1098#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
8187a2b7 1099static int
803688ba
CW
1100i830_emit_bb_start(struct drm_i915_gem_request *req,
1101 u64 offset, u32 len,
1102 unsigned int dispatch_flags)
62fdfeaf 1103{
73dec95e 1104 u32 *cs, cs_offset = i915_ggtt_offset(req->engine->scratch);
62fdfeaf 1105
73dec95e
TU
1106 cs = intel_ring_begin(req, 6);
1107 if (IS_ERR(cs))
1108 return PTR_ERR(cs);
62fdfeaf 1109
c4d69da1 1110 /* Evict the invalid PTE TLBs */
73dec95e
TU
1111 *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
1112 *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
1113 *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
1114 *cs++ = cs_offset;
1115 *cs++ = 0xdeadbeef;
1116 *cs++ = MI_NOOP;
1117 intel_ring_advance(req, cs);
b45305fc 1118
8e004efc 1119 if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
b45305fc
DV
1120 if (len > I830_BATCH_LIMIT)
1121 return -ENOSPC;
1122
73dec95e
TU
1123 cs = intel_ring_begin(req, 6 + 2);
1124 if (IS_ERR(cs))
1125 return PTR_ERR(cs);
c4d69da1
CW
1126
1127 /* Blit the batch (which has now all relocs applied) to the
1128 * stable batch scratch bo area (so that the CS never
1129 * stumbles over its tlb invalidation bug) ...
1130 */
73dec95e
TU
1131 *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA;
1132 *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
1133 *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
1134 *cs++ = cs_offset;
1135 *cs++ = 4096;
1136 *cs++ = offset;
1137
1138 *cs++ = MI_FLUSH;
1139 *cs++ = MI_NOOP;
1140 intel_ring_advance(req, cs);
b45305fc
DV
1141
1142 /* ... and execute it. */
c4d69da1 1143 offset = cs_offset;
b45305fc 1144 }
e1f99ce6 1145
73dec95e
TU
1146 cs = intel_ring_begin(req, 2);
1147 if (IS_ERR(cs))
1148 return PTR_ERR(cs);
c4d69da1 1149
73dec95e
TU
1150 *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
1151 *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
1152 MI_BATCH_NON_SECURE);
1153 intel_ring_advance(req, cs);
c4d69da1 1154
fb3256da
DV
1155 return 0;
1156}
1157
1158static int
803688ba
CW
1159i915_emit_bb_start(struct drm_i915_gem_request *req,
1160 u64 offset, u32 len,
1161 unsigned int dispatch_flags)
fb3256da 1162{
73dec95e 1163 u32 *cs;
fb3256da 1164
73dec95e
TU
1165 cs = intel_ring_begin(req, 2);
1166 if (IS_ERR(cs))
1167 return PTR_ERR(cs);
fb3256da 1168
73dec95e
TU
1169 *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
1170 *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
1171 MI_BATCH_NON_SECURE);
1172 intel_ring_advance(req, cs);
62fdfeaf 1173
62fdfeaf
EA
1174 return 0;
1175}
1176
0bc40be8 1177static void cleanup_phys_status_page(struct intel_engine_cs *engine)
7d3fdfff 1178{
c033666a 1179 struct drm_i915_private *dev_priv = engine->i915;
7d3fdfff
VS
1180
1181 if (!dev_priv->status_page_dmah)
1182 return;
1183
91c8a326 1184 drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
0bc40be8 1185 engine->status_page.page_addr = NULL;
7d3fdfff
VS
1186}
1187
0bc40be8 1188static void cleanup_status_page(struct intel_engine_cs *engine)
62fdfeaf 1189{
57e88531 1190 struct i915_vma *vma;
f8a7fde4 1191 struct drm_i915_gem_object *obj;
62fdfeaf 1192
57e88531
CW
1193 vma = fetch_and_zero(&engine->status_page.vma);
1194 if (!vma)
62fdfeaf 1195 return;
62fdfeaf 1196
f8a7fde4
CW
1197 obj = vma->obj;
1198
57e88531 1199 i915_vma_unpin(vma);
f8a7fde4
CW
1200 i915_vma_close(vma);
1201
1202 i915_gem_object_unpin_map(obj);
1203 __i915_gem_object_release_unless_active(obj);
62fdfeaf
EA
1204}
1205
0bc40be8 1206static int init_status_page(struct intel_engine_cs *engine)
62fdfeaf 1207{
57e88531
CW
1208 struct drm_i915_gem_object *obj;
1209 struct i915_vma *vma;
1210 unsigned int flags;
920cf419 1211 void *vaddr;
57e88531 1212 int ret;
e4ffd173 1213
f51455d4 1214 obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
57e88531
CW
1215 if (IS_ERR(obj)) {
1216 DRM_ERROR("Failed to allocate status page\n");
1217 return PTR_ERR(obj);
1218 }
62fdfeaf 1219
57e88531
CW
1220 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
1221 if (ret)
1222 goto err;
e3efda49 1223
a01cb37a 1224 vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
57e88531
CW
1225 if (IS_ERR(vma)) {
1226 ret = PTR_ERR(vma);
1227 goto err;
e3efda49 1228 }
62fdfeaf 1229
57e88531
CW
1230 flags = PIN_GLOBAL;
1231 if (!HAS_LLC(engine->i915))
1232 /* On g33, we cannot place HWS above 256MiB, so
1233 * restrict its pinning to the low mappable arena.
1234 * Though this restriction is not documented for
1235 * gen4, gen5, or byt, they also behave similarly
1236 * and hang if the HWS is placed at the top of the
1237 * GTT. To generalise, it appears that all !llc
1238 * platforms have issues with us placing the HWS
1239 * above the mappable region (even though we never
		 * actually map it).
1241 */
1242 flags |= PIN_MAPPABLE;
1243 ret = i915_vma_pin(vma, 0, 4096, flags);
1244 if (ret)
1245 goto err;
62fdfeaf 1246
920cf419
CW
1247 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
1248 if (IS_ERR(vaddr)) {
1249 ret = PTR_ERR(vaddr);
1250 goto err_unpin;
1251 }
1252
57e88531 1253 engine->status_page.vma = vma;
bde13ebd 1254 engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
f51455d4 1255 engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
62fdfeaf 1256
bde13ebd
CW
1257 DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
1258 engine->name, i915_ggtt_offset(vma));
62fdfeaf 1259 return 0;
57e88531 1260
920cf419
CW
1261err_unpin:
1262 i915_vma_unpin(vma);
57e88531
CW
1263err:
1264 i915_gem_object_put(obj);
1265 return ret;
62fdfeaf
EA
1266}
1267
0bc40be8 1268static int init_phys_status_page(struct intel_engine_cs *engine)
6b8294a4 1269{
c033666a 1270 struct drm_i915_private *dev_priv = engine->i915;
6b8294a4 1271
57e88531
CW
1272 dev_priv->status_page_dmah =
1273 drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
1274 if (!dev_priv->status_page_dmah)
1275 return -ENOMEM;
6b8294a4 1276
0bc40be8
TU
1277 engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1278 memset(engine->status_page.page_addr, 0, PAGE_SIZE);
6b8294a4
CW
1279
1280 return 0;
1281}
1282
d3ef1af6 1283int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias)
7ba717cf 1284{
d3ef1af6 1285 unsigned int flags;
9d80841e 1286 enum i915_map_type map;
57e88531 1287 struct i915_vma *vma = ring->vma;
8305216f 1288 void *addr;
7ba717cf
TD
1289 int ret;
1290
57e88531 1291 GEM_BUG_ON(ring->vaddr);
7ba717cf 1292
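	/*
	 * With an LLC we can use a cacheable (WB) CPU mapping of the ring;
	 * without one we fall back to write-combining so that writes are not
	 * left stranded in the CPU cache.
	 */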
	map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC;
1294
d3ef1af6
DCS
1295 flags = PIN_GLOBAL;
1296 if (offset_bias)
1297 flags |= PIN_OFFSET_BIAS | offset_bias;
9d80841e 1298 if (vma->obj->stolen)
57e88531 1299 flags |= PIN_MAPPABLE;
def0c5f6 1300
57e88531 1301 if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
9d80841e 1302 if (flags & PIN_MAPPABLE || map == I915_MAP_WC)
57e88531
CW
1303 ret = i915_gem_object_set_to_gtt_domain(vma->obj, true);
1304 else
1305 ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);
1306 if (unlikely(ret))
def0c5f6 1307 return ret;
57e88531 1308 }
7ba717cf 1309
57e88531
CW
1310 ret = i915_vma_pin(vma, 0, PAGE_SIZE, flags);
1311 if (unlikely(ret))
1312 return ret;
def0c5f6 1313
9d80841e 1314 if (i915_vma_is_map_and_fenceable(vma))
57e88531
CW
1315 addr = (void __force *)i915_vma_pin_iomap(vma);
1316 else
9d80841e 1317 addr = i915_gem_object_pin_map(vma->obj, map);
57e88531
CW
1318 if (IS_ERR(addr))
1319 goto err;
7ba717cf 1320
32c04f16 1321 ring->vaddr = addr;
7ba717cf 1322 return 0;
d2cad535 1323
57e88531
CW
1324err:
1325 i915_vma_unpin(vma);
1326 return PTR_ERR(addr);
7ba717cf
TD
1327}
1328
aad29fbb
CW
1329void intel_ring_unpin(struct intel_ring *ring)
1330{
1331 GEM_BUG_ON(!ring->vma);
1332 GEM_BUG_ON(!ring->vaddr);
1333
9d80841e 1334 if (i915_vma_is_map_and_fenceable(ring->vma))
aad29fbb 1335 i915_vma_unpin_iomap(ring->vma);
57e88531
CW
1336 else
1337 i915_gem_object_unpin_map(ring->vma->obj);
aad29fbb
CW
1338 ring->vaddr = NULL;
1339
57e88531 1340 i915_vma_unpin(ring->vma);
2919d291
OM
1341}
1342
57e88531
CW
1343static struct i915_vma *
1344intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
62fdfeaf 1345{
05394f39 1346 struct drm_i915_gem_object *obj;
57e88531 1347 struct i915_vma *vma;
62fdfeaf 1348
187685cb 1349 obj = i915_gem_object_create_stolen(dev_priv, size);
c58b735f 1350 if (!obj)
12d79d78 1351 obj = i915_gem_object_create(dev_priv, size);
57e88531
CW
1352 if (IS_ERR(obj))
1353 return ERR_CAST(obj);
8187a2b7 1354
24f3a8cf
AG
1355 /* mark ring buffers as read-only from GPU side by default */
1356 obj->gt_ro = 1;
1357
a01cb37a 1358 vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
57e88531
CW
1359 if (IS_ERR(vma))
1360 goto err;
1361
1362 return vma;
e3efda49 1363
57e88531
CW
1364err:
1365 i915_gem_object_put(obj);
1366 return vma;
e3efda49
CW
1367}
1368
7e37f889
CW
1369struct intel_ring *
1370intel_engine_create_ring(struct intel_engine_cs *engine, int size)
01101fa7 1371{
7e37f889 1372 struct intel_ring *ring;
57e88531 1373 struct i915_vma *vma;
01101fa7 1374
8f942018 1375 GEM_BUG_ON(!is_power_of_2(size));
62ae14b1 1376 GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
8f942018 1377
01101fa7 1378 ring = kzalloc(sizeof(*ring), GFP_KERNEL);
57e88531 1379 if (!ring)
01101fa7
CW
1380 return ERR_PTR(-ENOMEM);
1381
4a570db5 1382 ring->engine = engine;
01101fa7 1383
675d9ad7
CW
1384 INIT_LIST_HEAD(&ring->request_list);
1385
01101fa7
CW
1386 ring->size = size;
1387 /* Workaround an erratum on the i830 which causes a hang if
1388 * the TAIL pointer points to within the last 2 cachelines
1389 * of the buffer.
1390 */
1391 ring->effective_size = size;
2a307c2e 1392 if (IS_I830(engine->i915) || IS_I845G(engine->i915))
01101fa7
CW
1393 ring->effective_size -= 2 * CACHELINE_BYTES;
1394
1395 ring->last_retired_head = -1;
1396 intel_ring_update_space(ring);
1397
57e88531
CW
1398 vma = intel_ring_create_vma(engine->i915, size);
1399 if (IS_ERR(vma)) {
01101fa7 1400 kfree(ring);
57e88531 1401 return ERR_CAST(vma);
01101fa7 1402 }
57e88531 1403 ring->vma = vma;
01101fa7
CW
1404
1405 return ring;
1406}
1407
1408void
7e37f889 1409intel_ring_free(struct intel_ring *ring)
01101fa7 1410{
f8a7fde4
CW
1411 struct drm_i915_gem_object *obj = ring->vma->obj;
1412
1413 i915_vma_close(ring->vma);
1414 __i915_gem_object_release_unless_active(obj);
1415
01101fa7
CW
1416 kfree(ring);
1417}
1418
72b72ae4 1419static int context_pin(struct i915_gem_context *ctx)
e8a9c58f
CW
1420{
1421 struct i915_vma *vma = ctx->engine[RCS].state;
1422 int ret;
1423
1424 /* Clear this page out of any CPU caches for coherent swap-in/out.
1425 * We only want to do this on the first bind so that we do not stall
1426 * on an active context (which by nature is already on the GPU).
1427 */
1428 if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
1429 ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
1430 if (ret)
1431 return ret;
1432 }
1433
afeddf50
CW
1434 return i915_vma_pin(vma, 0, I915_GTT_MIN_ALIGNMENT,
1435 PIN_GLOBAL | PIN_HIGH);
e8a9c58f
CW
1436}
1437
1438static int intel_ring_context_pin(struct intel_engine_cs *engine,
1439 struct i915_gem_context *ctx)
0cb26a8e
CW
1440{
1441 struct intel_context *ce = &ctx->engine[engine->id];
1442 int ret;
1443
91c8a326 1444 lockdep_assert_held(&ctx->i915->drm.struct_mutex);
0cb26a8e
CW
1445
1446 if (ce->pin_count++)
1447 return 0;
1448
1449 if (ce->state) {
72b72ae4 1450 ret = context_pin(ctx);
e8a9c58f 1451 if (ret)
0cb26a8e
CW
1452 goto error;
1453 }
1454
c7c3c07d
CW
1455 /* The kernel context is only used as a placeholder for flushing the
1456 * active context. It is never used for submitting user rendering and
1457 * as such never requires the golden render context, and so we can skip
1458 * emitting it when we switch to the kernel context. This is required
1459 * as during eviction we cannot allocate and pin the renderstate in
1460 * order to initialise the context.
1461 */
984ff29f 1462 if (i915_gem_context_is_kernel(ctx))
c7c3c07d
CW
1463 ce->initialised = true;
1464
9a6feaf0 1465 i915_gem_context_get(ctx);
0cb26a8e
CW
1466 return 0;
1467
1468error:
1469 ce->pin_count = 0;
1470 return ret;
1471}
1472
e8a9c58f
CW
1473static void intel_ring_context_unpin(struct intel_engine_cs *engine,
1474 struct i915_gem_context *ctx)
0cb26a8e
CW
1475{
1476 struct intel_context *ce = &ctx->engine[engine->id];
1477
91c8a326 1478 lockdep_assert_held(&ctx->i915->drm.struct_mutex);
e8a9c58f 1479 GEM_BUG_ON(ce->pin_count == 0);
0cb26a8e
CW
1480
1481 if (--ce->pin_count)
1482 return;
1483
1484 if (ce->state)
bf3783e5 1485 i915_vma_unpin(ce->state);
0cb26a8e 1486
9a6feaf0 1487 i915_gem_context_put(ctx);
0cb26a8e
CW
1488}
1489
acd27845 1490static int intel_init_ring_buffer(struct intel_engine_cs *engine)
e3efda49 1491{
acd27845 1492 struct drm_i915_private *dev_priv = engine->i915;
32c04f16 1493 struct intel_ring *ring;
e3efda49
CW
1494 int ret;
1495
0bc40be8 1496 WARN_ON(engine->buffer);
bfc882b4 1497
019bf277
TU
1498 intel_engine_setup_common(engine);
1499
019bf277 1500 ret = intel_engine_init_common(engine);
688e6c72
CW
1501 if (ret)
1502 goto error;
e3efda49 1503
32c04f16
CW
1504 ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
1505 if (IS_ERR(ring)) {
1506 ret = PTR_ERR(ring);
b0366a54
DG
1507 goto error;
1508 }
01101fa7 1509
3177659a
CS
1510 if (HWS_NEEDS_PHYSICAL(dev_priv)) {
1511 WARN_ON(engine->id != RCS);
1512 ret = init_phys_status_page(engine);
e3efda49 1513 if (ret)
8ee14975 1514 goto error;
e3efda49 1515 } else {
3177659a 1516 ret = init_status_page(engine);
e3efda49 1517 if (ret)
8ee14975 1518 goto error;
e3efda49
CW
1519 }
1520
d3ef1af6 1521 /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
f51455d4 1522 ret = intel_ring_pin(ring, I915_GTT_PAGE_SIZE);
bfc882b4 1523 if (ret) {
57e88531 1524 intel_ring_free(ring);
bfc882b4 1525 goto error;
e3efda49 1526 }
57e88531 1527 engine->buffer = ring;
62fdfeaf 1528
8ee14975 1529 return 0;
351e3db2 1530
8ee14975 1531error:
7e37f889 1532 intel_engine_cleanup(engine);
8ee14975 1533 return ret;
62fdfeaf
EA
1534}
1535
7e37f889 1536void intel_engine_cleanup(struct intel_engine_cs *engine)
62fdfeaf 1537{
6402c330 1538 struct drm_i915_private *dev_priv;
33626e6a 1539
c033666a 1540 dev_priv = engine->i915;
6402c330 1541
0bc40be8 1542 if (engine->buffer) {
21a2c58a
CW
1543 WARN_ON(INTEL_GEN(dev_priv) > 2 &&
1544 (I915_READ_MODE(engine) & MODE_IDLE) == 0);
33626e6a 1545
aad29fbb 1546 intel_ring_unpin(engine->buffer);
7e37f889 1547 intel_ring_free(engine->buffer);
0bc40be8 1548 engine->buffer = NULL;
b0366a54 1549 }
78501eac 1550
0bc40be8
TU
1551 if (engine->cleanup)
1552 engine->cleanup(engine);
8d19215b 1553
3177659a 1554 if (HWS_NEEDS_PHYSICAL(dev_priv)) {
0bc40be8
TU
1555 WARN_ON(engine->id != RCS);
1556 cleanup_phys_status_page(engine);
3177659a
CS
1557 } else {
1558 cleanup_status_page(engine);
7d3fdfff 1559 }
44e895a8 1560
96a945aa 1561 intel_engine_cleanup_common(engine);
0cb26a8e 1562
c033666a 1563 engine->i915 = NULL;
3b3f1650
AG
1564 dev_priv->engine[engine->id] = NULL;
1565 kfree(engine);
62fdfeaf
EA
1566}
1567
821ed7df
CW
1568void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
1569{
1570 struct intel_engine_cs *engine;
3b3f1650 1571 enum intel_engine_id id;
821ed7df 1572
3b3f1650 1573 for_each_engine(engine, dev_priv, id) {
821ed7df
CW
1574 engine->buffer->head = engine->buffer->tail;
1575 engine->buffer->last_retired_head = -1;
1576 }
1577}
1578
f73e7399 1579static int ring_request_alloc(struct drm_i915_gem_request *request)
9d773091 1580{
73dec95e 1581 u32 *cs;
6310346e 1582
e8a9c58f
CW
1583 GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count);
1584
6310346e
CW
1585 /* Flush enough space to reduce the likelihood of waiting after
1586 * we start building the request - in which case we will just
1587 * have to repeat work.
1588 */
a0442461 1589 request->reserved_space += LEGACY_REQUEST_SIZE;
6310346e 1590
e8a9c58f 1591 GEM_BUG_ON(!request->engine->buffer);
1dae2dfb 1592 request->ring = request->engine->buffer;
6310346e 1593
73dec95e
TU
1594 cs = intel_ring_begin(request, 0);
1595 if (IS_ERR(cs))
1596 return PTR_ERR(cs);
6310346e 1597
a0442461 1598 request->reserved_space -= LEGACY_REQUEST_SIZE;
6310346e 1599 return 0;
9d773091
CW
1600}
1601
987046ad
CW
1602static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
1603{
7e37f889 1604 struct intel_ring *ring = req->ring;
987046ad 1605 struct drm_i915_gem_request *target;
e95433c7
CW
1606 long timeout;
1607
1608 lockdep_assert_held(&req->i915->drm.struct_mutex);
987046ad 1609
1dae2dfb
CW
1610 intel_ring_update_space(ring);
1611 if (ring->space >= bytes)
987046ad
CW
1612 return 0;
1613
1614 /*
1615 * Space is reserved in the ringbuffer for finalising the request,
1616 * as that cannot be allowed to fail. During request finalisation,
1617 * reserved_space is set to 0 to stop the overallocation and the
1618 * assumption is that then we never need to wait (which has the
1619 * risk of failing with EINTR).
1620 *
1621 * See also i915_gem_request_alloc() and i915_add_request().
1622 */
0251a963 1623 GEM_BUG_ON(!req->reserved_space);
987046ad 1624
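	/*
	 * Walk the requests still outstanding on this ring, oldest first,
	 * and stop at the first whose retirement would free enough space.
	 */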
675d9ad7 1625 list_for_each_entry(target, &ring->request_list, ring_link) {
987046ad
CW
1626 unsigned space;
1627
987046ad 1628 /* Would completion of this request free enough space? */
1dae2dfb
CW
1629 space = __intel_ring_space(target->postfix, ring->tail,
1630 ring->size);
987046ad
CW
1631 if (space >= bytes)
1632 break;
79bbcc29 1633 }
29b1b415 1634
675d9ad7 1635 if (WARN_ON(&target->ring_link == &ring->request_list))
987046ad
CW
1636 return -ENOSPC;
1637
e95433c7
CW
1638 timeout = i915_wait_request(target,
1639 I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
1640 MAX_SCHEDULE_TIMEOUT);
1641 if (timeout < 0)
1642 return timeout;
7da844c5 1643
7da844c5
CW
1644 i915_gem_request_retire_upto(target);
1645
1646 intel_ring_update_space(ring);
1647 GEM_BUG_ON(ring->space < bytes);
1648 return 0;
29b1b415
JH
1649}
1650
73dec95e 1651u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
cbcc80df 1652{
7e37f889 1653 struct intel_ring *ring = req->ring;
1dae2dfb
CW
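	/*
	 * remain_actual is the space up to the physical end of the ring;
	 * remain_usable additionally excludes the tail bytes reserved via
	 * effective_size (the i830 wraparound erratum).
	 */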
1654 int remain_actual = ring->size - ring->tail;
1655 int remain_usable = ring->effective_size - ring->tail;
987046ad
CW
1656 int bytes = num_dwords * sizeof(u32);
1657 int total_bytes, wait_bytes;
79bbcc29 1658 bool need_wrap = false;
73dec95e 1659 u32 *cs;
29b1b415 1660
0251a963 1661 total_bytes = bytes + req->reserved_space;
29b1b415 1662
79bbcc29
JH
1663 if (unlikely(bytes > remain_usable)) {
1664 /*
1665 * Not enough space for the basic request. So need to flush
1666 * out the remainder and then wait for base + reserved.
1667 */
1668 wait_bytes = remain_actual + total_bytes;
1669 need_wrap = true;
987046ad
CW
1670 } else if (unlikely(total_bytes > remain_usable)) {
1671 /*
 1672		 * The base request will fit but the reserved space
 1673		 * spills past the end, so we don't need an immediate
 1674		 * wrap and effectively only need to wait for the
 1675		 * reserved space, measured from the start of the ringbuffer.
1676 */
0251a963 1677 wait_bytes = remain_actual + req->reserved_space;
79bbcc29 1678 } else {
987046ad
CW
1679 /* No wrapping required, just waiting. */
1680 wait_bytes = total_bytes;
cbcc80df
MK
1681 }
1682
1dae2dfb 1683 if (wait_bytes > ring->space) {
987046ad 1684 int ret = wait_for_space(req, wait_bytes);
cbcc80df 1685 if (unlikely(ret))
73dec95e 1686 return ERR_PTR(ret);
cbcc80df
MK
1687 }
1688
987046ad 1689 if (unlikely(need_wrap)) {
1dae2dfb
CW
1690 GEM_BUG_ON(remain_actual > ring->space);
1691 GEM_BUG_ON(ring->tail + remain_actual > ring->size);
78501eac 1692
987046ad 1693 /* Fill the tail with MI_NOOP */
1dae2dfb
CW
1694 memset(ring->vaddr + ring->tail, 0, remain_actual);
1695 ring->tail = 0;
1696 ring->space -= remain_actual;
987046ad 1697 }
304d695c 1698
73dec95e
TU
1699 GEM_BUG_ON(ring->tail > ring->size - bytes);
1700 cs = ring->vaddr + ring->tail;
1701 ring->tail += bytes;
1dae2dfb
CW
1702 ring->space -= bytes;
1703 GEM_BUG_ON(ring->space < 0);
73dec95e
TU
1704
1705 return cs;
8187a2b7 1706}
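
/*
 * A standalone sketch (plain C; bool used as in the surrounding file;
 * hypothetical names - not driver code) of the wrap decision above.
 * Example: size = 4096, effective_size = 4032, tail = 3968, so
 * remain_actual = 128 and remain_usable = 64.  A request needing
 * bytes = 96 with reserved = 200 does not fit before the end (96 > 64),
 * so the tail must be padded with MI_NOOPs and we wait for
 * remain_actual + bytes + reserved = 424 bytes from the start of the ring.
 */
struct sketch_plan {
	bool need_wrap;
	int wait_bytes;
};

static struct sketch_plan
sketch_plan_emit(int size, int effective_size, int tail, int bytes,
		 int reserved)
{
	struct sketch_plan plan = { .need_wrap = false };
	int remain_actual = size - tail;
	int remain_usable = effective_size - tail;
	int total_bytes = bytes + reserved;

	if (bytes > remain_usable) {
		/* Must wrap: pad the remainder, then wait for base + reserved. */
		plan.wait_bytes = remain_actual + total_bytes;
		plan.need_wrap = true;
	} else if (total_bytes > remain_usable) {
		/* The base fits; only the reserved space spills past the end. */
		plan.wait_bytes = remain_actual + reserved;
	} else {
		/* No wrap needed, just wait for the full amount. */
		plan.wait_bytes = total_bytes;
	}
	return plan;
}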
78501eac 1707
753b1ad4 1708/* Align the ring tail to a cacheline boundary */
bba09b12 1709int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
753b1ad4 1710{
b5321f30 1711 int num_dwords =
73dec95e
TU
1712 (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
1713 u32 *cs;
753b1ad4
VS
1714
1715 if (num_dwords == 0)
1716 return 0;
1717
18393f63 1718 num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
73dec95e
TU
1719 cs = intel_ring_begin(req, num_dwords);
1720 if (IS_ERR(cs))
1721 return PTR_ERR(cs);
753b1ad4
VS
1722
1723 while (num_dwords--)
73dec95e 1724 *cs++ = MI_NOOP;
753b1ad4 1725
73dec95e 1726 intel_ring_advance(req, cs);
753b1ad4
VS
1727
1728 return 0;
1729}
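
/*
 * A standalone sketch (not driver code) of the padding arithmetic above,
 * assuming CACHELINE_BYTES = 64, i.e. 16 dwords per cacheline.  E.g. a
 * tail of 0x...24 is 9 dwords into its cacheline, so 16 - 9 = 7 MI_NOOPs
 * are emitted to reach the next 64-byte boundary; a tail already on a
 * boundary emits nothing.
 */
static int sketch_cacheline_pad_dwords(unsigned int tail_bytes)
{
	const unsigned int line_dwords = 64 / sizeof(uint32_t);
	unsigned int partial = (tail_bytes & (64 - 1)) / sizeof(uint32_t);

	return partial ? line_dwords - partial : 0;
}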
1730
c5efa1ad 1731static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
881f47b6 1732{
c5efa1ad 1733 struct drm_i915_private *dev_priv = request->i915;
881f47b6 1734
76f8421f
CW
1735 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
1736
881f47b6 1737 /* Every tail move must follow the sequence below */
12f55818
CW
1738
1739 /* Disable notification that the ring is IDLE. The GT
1740 * will then assume that it is busy and bring it out of rc6.
1741 */
76f8421f
CW
1742 I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL,
1743 _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
12f55818
CW
1744
1745 /* Clear the context id. Here be magic! */
76f8421f 1746 I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0);
0206e353 1747
12f55818 1748 /* Wait for the ring not to be idle, i.e. for it to wake up. */
76f8421f
CW
1749 if (intel_wait_for_register_fw(dev_priv,
1750 GEN6_BSD_SLEEP_PSMI_CONTROL,
1751 GEN6_BSD_SLEEP_INDICATOR,
1752 0,
1753 50))
12f55818 1754 DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
0206e353 1755
12f55818 1756 /* Now that the ring is fully powered up, update the tail */
b0411e7d 1757 i9xx_submit_request(request);
12f55818
CW
1758
1759 /* Let the ring send IDLE messages to the GT again,
1760 * and so let it sleep to conserve power when idle.
1761 */
76f8421f
CW
1762 I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL,
1763 _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
1764
1765 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
881f47b6
XH
1766}
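
/*
 * The tail-update protocol implemented above, in order: take forcewake for
 * the whole sequence; mask the IDLE message (GEN6_BSD_SLEEP_MSG_DISABLE) so
 * the GT treats the ring as busy and keeps it out of rc6; clear
 * GEN6_BSD_RNCID; poll GEN6_BSD_SLEEP_PSMI_CONTROL until the sleep
 * indicator clears; write the new tail via i9xx_submit_request(); then
 * unmask the IDLE message and release forcewake so the ring may sleep again.
 */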
1767
7c9cf4e3 1768static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
881f47b6 1769{
73dec95e 1770 u32 cmd, *cs;
b72f3acb 1771
73dec95e
TU
1772 cs = intel_ring_begin(req, 4);
1773 if (IS_ERR(cs))
1774 return PTR_ERR(cs);
b72f3acb 1775
71a77e07 1776 cmd = MI_FLUSH_DW;
c033666a 1777 if (INTEL_GEN(req->i915) >= 8)
075b3bba 1778 cmd += 1;
f0a1fb10
CW
1779
1780 /* We always require a command barrier so that subsequent
1781 * commands, such as breadcrumb interrupts, are strictly ordered
1782 * wrt the contents of the write cache being flushed to memory
 1783	 * (and thus coherent from the CPU's point of view).
1784 */
1785 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
1786
9a289771
JB
1787 /*
1788 * Bspec vol 1c.5 - video engine command streamer:
1789 * "If ENABLED, all TLBs will be invalidated once the flush
1790 * operation is complete. This bit is only valid when the
1791 * Post-Sync Operation field is a value of 1h or 3h."
1792 */
7c9cf4e3 1793 if (mode & EMIT_INVALIDATE)
f0a1fb10
CW
1794 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
1795
73dec95e
TU
1796 *cs++ = cmd;
1797 *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
c033666a 1798 if (INTEL_GEN(req->i915) >= 8) {
73dec95e
TU
1799 *cs++ = 0; /* upper addr */
1800 *cs++ = 0; /* value */
075b3bba 1801 } else {
73dec95e
TU
1802 *cs++ = 0;
1803 *cs++ = MI_NOOP;
075b3bba 1804 }
73dec95e 1805 intel_ring_advance(req, cs);
b72f3acb 1806 return 0;
881f47b6
XH
1807}
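
/*
 * The flush above always emits a four-dword MI_FLUSH_DW packet; a summary
 * of the layout as built by this function (symbolic, taken from the code
 * above rather than from the Bspec):
 *
 *   dw0: MI_FLUSH_DW (+1 to the length on gen8+) | MI_FLUSH_DW_STORE_INDEX |
 *        MI_FLUSH_DW_OP_STOREDW, plus MI_INVALIDATE_TLB | MI_INVALIDATE_BSD
 *        when EMIT_INVALIDATE is requested
 *   dw1: I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT
 *   dw2: 0 (on gen8+ this is the upper half of the post-sync address)
 *   dw3: MI_NOOP on gen6/7, or the (zero) post-sync value on gen8+
 */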
1808
1c7a0623 1809static int
803688ba
CW
1810gen8_emit_bb_start(struct drm_i915_gem_request *req,
1811 u64 offset, u32 len,
1812 unsigned int dispatch_flags)
1c7a0623 1813{
b5321f30 1814 bool ppgtt = USES_PPGTT(req->i915) &&
8e004efc 1815 !(dispatch_flags & I915_DISPATCH_SECURE);
73dec95e 1816 u32 *cs;
1c7a0623 1817
73dec95e
TU
1818 cs = intel_ring_begin(req, 4);
1819 if (IS_ERR(cs))
1820 return PTR_ERR(cs);
1c7a0623
BW
1821
1822 /* FIXME(BDW): Address space and security selectors. */
73dec95e
TU
1823 *cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags &
1824 I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
1825 *cs++ = lower_32_bits(offset);
1826 *cs++ = upper_32_bits(offset);
1827 *cs++ = MI_NOOP;
1828 intel_ring_advance(req, cs);
1c7a0623
BW
1829
1830 return 0;
1831}
1832
d7d4eedd 1833static int
803688ba
CW
1834hsw_emit_bb_start(struct drm_i915_gem_request *req,
1835 u64 offset, u32 len,
1836 unsigned int dispatch_flags)
d7d4eedd 1837{
73dec95e 1838 u32 *cs;
d7d4eedd 1839
73dec95e
TU
1840 cs = intel_ring_begin(req, 2);
1841 if (IS_ERR(cs))
1842 return PTR_ERR(cs);
d7d4eedd 1843
73dec95e
TU
1844 *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
1845 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
1846 (dispatch_flags & I915_DISPATCH_RS ?
1847 MI_BATCH_RESOURCE_STREAMER : 0);
d7d4eedd 1848	/* bits 0-7 are the length field on GEN6+ */
73dec95e
TU
1849 *cs++ = offset;
1850 intel_ring_advance(req, cs);
d7d4eedd
CW
1851
1852 return 0;
1853}
1854
881f47b6 1855static int
803688ba
CW
1856gen6_emit_bb_start(struct drm_i915_gem_request *req,
1857 u64 offset, u32 len,
1858 unsigned int dispatch_flags)
881f47b6 1859{
73dec95e 1860 u32 *cs;
ab6f8e32 1861
73dec95e
TU
1862 cs = intel_ring_begin(req, 2);
1863 if (IS_ERR(cs))
1864 return PTR_ERR(cs);
e1f99ce6 1865
73dec95e
TU
1866 *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
1867 0 : MI_BATCH_NON_SECURE_I965);
0206e353 1868	/* bits 0-7 are the length field on GEN6+ */
73dec95e
TU
1869 *cs++ = offset;
1870 intel_ring_advance(req, cs);
ab6f8e32 1871
0206e353 1872 return 0;
881f47b6
XH
1873}
1874
549f7365
CW
1875/* Blitter support (SandyBridge+) */
1876
7c9cf4e3 1877static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
8d19215b 1878{
73dec95e 1879 u32 cmd, *cs;
b72f3acb 1880
73dec95e
TU
1881 cs = intel_ring_begin(req, 4);
1882 if (IS_ERR(cs))
1883 return PTR_ERR(cs);
b72f3acb 1884
71a77e07 1885 cmd = MI_FLUSH_DW;
c033666a 1886 if (INTEL_GEN(req->i915) >= 8)
075b3bba 1887 cmd += 1;
f0a1fb10
CW
1888
1889 /* We always require a command barrier so that subsequent
1890 * commands, such as breadcrumb interrupts, are strictly ordered
1891 * wrt the contents of the write cache being flushed to memory
 1892	 * (and thus coherent from the CPU's point of view).
1893 */
1894 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
1895
9a289771
JB
1896 /*
1897 * Bspec vol 1c.3 - blitter engine command streamer:
1898 * "If ENABLED, all TLBs will be invalidated once the flush
1899 * operation is complete. This bit is only valid when the
1900 * Post-Sync Operation field is a value of 1h or 3h."
1901 */
7c9cf4e3 1902 if (mode & EMIT_INVALIDATE)
f0a1fb10 1903 cmd |= MI_INVALIDATE_TLB;
73dec95e
TU
1904 *cs++ = cmd;
1905 *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
c033666a 1906 if (INTEL_GEN(req->i915) >= 8) {
73dec95e
TU
1907 *cs++ = 0; /* upper addr */
1908 *cs++ = 0; /* value */
075b3bba 1909 } else {
73dec95e
TU
1910 *cs++ = 0;
1911 *cs++ = MI_NOOP;
075b3bba 1912 }
73dec95e 1913 intel_ring_advance(req, cs);
fd3da6c9 1914
b72f3acb 1915 return 0;
8d19215b
ZN
1916}
1917
d9a64610
TU
1918static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
1919 struct intel_engine_cs *engine)
1920{
db3d4019 1921 struct drm_i915_gem_object *obj;
1b9e6650 1922 int ret, i;
db3d4019 1923
39df9190 1924 if (!i915.semaphores)
db3d4019
TU
1925 return;
1926
51d545d0
CW
1927 if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) {
1928 struct i915_vma *vma;
1929
f51455d4 1930 obj = i915_gem_object_create(dev_priv, PAGE_SIZE);
51d545d0
CW
1931 if (IS_ERR(obj))
1932 goto err;
db3d4019 1933
a01cb37a 1934 vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
51d545d0
CW
1935 if (IS_ERR(vma))
1936 goto err_obj;
1937
1938 ret = i915_gem_object_set_to_gtt_domain(obj, false);
1939 if (ret)
1940 goto err_obj;
1941
1942 ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
1943 if (ret)
1944 goto err_obj;
1945
1946 dev_priv->semaphore = vma;
1947 }
d9a64610
TU
1948
1949 if (INTEL_GEN(dev_priv) >= 8) {
bde13ebd 1950 u32 offset = i915_ggtt_offset(dev_priv->semaphore);
1b9e6650 1951
ad7bdb2b 1952 engine->semaphore.sync_to = gen8_ring_sync_to;
d9a64610 1953 engine->semaphore.signal = gen8_xcs_signal;
1b9e6650
TU
1954
1955 for (i = 0; i < I915_NUM_ENGINES; i++) {
bde13ebd 1956 u32 ring_offset;
1b9e6650
TU
1957
1958 if (i != engine->id)
1959 ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i);
1960 else
1961 ring_offset = MI_SEMAPHORE_SYNC_INVALID;
1962
1963 engine->semaphore.signal_ggtt[i] = ring_offset;
1964 }
d9a64610 1965 } else if (INTEL_GEN(dev_priv) >= 6) {
ad7bdb2b 1966 engine->semaphore.sync_to = gen6_ring_sync_to;
d9a64610 1967 engine->semaphore.signal = gen6_signal;
4b8e38a9
TU
1968
1969 /*
 1970		 * These mailbox semaphores are only used on pre-gen8
 1971		 * platforms, and there is no VCS2 ring before gen8, so
 1972		 * the semaphore between RCS and VCS2 is initialized as
 1973		 * INVALID. Gen8 initializes the semaphore between VCS2
 1974		 * and RCS later.
1975 */
318f89ca 1976 for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
4b8e38a9
TU
1977 static const struct {
1978 u32 wait_mbox;
1979 i915_reg_t mbox_reg;
318f89ca
TU
1980 } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
1981 [RCS_HW] = {
1982 [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC },
1983 [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC },
1984 [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
4b8e38a9 1985 },
318f89ca
TU
1986 [VCS_HW] = {
1987 [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC },
1988 [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC },
1989 [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
4b8e38a9 1990 },
318f89ca
TU
1991 [BCS_HW] = {
1992 [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC },
1993 [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC },
1994 [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
4b8e38a9 1995 },
318f89ca
TU
1996 [VECS_HW] = {
1997 [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
1998 [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
1999 [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
4b8e38a9
TU
2000 },
2001 };
2002 u32 wait_mbox;
2003 i915_reg_t mbox_reg;
2004
318f89ca 2005 if (i == engine->hw_id) {
4b8e38a9
TU
2006 wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
2007 mbox_reg = GEN6_NOSYNC;
2008 } else {
318f89ca
TU
2009 wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
2010 mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
4b8e38a9
TU
2011 }
2012
2013 engine->semaphore.mbox.wait[i] = wait_mbox;
2014 engine->semaphore.mbox.signal[i] = mbox_reg;
2015 }
d9a64610 2016 }
51d545d0
CW
2017
2018 return;
2019
2020err_obj:
2021 i915_gem_object_put(obj);
2022err:
2023 DRM_DEBUG_DRIVER("Failed to allocate space for semaphores, disabling\n");
2024 i915.semaphores = 0;
d9a64610
TU
2025}
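
/*
 * A worked example of the gen6/7 mailbox table above: when the render ring
 * (RCS_HW) needs to wait on the video ring (VCS_HW), sem_data[RCS_HW][VCS_HW]
 * supplies wait_mbox = MI_SEMAPHORE_SYNC_RV and mbox_reg = GEN6_VRSYNC;
 * an engine paired with itself gets MI_SEMAPHORE_SYNC_INVALID / GEN6_NOSYNC
 * so that no self-wait is ever emitted.
 */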
2026
ed003078
CW
2027static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
2028 struct intel_engine_cs *engine)
2029{
c78d6061
TU
2030 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift;
2031
ed003078 2032 if (INTEL_GEN(dev_priv) >= 8) {
31bb59cc
CW
2033 engine->irq_enable = gen8_irq_enable;
2034 engine->irq_disable = gen8_irq_disable;
ed003078
CW
2035 engine->irq_seqno_barrier = gen6_seqno_barrier;
2036 } else if (INTEL_GEN(dev_priv) >= 6) {
31bb59cc
CW
2037 engine->irq_enable = gen6_irq_enable;
2038 engine->irq_disable = gen6_irq_disable;
ed003078
CW
2039 engine->irq_seqno_barrier = gen6_seqno_barrier;
2040 } else if (INTEL_GEN(dev_priv) >= 5) {
31bb59cc
CW
2041 engine->irq_enable = gen5_irq_enable;
2042 engine->irq_disable = gen5_irq_disable;
f8973c21 2043 engine->irq_seqno_barrier = gen5_seqno_barrier;
ed003078 2044 } else if (INTEL_GEN(dev_priv) >= 3) {
31bb59cc
CW
2045 engine->irq_enable = i9xx_irq_enable;
2046 engine->irq_disable = i9xx_irq_disable;
ed003078 2047 } else {
31bb59cc
CW
2048 engine->irq_enable = i8xx_irq_enable;
2049 engine->irq_disable = i8xx_irq_disable;
ed003078
CW
2050 }
2051}
2052
ff44ad51
CW
2053static void i9xx_set_default_submission(struct intel_engine_cs *engine)
2054{
2055 engine->submit_request = i9xx_submit_request;
2056}
2057
2058static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
2059{
2060 engine->submit_request = gen6_bsd_submit_request;
2061}
2062
06a2fe22
TU
2063static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
2064 struct intel_engine_cs *engine)
2065{
618e4ca7
CW
2066 intel_ring_init_irq(dev_priv, engine);
2067 intel_ring_init_semaphores(dev_priv, engine);
2068
1d8a1337 2069 engine->init_hw = init_ring_common;
821ed7df 2070 engine->reset_hw = reset_ring_common;
7445a2a4 2071
e8a9c58f
CW
2072 engine->context_pin = intel_ring_context_pin;
2073 engine->context_unpin = intel_ring_context_unpin;
2074
f73e7399
CW
2075 engine->request_alloc = ring_request_alloc;
2076
9b81d556 2077 engine->emit_breadcrumb = i9xx_emit_breadcrumb;
98f29e8d
CW
2078 engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
2079 if (i915.semaphores) {
2080 int num_rings;
2081
9b81d556 2082 engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;
98f29e8d
CW
2083
2084 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1;
2085 if (INTEL_GEN(dev_priv) >= 8) {
2086 engine->emit_breadcrumb_sz += num_rings * 6;
2087 } else {
2088 engine->emit_breadcrumb_sz += num_rings * 3;
2089 if (num_rings & 1)
2090 engine->emit_breadcrumb_sz++;
2091 }
2092 }
ff44ad51
CW
2093
2094 engine->set_default_submission = i9xx_set_default_submission;
6f7bef75
CW
2095
2096 if (INTEL_GEN(dev_priv) >= 8)
803688ba 2097 engine->emit_bb_start = gen8_emit_bb_start;
6f7bef75 2098 else if (INTEL_GEN(dev_priv) >= 6)
803688ba 2099 engine->emit_bb_start = gen6_emit_bb_start;
6f7bef75 2100 else if (INTEL_GEN(dev_priv) >= 4)
803688ba 2101 engine->emit_bb_start = i965_emit_bb_start;
2a307c2e 2102 else if (IS_I830(dev_priv) || IS_I845G(dev_priv))
803688ba 2103 engine->emit_bb_start = i830_emit_bb_start;
6f7bef75 2104 else
803688ba 2105 engine->emit_bb_start = i915_emit_bb_start;
06a2fe22
TU
2106}
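
/*
 * A standalone sketch (plain C, hypothetical name - not driver code) of the
 * emit_breadcrumb_sz adjustment above: each engine signals every other
 * engine, so on gen6/7 a Haswell part with four engines adds 3 * 3 = 9
 * dwords plus one MI_NOOP of padding to keep the total even, while a gen8
 * part with four engines adds 3 * 6 = 18 dwords.
 */
static int sketch_sema_breadcrumb_extra(int num_other_rings, bool gen8)
{
	if (gen8)
		return num_other_rings * 6;
	/* gen6/7: three dwords per signal, padded to an even count */
	return num_other_rings * 3 + (num_other_rings & 1);
}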
2107
8b3e2d36 2108int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
5c1143bb 2109{
8b3e2d36 2110 struct drm_i915_private *dev_priv = engine->i915;
3e78998a 2111 int ret;
5c1143bb 2112
06a2fe22
TU
2113 intel_ring_default_vfuncs(dev_priv, engine);
2114
61ff75ac
CW
2115 if (HAS_L3_DPF(dev_priv))
2116 engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
f8973c21 2117
c033666a 2118 if (INTEL_GEN(dev_priv) >= 8) {
e2f80391 2119 engine->init_context = intel_rcs_ctx_init;
9b81d556 2120 engine->emit_breadcrumb = gen8_render_emit_breadcrumb;
98f29e8d 2121 engine->emit_breadcrumb_sz = gen8_render_emit_breadcrumb_sz;
c7fe7d25 2122 engine->emit_flush = gen8_render_ring_flush;
98f29e8d
CW
2123 if (i915.semaphores) {
2124 int num_rings;
2125
e2f80391 2126 engine->semaphore.signal = gen8_rcs_signal;
98f29e8d
CW
2127
2128 num_rings =
2129 hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1;
2130 engine->emit_breadcrumb_sz += num_rings * 6;
2131 }
c033666a 2132 } else if (INTEL_GEN(dev_priv) >= 6) {
e2f80391 2133 engine->init_context = intel_rcs_ctx_init;
c7fe7d25 2134 engine->emit_flush = gen7_render_ring_flush;
c033666a 2135 if (IS_GEN6(dev_priv))
c7fe7d25 2136 engine->emit_flush = gen6_render_ring_flush;
c033666a 2137 } else if (IS_GEN5(dev_priv)) {
c7fe7d25 2138 engine->emit_flush = gen4_render_ring_flush;
59465b5f 2139 } else {
c033666a 2140 if (INTEL_GEN(dev_priv) < 4)
c7fe7d25 2141 engine->emit_flush = gen2_render_ring_flush;
46f0f8d1 2142 else
c7fe7d25 2143 engine->emit_flush = gen4_render_ring_flush;
e2f80391 2144 engine->irq_enable_mask = I915_USER_INTERRUPT;
1ec14ad3 2145 }
707d9cf9 2146
c033666a 2147 if (IS_HASWELL(dev_priv))
803688ba 2148 engine->emit_bb_start = hsw_emit_bb_start;
6f7bef75 2149
e2f80391
TU
2150 engine->init_hw = init_render_ring;
2151 engine->cleanup = render_ring_cleanup;
59465b5f 2152
acd27845 2153 ret = intel_init_ring_buffer(engine);
99be1dfe
DV
2154 if (ret)
2155 return ret;
2156
f8973c21 2157 if (INTEL_GEN(dev_priv) >= 6) {
f51455d4 2158 ret = intel_engine_create_scratch(engine, PAGE_SIZE);
7d5ea807
CW
2159 if (ret)
2160 return ret;
2161 } else if (HAS_BROKEN_CS_TLB(dev_priv)) {
56c0f1a7 2162 ret = intel_engine_create_scratch(engine, I830_WA_SIZE);
99be1dfe
DV
2163 if (ret)
2164 return ret;
2165 }
2166
2167 return 0;
5c1143bb
XH
2168}
2169
8b3e2d36 2170int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
5c1143bb 2171{
8b3e2d36 2172 struct drm_i915_private *dev_priv = engine->i915;
58fa3835 2173
06a2fe22
TU
2174 intel_ring_default_vfuncs(dev_priv, engine);
2175
c033666a 2176 if (INTEL_GEN(dev_priv) >= 6) {
0fd2c201 2177		/* gen6 BSD needs a special workaround for tail updates */
c033666a 2178 if (IS_GEN6(dev_priv))
ff44ad51 2179 engine->set_default_submission = gen6_bsd_set_default_submission;
c7fe7d25 2180 engine->emit_flush = gen6_bsd_ring_flush;
c78d6061 2181 if (INTEL_GEN(dev_priv) < 8)
e2f80391 2182 engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
58fa3835 2183 } else {
e2f80391 2184 engine->mmio_base = BSD_RING_BASE;
c7fe7d25 2185 engine->emit_flush = bsd_ring_flush;
8d228911 2186 if (IS_GEN5(dev_priv))
e2f80391 2187 engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
8d228911 2188 else
e2f80391 2189 engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
58fa3835 2190 }
58fa3835 2191
acd27845 2192 return intel_init_ring_buffer(engine);
5c1143bb 2193}
549f7365 2194
845f74a7 2195/**
62659920 2196 * Initialize the second BSD ring (e.g. Broadwell GT3, Skylake GT3)
845f74a7 2197 */
8b3e2d36 2198int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine)
845f74a7 2199{
8b3e2d36 2200 struct drm_i915_private *dev_priv = engine->i915;
06a2fe22
TU
2201
2202 intel_ring_default_vfuncs(dev_priv, engine);
2203
c7fe7d25 2204 engine->emit_flush = gen6_bsd_ring_flush;
845f74a7 2205
acd27845 2206 return intel_init_ring_buffer(engine);
845f74a7
ZY
2207}
2208
8b3e2d36 2209int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
549f7365 2210{
8b3e2d36 2211 struct drm_i915_private *dev_priv = engine->i915;
06a2fe22
TU
2212
2213 intel_ring_default_vfuncs(dev_priv, engine);
2214
c7fe7d25 2215 engine->emit_flush = gen6_ring_flush;
c78d6061 2216 if (INTEL_GEN(dev_priv) < 8)
e2f80391 2217 engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
549f7365 2218
acd27845 2219 return intel_init_ring_buffer(engine);
549f7365 2220}
a7b9761d 2221
8b3e2d36 2222int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
9a8a2213 2223{
8b3e2d36 2224 struct drm_i915_private *dev_priv = engine->i915;
06a2fe22
TU
2225
2226 intel_ring_default_vfuncs(dev_priv, engine);
2227
c7fe7d25 2228 engine->emit_flush = gen6_ring_flush;
abd58f01 2229
c78d6061 2230 if (INTEL_GEN(dev_priv) < 8) {
e2f80391 2231 engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
31bb59cc
CW
2232 engine->irq_enable = hsw_vebox_irq_enable;
2233 engine->irq_disable = hsw_vebox_irq_disable;
abd58f01 2234 }
9a8a2213 2235
acd27845 2236 return intel_init_ring_buffer(engine);
9a8a2213 2237}