/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao<haihao.xiang@intel.com>
 *
 */

#include <drm/drmP.h>
#include "i915_drv.h"
#include <drm/i915_drm.h>
#include "i915_trace.h"
#include "intel_drv.h"

bool
intel_ring_initialized(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;

	if (!dev)
		return false;

	if (i915.enable_execlists) {
		struct intel_context *dctx = ring->default_context;
		struct intel_ringbuffer *ringbuf = dctx->engine[ring->id].ringbuf;

		return ringbuf->obj;
	} else
		return ring->buffer && ring->buffer->obj;
}

int __intel_ring_space(int head, int tail, int size)
{
	int space = head - tail;
	if (space <= 0)
		space += size;
	return space - I915_RING_FREE_SPACE;
}
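
/*
 * Worked example of the wrap-around arithmetic above (illustrative
 * numbers, not taken from the driver): with size = 4096, head = 512 and
 * tail = 3584, head - tail = -3072, which wraps to -3072 + 4096 = 1024
 * bytes between tail and head; I915_RING_FREE_SPACE is then reserved
 * out of that before the free space is reported.
 */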

void intel_ring_update_space(struct intel_ringbuffer *ringbuf)
{
	if (ringbuf->last_retired_head != -1) {
		ringbuf->head = ringbuf->last_retired_head;
		ringbuf->last_retired_head = -1;
	}

	ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR,
					    ringbuf->tail, ringbuf->size);
}

int intel_ring_space(struct intel_ringbuffer *ringbuf)
{
	intel_ring_update_space(ringbuf);
	return ringbuf->space;
}

bool intel_ring_stopped(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
}

static void __intel_ring_advance(struct intel_engine_cs *ring)
{
	struct intel_ringbuffer *ringbuf = ring->buffer;
	ringbuf->tail &= ringbuf->size - 1;
	if (intel_ring_stopped(ring))
		return;
	ring->write_tail(ring, ringbuf->tail);
}
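
/*
 * Note on the tail masking above (illustrative, and assuming the ring
 * size is a power of two, which is what the '&' form requires): with
 * size = 4096 and a tail that has advanced to 4104, tail & (size - 1)
 * wraps it back to 8, so the value handed to write_tail() always stays
 * within the ring buffer object.
 */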

static int
gen2_render_ring_flush(struct drm_i915_gem_request *req,
		       u32 invalidate_domains,
		       u32 flush_domains)
{
	struct intel_engine_cs *ring = req->ring;
	u32 cmd;
	int ret;

	cmd = MI_FLUSH;
	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
		cmd |= MI_NO_WRITE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
		cmd |= MI_READ_FLUSH;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen4_render_ring_flush(struct drm_i915_gem_request *req,
		       u32 invalidate_domains,
		       u32 flush_domains)
{
	struct intel_engine_cs *ring = req->ring;
	struct drm_device *dev = ring->dev;
	u32 cmd;
	int ret;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
		cmd &= ~MI_NO_WRITE_FLUSH;
	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
		cmd |= MI_EXE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
	    (IS_G4X(dev) || IS_GEN5(dev)))
		cmd |= MI_INVALIDATE_ISP;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
static int
intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
{
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0); /* low dword */
	intel_ring_emit(ring, 0); /* high dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_render_ring_flush(struct drm_i915_gem_request *req,
		       u32 invalidate_domains, u32 flush_domains)
{
	struct intel_engine_cs *ring = req->ring;
	u32 flags = 0;
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	ret = intel_emit_post_sync_nonzero_flush(ring);
	if (ret)
		return ret;

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/*
		 * Ensure that any following seqno writes only happen
		 * when the render cache is indeed flushed.
		 */
		flags |= PIPE_CONTROL_CS_STALL;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int
gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
{
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			      PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int
gen7_render_ring_flush(struct drm_i915_gem_request *req,
		       u32 invalidate_domains, u32 flush_domains)
{
	struct intel_engine_cs *ring = req->ring;
	u32 flags = 0;
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	/*
	 * Ensure that any following seqno writes only happen when the render
	 * cache is indeed flushed.
	 *
	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
	 * don't try to be clever and just set it unconditionally.
	 */
	flags |= PIPE_CONTROL_CS_STALL;

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

		flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;

		/* Workaround: we must issue a pipe_control with CS-stall bit
		 * set before a pipe_control command that has the state cache
		 * invalidate bit set. */
		gen7_render_ring_cs_stall_wa(ring);
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int
gen8_emit_pipe_control(struct intel_engine_cs *ring,
		       u32 flags, u32 scratch_addr)
{
	int ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int
gen8_render_ring_flush(struct drm_i915_gem_request *req,
		       u32 invalidate_domains, u32 flush_domains)
{
	struct intel_engine_cs *ring = req->ring;
	u32 flags = 0;
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	flags |= PIPE_CONTROL_CS_STALL;

	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
		ret = gen8_emit_pipe_control(ring,
					     PIPE_CONTROL_CS_STALL |
					     PIPE_CONTROL_STALL_AT_SCOREBOARD,
					     0);
		if (ret)
			return ret;
	}

	return gen8_emit_pipe_control(ring, flags, scratch_addr);
}

static void ring_write_tail(struct intel_engine_cs *ring,
			    u32 value)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	I915_WRITE_TAIL(ring, value);
}

u64 intel_ring_get_active_head(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	u64 acthd;

	if (INTEL_INFO(ring->dev)->gen >= 8)
		acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
					 RING_ACTHD_UDW(ring->mmio_base));
	else if (INTEL_INFO(ring->dev)->gen >= 4)
		acthd = I915_READ(RING_ACTHD(ring->mmio_base));
	else
		acthd = I915_READ(ACTHD);

	return acthd;
}

static void ring_setup_phys_status_page(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	u32 addr;

	addr = dev_priv->status_page_dmah->busaddr;
	if (INTEL_INFO(ring->dev)->gen >= 4)
		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
	I915_WRITE(HWS_PGA, addr);
}

static void intel_ring_setup_status_page(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	u32 mmio = 0;

	/* The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (IS_GEN7(dev)) {
		switch (ring->id) {
		case RCS:
			mmio = RENDER_HWS_PGA_GEN7;
			break;
		case BCS:
			mmio = BLT_HWS_PGA_GEN7;
			break;
		/*
		 * VCS2 actually doesn't exist on Gen7. Only shut up
		 * gcc switch check warning
		 */
		case VCS2:
		case VCS:
			mmio = BSD_HWS_PGA_GEN7;
			break;
		case VECS:
			mmio = VEBOX_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN6(ring->dev)) {
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
	} else {
		/* XXX: gen8 returns to sanity */
		mmio = RING_HWS_PGA(ring->mmio_base);
	}

	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
	POSTING_READ(mmio);

	/*
	 * Flush the TLB for this page
	 *
	 * FIXME: These two bits have disappeared on gen8, so a question
	 * arises: do we still need this and if so how should we go about
	 * invalidating the TLB?
	 */
	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
		u32 reg = RING_INSTPM(ring->mmio_base);

		/* ring should be idle before issuing a sync flush*/
		WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);

		I915_WRITE(reg,
			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
					      INSTPM_SYNC_FLUSH));
		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
			     1000))
			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
				  ring->name);
	}
}

static bool stop_ring(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = to_i915(ring->dev);

	if (!IS_GEN2(ring->dev)) {
		I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
		if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
			DRM_ERROR("%s : timed out trying to stop ring\n", ring->name);
			/* Sometimes we observe that the idle flag is not
			 * set even though the ring is empty. So double
			 * check before giving up.
			 */
			if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring))
				return false;
		}
	}

	I915_WRITE_CTL(ring, 0);
	I915_WRITE_HEAD(ring, 0);
	ring->write_tail(ring, 0);

	if (!IS_GEN2(ring->dev)) {
		(void)I915_READ_CTL(ring);
		I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
	}

	return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0;
}

static int init_ring_common(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ringbuffer *ringbuf = ring->buffer;
	struct drm_i915_gem_object *obj = ringbuf->obj;
	int ret = 0;

	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	if (!stop_ring(ring)) {
		/* G45 ring initialization often fails to reset head to zero */
		DRM_DEBUG_KMS("%s head not reset to zero "
			      "ctl %08x head %08x tail %08x start %08x\n",
			      ring->name,
			      I915_READ_CTL(ring),
			      I915_READ_HEAD(ring),
			      I915_READ_TAIL(ring),
			      I915_READ_START(ring));

		if (!stop_ring(ring)) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  ring->name,
				  I915_READ_CTL(ring),
				  I915_READ_HEAD(ring),
				  I915_READ_TAIL(ring),
				  I915_READ_START(ring));
			ret = -EIO;
			goto out;
		}
	}

	if (I915_NEED_GFX_HWS(dev))
		intel_ring_setup_status_page(ring);
	else
		ring_setup_phys_status_page(ring);

	/* Enforce ordering by reading HEAD register back */
	I915_READ_HEAD(ring);

	/* Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD registers
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values. */
	I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));

	/* WaClearRingBufHeadRegAtInit:ctg,elk */
	if (I915_READ_HEAD(ring))
		DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
			  ring->name, I915_READ_HEAD(ring));
	I915_WRITE_HEAD(ring, 0);
	(void)I915_READ_HEAD(ring);

	I915_WRITE_CTL(ring,
			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
			| RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
		     I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
		DRM_ERROR("%s initialization failed "
			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
			  ring->name,
			  I915_READ_CTL(ring), I915_READ_CTL(ring) & RING_VALID,
			  I915_READ_HEAD(ring), I915_READ_TAIL(ring),
			  I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj));
		ret = -EIO;
		goto out;
	}

	ringbuf->last_retired_head = -1;
	ringbuf->head = I915_READ_HEAD(ring);
	ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
	intel_ring_update_space(ringbuf);

	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));

out:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

	return ret;
}

void
intel_fini_pipe_control(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;

	if (ring->scratch.obj == NULL)
		return;

	if (INTEL_INFO(dev)->gen >= 5) {
		kunmap(sg_page(ring->scratch.obj->pages->sgl));
		i915_gem_object_ggtt_unpin(ring->scratch.obj);
	}

	drm_gem_object_unreference(&ring->scratch.obj->base);
	ring->scratch.obj = NULL;
}

int
intel_init_pipe_control(struct intel_engine_cs *ring)
{
	int ret;

	WARN_ON(ring->scratch.obj);

	ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
	if (ring->scratch.obj == NULL) {
		DRM_ERROR("Failed to allocate seqno page\n");
		ret = -ENOMEM;
		goto err;
	}

	ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
	if (ret)
		goto err_unref;

	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
	if (ret)
		goto err_unref;

	ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj);
	ring->scratch.cpu_page = kmap(sg_page(ring->scratch.obj->pages->sgl));
	if (ring->scratch.cpu_page == NULL) {
		ret = -ENOMEM;
		goto err_unpin;
	}

	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
			 ring->name, ring->scratch.gtt_offset);
	return 0;

err_unpin:
	i915_gem_object_ggtt_unpin(ring->scratch.obj);
err_unref:
	drm_gem_object_unreference(&ring->scratch.obj->base);
err:
	return ret;
}

static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
{
	int ret, i;
	struct intel_engine_cs *ring = req->ring;
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_workarounds *w = &dev_priv->workarounds;

	if (WARN_ON_ONCE(w->count == 0))
		return 0;

	ring->gpu_caches_dirty = true;
	ret = intel_ring_flush_all_caches(req);
	if (ret)
		return ret;

	ret = intel_ring_begin(ring, (w->count * 2 + 2));
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
	for (i = 0; i < w->count; i++) {
		intel_ring_emit(ring, w->reg[i].addr);
		intel_ring_emit(ring, w->reg[i].value);
	}
	intel_ring_emit(ring, MI_NOOP);

	intel_ring_advance(ring);

	ring->gpu_caches_dirty = true;
	ret = intel_ring_flush_all_caches(req);
	if (ret)
		return ret;

	DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);

	return 0;
}

static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
{
	int ret;

	ret = intel_ring_workarounds_emit(req);
	if (ret != 0)
		return ret;

	ret = i915_gem_render_state_init(req);
	if (ret)
		DRM_ERROR("init render state: %d\n", ret);

	return ret;
}

static int wa_add(struct drm_i915_private *dev_priv,
		  const u32 addr, const u32 mask, const u32 val)
{
	const u32 idx = dev_priv->workarounds.count;

	if (WARN_ON(idx >= I915_MAX_WA_REGS))
		return -ENOSPC;

	dev_priv->workarounds.reg[idx].addr = addr;
	dev_priv->workarounds.reg[idx].value = val;
	dev_priv->workarounds.reg[idx].mask = mask;

	dev_priv->workarounds.count++;

	return 0;
}

#define WA_REG(addr, mask, val) { \
		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
		if (r) \
			return r; \
	}

#define WA_SET_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	WA_REG(addr, mask, _MASKED_FIELD(mask, value))

#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))

#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)

static int bdw_init_workarounds(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw */
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw */
	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
			  STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
			  DOP_CLOCK_GATING_DISABLE);

	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  /* WaForceEnableNonCoherent:bdw */
			  HDC_FORCE_NON_COHERENT |
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  /* WaHdcDisableFetchWhenMasked:bdw */
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
			  (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 *  polygons in the same 8x4 pixel/sample area to be processed without
	 *  stalling waiting for the earlier ones to write to Hierarchical Z
	 *  buffer."
	 *
	 * This optimization is off by default for Broadwell; turn it on.
	 */
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw */
	WA_SET_BIT_MASKED(CACHE_MODE_1,
			  GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);

	return 0;
}

static int chv_init_workarounds(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:chv */
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:chv */
	/* WaDisableThreadStallDopClockGating:chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
			  STALL_DOP_GATING_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:chv */
	/* WaHdcDisableFetchWhenMasked:chv */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_NON_COHERENT |
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED);

	/* According to the CACHE_MODE_0 default value documentation, some
	 * CHV platforms disable this optimization by default.  Turn it on.
	 */
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:chv */
	WA_SET_BIT_MASKED(CACHE_MODE_1,
			  GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/* Improve HiZ throughput on CHV. */
	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);

	return 0;
}

static int gen9_init_workarounds(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t tmp;

	/* WaDisablePartialInstShootdown:skl,bxt */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Syncing dependencies between camera and graphics:skl,bxt */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) == SKL_REVID_A0 ||
	    INTEL_REVID(dev) == SKL_REVID_B0)) ||
	    (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
		/* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
		WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
				  GEN9_DG_MIRROR_FIX_ENABLE);
	}

	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
	    (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
		/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
		WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
				  GEN9_RHWO_OPTIMIZATION_DISABLE);
		WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN0,
				  DISABLE_PIXEL_MASK_CAMMING);
	}

	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) >= SKL_REVID_C0) ||
	    IS_BROXTON(dev)) {
		/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
				  GEN9_ENABLE_YV12_BUGFIX);
	}

	/* Wa4x4STCOptimizationDisable:skl,bxt */
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/* WaDisablePartialResolveInVc:skl,bxt */
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

	/* WaCcsTlbPrefetchDisable:skl,bxt */
	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaDisableMaskBasedCammingInRCC:skl,bxt */
	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_C0) ||
	    (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0))
		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
				  PIXEL_MASK_CAMMING_DISABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt */
	tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_F0) ||
	    (IS_BROXTON(dev) && INTEL_REVID(dev) >= BXT_REVID_B0))
		tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
	WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);

	return 0;
}

static int skl_tune_iz_hashing(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (hweight8(dev_priv->info.subslice_7eu[i]) != 1)
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * ->    0 <= ss <= 3;
		 */
		ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return 0;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));

	return 0;
}
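
/*
 * Worked example for the mapping above (illustrative values, not taken
 * from a real SKU): if slice i reports subslice_7eu[i] = 0x4, exactly
 * one subslice (bit 2) has 7 EUs, so ss = ffs(0x4) - 1 = 2 and
 * vals[i] = 3 - ss = 1, which is the value programmed through
 * GEN9_IZ_HASHING(i, vals[i]) in the WA_SET_FIELD_MASKED() call above.
 */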

static int skl_init_workarounds(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	gen9_init_workarounds(ring);

	/* WaDisablePowerCompilerClockGating:skl */
	if (INTEL_REVID(dev) == SKL_REVID_B0)
		WA_SET_BIT_MASKED(HIZ_CHICKEN,
				  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);

	if (INTEL_REVID(dev) <= SKL_REVID_D0) {
		/*
		 * Use Force Non-Coherent whenever executing a 3D context. This
		 * is a workaround for a possible hang in the unlikely event
		 * a TLB invalidation occurs during a PSD flush.
		 */
		/* WaForceEnableNonCoherent:skl */
		WA_SET_BIT_MASKED(HDC_CHICKEN0,
				  HDC_FORCE_NON_COHERENT);
	}

	if (INTEL_REVID(dev) == SKL_REVID_C0 ||
	    INTEL_REVID(dev) == SKL_REVID_D0)
		/* WaBarrierPerformanceFixDisable:skl */
		WA_SET_BIT_MASKED(HDC_CHICKEN0,
				  HDC_FENCE_DEST_SLM_DISABLE |
				  HDC_BARRIER_PERFORMANCE_DISABLE);

	return skl_tune_iz_hashing(ring);
}

static int bxt_init_workarounds(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	gen9_init_workarounds(ring);

	/* WaDisableThreadStallDopClockGating:bxt */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  STALL_DOP_GATING_DISABLE);

	/* WaDisableSbeCacheDispatchPortSharing:bxt */
	if (INTEL_REVID(dev) <= BXT_REVID_B0) {
		WA_SET_BIT_MASKED(
			GEN7_HALF_SLICE_CHICKEN1,
			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
	}

	return 0;
}

int init_workarounds_ring(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	WARN_ON(ring->id != RCS);

	dev_priv->workarounds.count = 0;

	if (IS_BROADWELL(dev))
		return bdw_init_workarounds(ring);

	if (IS_CHERRYVIEW(dev))
		return chv_init_workarounds(ring);

	if (IS_SKYLAKE(dev))
		return skl_init_workarounds(ring);

	if (IS_BROXTON(dev))
		return bxt_init_workarounds(ring);

	return 0;
}

static int init_render_ring(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = init_ring_common(ring);
	if (ret)
		return ret;

	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
	if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));

	/* We need to disable the AsyncFlip performance optimisations in order
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
	 * programmed to '1' on all products.
	 *
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
	 */
	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));

	/* Required for the hardware to program scanline values for waiting */
	/* WaEnableFlushTlbInvalidationMode:snb */
	if (INTEL_INFO(dev)->gen == 6)
		I915_WRITE(GFX_MODE,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));

	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
	if (IS_GEN7(dev))
		I915_WRITE(GFX_MODE_GEN7,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));

	if (IS_GEN6(dev)) {
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset.  LRA replacement
		 *  policy is not supported."
		 */
		I915_WRITE(CACHE_MODE_0,
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
	}

	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	if (HAS_L3_DPF(dev))
		I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));

	return init_workarounds_ring(ring);
}

static void render_ring_cleanup(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (dev_priv->semaphore_obj) {
		i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
		drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
		dev_priv->semaphore_obj = NULL;
	}

	intel_fini_pipe_control(ring);
}

static int gen8_rcs_signal(struct intel_engine_cs *signaller,
			   unsigned int num_dwords)
{
#define MBOX_UPDATE_DWORDS 8
	struct drm_device *dev = signaller->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *waiter;
	int i, ret, num_rings;

	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
#undef MBOX_UPDATE_DWORDS

	ret = intel_ring_begin(signaller, num_dwords);
	if (ret)
		return ret;

	for_each_ring(waiter, dev_priv, i) {
		u32 seqno;
		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
			continue;

		seqno = i915_gem_request_get_seqno(
				signaller->outstanding_lazy_request);
		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
					   PIPE_CONTROL_QW_WRITE |
					   PIPE_CONTROL_FLUSH_ENABLE);
		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
		intel_ring_emit(signaller, seqno);
		intel_ring_emit(signaller, 0);
		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
					   MI_SEMAPHORE_TARGET(waiter->id));
		intel_ring_emit(signaller, 0);
	}

	return 0;
}

static int gen8_xcs_signal(struct intel_engine_cs *signaller,
			   unsigned int num_dwords)
{
#define MBOX_UPDATE_DWORDS 6
	struct drm_device *dev = signaller->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *waiter;
	int i, ret, num_rings;

	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
#undef MBOX_UPDATE_DWORDS

	ret = intel_ring_begin(signaller, num_dwords);
	if (ret)
		return ret;

	for_each_ring(waiter, dev_priv, i) {
		u32 seqno;
		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
			continue;

		seqno = i915_gem_request_get_seqno(
				signaller->outstanding_lazy_request);
		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
					   MI_FLUSH_DW_OP_STOREDW);
		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
					   MI_FLUSH_DW_USE_GTT);
		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
		intel_ring_emit(signaller, seqno);
		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
					   MI_SEMAPHORE_TARGET(waiter->id));
		intel_ring_emit(signaller, 0);
	}

	return 0;
}

static int gen6_signal(struct intel_engine_cs *signaller,
		       unsigned int num_dwords)
{
	struct drm_device *dev = signaller->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *useless;
	int i, ret, num_rings;

#define MBOX_UPDATE_DWORDS 3
	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
	num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
#undef MBOX_UPDATE_DWORDS

	ret = intel_ring_begin(signaller, num_dwords);
	if (ret)
		return ret;

	for_each_ring(useless, dev_priv, i) {
		u32 mbox_reg = signaller->semaphore.mbox.signal[i];
		if (mbox_reg != GEN6_NOSYNC) {
			u32 seqno = i915_gem_request_get_seqno(
					signaller->outstanding_lazy_request);
			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
			intel_ring_emit(signaller, mbox_reg);
			intel_ring_emit(signaller, seqno);
		}
	}

	/* If num_dwords was rounded, make sure the tail pointer is correct */
	if (num_rings % 2 == 0)
		intel_ring_emit(signaller, MI_NOOP);

	return 0;
}

/**
 * gen6_add_request - Update the semaphore mailbox registers
 *
 * @ring - ring that is adding a request
 * @seqno - return seqno stuck into the ring
 *
 * Update the mailbox registers in the *other* rings with the current seqno.
 * This acts like a signal in the canonical semaphore.
 */
static int
gen6_add_request(struct intel_engine_cs *ring)
{
	int ret;

	if (ring->semaphore.signal)
		ret = ring->semaphore.signal(ring, 4);
	else
		ret = intel_ring_begin(ring, 4);

	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring,
		    i915_gem_request_get_seqno(ring->outstanding_lazy_request));
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	__intel_ring_advance(ring);

	return 0;
}

static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
					      u32 seqno)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	return dev_priv->last_seqno < seqno;
}

/**
 * intel_ring_sync - sync the waiter to the signaller on seqno
 *
 * @waiter - ring that is waiting
 * @signaller - ring which has, or will signal
 * @seqno - seqno which the waiter will block on
 */

static int
gen8_ring_sync(struct intel_engine_cs *waiter,
	       struct intel_engine_cs *signaller,
	       u32 seqno)
{
	struct drm_i915_private *dev_priv = waiter->dev->dev_private;
	int ret;

	ret = intel_ring_begin(waiter, 4);
	if (ret)
		return ret;

	intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
				MI_SEMAPHORE_GLOBAL_GTT |
				MI_SEMAPHORE_POLL |
				MI_SEMAPHORE_SAD_GTE_SDD);
	intel_ring_emit(waiter, seqno);
	intel_ring_emit(waiter,
			lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
	intel_ring_emit(waiter,
			upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
	intel_ring_advance(waiter);
	return 0;
}

static int
gen6_ring_sync(struct intel_engine_cs *waiter,
	       struct intel_engine_cs *signaller,
	       u32 seqno)
{
	u32 dw1 = MI_SEMAPHORE_MBOX |
		  MI_SEMAPHORE_COMPARE |
		  MI_SEMAPHORE_REGISTER;
	u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
	int ret;

	/* Throughout all of the GEM code, seqno passed implies our current
	 * seqno is >= the last seqno executed. However for hardware the
	 * comparison is strictly greater than.
	 */
	seqno -= 1;

	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);

	ret = intel_ring_begin(waiter, 4);
	if (ret)
		return ret;

	/* If seqno wrap happened, omit the wait with no-ops */
	if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
		intel_ring_emit(waiter, dw1 | wait_mbox);
		intel_ring_emit(waiter, seqno);
		intel_ring_emit(waiter, 0);
		intel_ring_emit(waiter, MI_NOOP);
	} else {
		intel_ring_emit(waiter, MI_NOOP);
		intel_ring_emit(waiter, MI_NOOP);
		intel_ring_emit(waiter, MI_NOOP);
		intel_ring_emit(waiter, MI_NOOP);
	}
	intel_ring_advance(waiter);

	return 0;
}

#define PIPE_CONTROL_FLUSH(ring__, addr__)				\
do {									\
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | \
		 PIPE_CONTROL_DEPTH_STALL);				\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);	\
	intel_ring_emit(ring__, 0);					\
	intel_ring_emit(ring__, 0);					\
} while (0)

static int
pc_render_add_request(struct intel_engine_cs *ring)
{
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
	 * incoherent with writes to memory, i.e. completely fubar,
	 * so we need to use PIPE_NOTIFY instead.
	 *
	 * However, we also need to workaround the qword write
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
	 * memory before requesting an interrupt.
	 */
	ret = intel_ring_begin(ring, 32);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring,
			i915_gem_request_get_seqno(ring->outstanding_lazy_request));
	intel_ring_emit(ring, 0);
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
			PIPE_CONTROL_NOTIFY);
	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring,
			i915_gem_request_get_seqno(ring->outstanding_lazy_request));
	intel_ring_emit(ring, 0);
	__intel_ring_advance(ring);

	return 0;
}

static u32
gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	/* Workaround to force correct ordering between irq and seqno writes on
	 * ivb (and maybe also on snb) by reading from a CS register (like
	 * ACTHD) before reading the status page. */
	if (!lazy_coherency) {
		struct drm_i915_private *dev_priv = ring->dev->dev_private;
		POSTING_READ(RING_ACTHD(ring->mmio_base));
	}

	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static u32
ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static void
ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
}

static u32
pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	return ring->scratch.cpu_page[0];
}

static void
pc_render_set_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	ring->scratch.cpu_page[0] = seqno;
}

static bool
gen5_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0)
		gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
gen5_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0)
		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

static bool
i9xx_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (!intel_irqs_enabled(dev_priv))
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE(IMR, dev_priv->irq_mask);
		POSTING_READ(IMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
i9xx_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		dev_priv->irq_mask |= ring->irq_enable_mask;
		I915_WRITE(IMR, dev_priv->irq_mask);
		POSTING_READ(IMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

static bool
i8xx_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (!intel_irqs_enabled(dev_priv))
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE16(IMR, dev_priv->irq_mask);
		POSTING_READ16(IMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
i8xx_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		dev_priv->irq_mask |= ring->irq_enable_mask;
		I915_WRITE16(IMR, dev_priv->irq_mask);
		POSTING_READ16(IMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

static int
bsd_ring_flush(struct drm_i915_gem_request *req,
	       u32     invalidate_domains,
	       u32     flush_domains)
{
	struct intel_engine_cs *ring = req->ring;
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_FLUSH);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
i9xx_add_request(struct intel_engine_cs *ring)
{
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring,
		    i915_gem_request_get_seqno(ring->outstanding_lazy_request));
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	__intel_ring_advance(ring);

	return 0;
}

static bool
gen6_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		if (HAS_L3_DPF(dev) && ring->id == RCS)
			I915_WRITE_IMR(ring,
				       ~(ring->irq_enable_mask |
					 GT_PARITY_ERROR(dev)));
		else
			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
		gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
gen6_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		if (HAS_L3_DPF(dev) && ring->id == RCS)
			I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
		else
			I915_WRITE_IMR(ring, ~0);
		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}
1680
a19d2933 1681static bool
a4872ba6 1682hsw_vebox_get_irq(struct intel_engine_cs *ring)
a19d2933
BW
1683{
1684 struct drm_device *dev = ring->dev;
1685 struct drm_i915_private *dev_priv = dev->dev_private;
1686 unsigned long flags;
1687
7cd512f1 1688 if (WARN_ON(!intel_irqs_enabled(dev_priv)))
a19d2933
BW
1689 return false;
1690
59cdb63d 1691 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1692 if (ring->irq_refcount++ == 0) {
a19d2933 1693 I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
480c8033 1694 gen6_enable_pm_irq(dev_priv, ring->irq_enable_mask);
a19d2933 1695 }
59cdb63d 1696 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
a19d2933
BW
1697
1698 return true;
1699}
1700
1701static void
a4872ba6 1702hsw_vebox_put_irq(struct intel_engine_cs *ring)
a19d2933
BW
1703{
1704 struct drm_device *dev = ring->dev;
1705 struct drm_i915_private *dev_priv = dev->dev_private;
1706 unsigned long flags;
1707
59cdb63d 1708 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1709 if (--ring->irq_refcount == 0) {
a19d2933 1710 I915_WRITE_IMR(ring, ~0);
480c8033 1711 gen6_disable_pm_irq(dev_priv, ring->irq_enable_mask);
a19d2933 1712 }
59cdb63d 1713 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
a19d2933
BW
1714}
1715
abd58f01 1716static bool
a4872ba6 1717gen8_ring_get_irq(struct intel_engine_cs *ring)
abd58f01
BW
1718{
1719 struct drm_device *dev = ring->dev;
1720 struct drm_i915_private *dev_priv = dev->dev_private;
1721 unsigned long flags;
1722
7cd512f1 1723 if (WARN_ON(!intel_irqs_enabled(dev_priv)))
abd58f01
BW
1724 return false;
1725
1726 spin_lock_irqsave(&dev_priv->irq_lock, flags);
1727 if (ring->irq_refcount++ == 0) {
1728 if (HAS_L3_DPF(dev) && ring->id == RCS) {
1729 I915_WRITE_IMR(ring,
1730 ~(ring->irq_enable_mask |
1731 GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
1732 } else {
1733 I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
1734 }
1735 POSTING_READ(RING_IMR(ring->mmio_base));
1736 }
1737 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1738
1739 return true;
1740}
1741
1742static void
a4872ba6 1743gen8_ring_put_irq(struct intel_engine_cs *ring)
abd58f01
BW
1744{
1745 struct drm_device *dev = ring->dev;
1746 struct drm_i915_private *dev_priv = dev->dev_private;
1747 unsigned long flags;
1748
1749 spin_lock_irqsave(&dev_priv->irq_lock, flags);
1750 if (--ring->irq_refcount == 0) {
1751 if (HAS_L3_DPF(dev) && ring->id == RCS) {
1752 I915_WRITE_IMR(ring,
1753 ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
1754 } else {
1755 I915_WRITE_IMR(ring, ~0);
1756 }
1757 POSTING_READ(RING_IMR(ring->mmio_base));
1758 }
1759 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1760}
1761
d1b851fc 1762static int
a4872ba6 1763i965_dispatch_execbuffer(struct intel_engine_cs *ring,
9bcb144c 1764 u64 offset, u32 length,
8e004efc 1765 unsigned dispatch_flags)
d1b851fc 1766{
e1f99ce6 1767 int ret;
78501eac 1768
e1f99ce6
CW
1769 ret = intel_ring_begin(ring, 2);
1770 if (ret)
1771 return ret;
1772
78501eac 1773 intel_ring_emit(ring,
65f56876
CW
1774 MI_BATCH_BUFFER_START |
1775 MI_BATCH_GTT |
8e004efc
JH
1776 (dispatch_flags & I915_DISPATCH_SECURE ?
1777 0 : MI_BATCH_NON_SECURE_I965));
c4e7a414 1778 intel_ring_emit(ring, offset);
78501eac
CW
1779 intel_ring_advance(ring);
1780
d1b851fc
ZN
1781 return 0;
1782}
1783
b45305fc
DV
1784/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
1785#define I830_BATCH_LIMIT (256*1024)
c4d69da1
CW
1786#define I830_TLB_ENTRIES (2)
1787#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
8187a2b7 1788static int
a4872ba6 1789i830_dispatch_execbuffer(struct intel_engine_cs *ring,
8e004efc
JH
1790 u64 offset, u32 len,
1791 unsigned dispatch_flags)
62fdfeaf 1792{
c4d69da1 1793 u32 cs_offset = ring->scratch.gtt_offset;
c4e7a414 1794 int ret;
62fdfeaf 1795
c4d69da1
CW
1796 ret = intel_ring_begin(ring, 6);
1797 if (ret)
1798 return ret;
62fdfeaf 1799
c4d69da1
CW
1800 /* Evict the invalid PTE TLBs */
1801 intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
1802 intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
1803 intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */
1804 intel_ring_emit(ring, cs_offset);
1805 intel_ring_emit(ring, 0xdeadbeef);
1806 intel_ring_emit(ring, MI_NOOP);
1807 intel_ring_advance(ring);
b45305fc 1808
8e004efc 1809 if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
b45305fc
DV
1810 if (len > I830_BATCH_LIMIT)
1811 return -ENOSPC;
1812
c4d69da1 1813 ret = intel_ring_begin(ring, 6 + 2);
b45305fc
DV
1814 if (ret)
1815 return ret;
c4d69da1
CW
1816
1817 /* Blit the batch (which now has all relocs applied) to the
1818 * stable batch scratch bo area (so that the CS never
1819 * stumbles over its tlb invalidation bug) ...
1820 */
1821 intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
1822 intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
611a7a4f 1823 intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096);
b45305fc 1824 intel_ring_emit(ring, cs_offset);
b45305fc
DV
1825 intel_ring_emit(ring, 4096);
1826 intel_ring_emit(ring, offset);
c4d69da1 1827
b45305fc 1828 intel_ring_emit(ring, MI_FLUSH);
c4d69da1
CW
1829 intel_ring_emit(ring, MI_NOOP);
1830 intel_ring_advance(ring);
b45305fc
DV
1831
1832 /* ... and execute it. */
c4d69da1 1833 offset = cs_offset;
b45305fc 1834 }
e1f99ce6 1835
c4d69da1
CW
1836 ret = intel_ring_begin(ring, 4);
1837 if (ret)
1838 return ret;
1839
1840 intel_ring_emit(ring, MI_BATCH_BUFFER);
8e004efc
JH
1841 intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1842 0 : MI_BATCH_NON_SECURE));
c4d69da1
CW
1843 intel_ring_emit(ring, offset + len - 8);
1844 intel_ring_emit(ring, MI_NOOP);
1845 intel_ring_advance(ring);
1846
fb3256da
DV
1847 return 0;
1848}
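
An editorial aside, not part of the driver: the workaround above only copies an unpinned batch into the scratch bo when it fits under I830_BATCH_LIMIT, and the scratch bo itself is sized to cover both the TLB-eviction blit and the largest such batch. A minimal standalone sketch of that sizing and the accept/reject decision, with the constants copied from the #defines above:

#include <stdio.h>

#define I830_BATCH_LIMIT (256 * 1024)
#define I830_TLB_ENTRIES (2)
/* max() of the TLB-eviction area and the batch copy area */
#define I830_WA_SIZE (I830_TLB_ENTRIES * 4096 > I830_BATCH_LIMIT ? \
		      I830_TLB_ENTRIES * 4096 : I830_BATCH_LIMIT)

/* 1: copy into the scratch bo, 0: run in place, -1: reject (too big) */
static int i830_dispatch_mode(unsigned int len, int pinned)
{
	if (pinned)
		return 0;
	return len > I830_BATCH_LIMIT ? -1 : 1;
}

int main(void)
{
	printf("scratch bo size  : %u KiB\n", (unsigned int)I830_WA_SIZE / 1024);
	printf("128 KiB, unpinned: %d\n", i830_dispatch_mode(128 * 1024, 0));
	printf("512 KiB, unpinned: %d\n", i830_dispatch_mode(512 * 1024, 0));
	printf("512 KiB, pinned  : %d\n", i830_dispatch_mode(512 * 1024, 1));
	return 0;
}
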
1849
1850static int
a4872ba6 1851i915_dispatch_execbuffer(struct intel_engine_cs *ring,
9bcb144c 1852 u64 offset, u32 len,
8e004efc 1853 unsigned dispatch_flags)
fb3256da
DV
1854{
1855 int ret;
1856
1857 ret = intel_ring_begin(ring, 2);
1858 if (ret)
1859 return ret;
1860
65f56876 1861 intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
8e004efc
JH
1862 intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1863 0 : MI_BATCH_NON_SECURE));
c4e7a414 1864 intel_ring_advance(ring);
62fdfeaf 1865
62fdfeaf
EA
1866 return 0;
1867}
1868
a4872ba6 1869static void cleanup_status_page(struct intel_engine_cs *ring)
62fdfeaf 1870{
05394f39 1871 struct drm_i915_gem_object *obj;
62fdfeaf 1872
8187a2b7
ZN
1873 obj = ring->status_page.obj;
1874 if (obj == NULL)
62fdfeaf 1875 return;
62fdfeaf 1876
9da3da66 1877 kunmap(sg_page(obj->pages->sgl));
d7f46fc4 1878 i915_gem_object_ggtt_unpin(obj);
05394f39 1879 drm_gem_object_unreference(&obj->base);
8187a2b7 1880 ring->status_page.obj = NULL;
62fdfeaf
EA
1881}
1882
a4872ba6 1883static int init_status_page(struct intel_engine_cs *ring)
62fdfeaf 1884{
05394f39 1885 struct drm_i915_gem_object *obj;
62fdfeaf 1886
e3efda49 1887 if ((obj = ring->status_page.obj) == NULL) {
1f767e02 1888 unsigned flags;
e3efda49 1889 int ret;
e4ffd173 1890
e3efda49
CW
1891 obj = i915_gem_alloc_object(ring->dev, 4096);
1892 if (obj == NULL) {
1893 DRM_ERROR("Failed to allocate status page\n");
1894 return -ENOMEM;
1895 }
62fdfeaf 1896
e3efda49
CW
1897 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
1898 if (ret)
1899 goto err_unref;
1900
1f767e02
CW
1901 flags = 0;
1902 if (!HAS_LLC(ring->dev))
1903 /* On g33, we cannot place HWS above 256MiB, so
1904 * restrict its pinning to the low mappable arena.
1905 * Though this restriction is not documented for
1906 * gen4, gen5, or byt, they also behave similarly
1907 * and hang if the HWS is placed at the top of the
1908 * GTT. To generalise, it appears that all !llc
1909 * platforms have issues with us placing the HWS
1910 * above the mappable region (even though we never
1911 * actually map it).
1912 */
1913 flags |= PIN_MAPPABLE;
1914 ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
e3efda49
CW
1915 if (ret) {
1916err_unref:
1917 drm_gem_object_unreference(&obj->base);
1918 return ret;
1919 }
1920
1921 ring->status_page.obj = obj;
1922 }
62fdfeaf 1923
f343c5f6 1924 ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
9da3da66 1925 ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
8187a2b7 1926 memset(ring->status_page.page_addr, 0, PAGE_SIZE);
62fdfeaf 1927
8187a2b7
ZN
1928 DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
1929 ring->name, ring->status_page.gfx_addr);
62fdfeaf
EA
1930
1931 return 0;
62fdfeaf
EA
1932}
1933
a4872ba6 1934static int init_phys_status_page(struct intel_engine_cs *ring)
6b8294a4
CW
1935{
1936 struct drm_i915_private *dev_priv = ring->dev->dev_private;
6b8294a4
CW
1937
1938 if (!dev_priv->status_page_dmah) {
1939 dev_priv->status_page_dmah =
1940 drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
1941 if (!dev_priv->status_page_dmah)
1942 return -ENOMEM;
1943 }
1944
6b8294a4
CW
1945 ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1946 memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1947
1948 return 0;
1949}
1950
7ba717cf 1951void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
2919d291 1952{
2919d291 1953 iounmap(ringbuf->virtual_start);
7ba717cf 1954 ringbuf->virtual_start = NULL;
2919d291 1955 i915_gem_object_ggtt_unpin(ringbuf->obj);
7ba717cf
TD
1956}
1957
1958int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
1959 struct intel_ringbuffer *ringbuf)
1960{
1961 struct drm_i915_private *dev_priv = to_i915(dev);
1962 struct drm_i915_gem_object *obj = ringbuf->obj;
1963 int ret;
1964
1965 ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
1966 if (ret)
1967 return ret;
1968
1969 ret = i915_gem_object_set_to_gtt_domain(obj, true);
1970 if (ret) {
1971 i915_gem_object_ggtt_unpin(obj);
1972 return ret;
1973 }
1974
1975 ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
1976 i915_gem_obj_ggtt_offset(obj), ringbuf->size);
1977 if (ringbuf->virtual_start == NULL) {
1978 i915_gem_object_ggtt_unpin(obj);
1979 return -EINVAL;
1980 }
1981
1982 return 0;
1983}
1984
1985void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
1986{
2919d291
OM
1987 drm_gem_object_unreference(&ringbuf->obj->base);
1988 ringbuf->obj = NULL;
1989}
1990
84c2377f
OM
1991int intel_alloc_ringbuffer_obj(struct drm_device *dev,
1992 struct intel_ringbuffer *ringbuf)
62fdfeaf 1993{
05394f39 1994 struct drm_i915_gem_object *obj;
62fdfeaf 1995
ebc052e0
CW
1996 obj = NULL;
1997 if (!HAS_LLC(dev))
93b0a4e0 1998 obj = i915_gem_object_create_stolen(dev, ringbuf->size);
ebc052e0 1999 if (obj == NULL)
93b0a4e0 2000 obj = i915_gem_alloc_object(dev, ringbuf->size);
e3efda49
CW
2001 if (obj == NULL)
2002 return -ENOMEM;
8187a2b7 2003
24f3a8cf
AG
2004 /* mark ring buffers as read-only from GPU side by default */
2005 obj->gt_ro = 1;
2006
93b0a4e0 2007 ringbuf->obj = obj;
e3efda49 2008
7ba717cf 2009 return 0;
e3efda49
CW
2010}
2011
2012static int intel_init_ring_buffer(struct drm_device *dev,
a4872ba6 2013 struct intel_engine_cs *ring)
e3efda49 2014{
bfc882b4 2015 struct intel_ringbuffer *ringbuf;
e3efda49
CW
2016 int ret;
2017
bfc882b4
DV
2018 WARN_ON(ring->buffer);
2019
2020 ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
2021 if (!ringbuf)
2022 return -ENOMEM;
2023 ring->buffer = ringbuf;
8ee14975 2024
e3efda49
CW
2025 ring->dev = dev;
2026 INIT_LIST_HEAD(&ring->active_list);
2027 INIT_LIST_HEAD(&ring->request_list);
cc9130be 2028 INIT_LIST_HEAD(&ring->execlist_queue);
06fbca71 2029 i915_gem_batch_pool_init(dev, &ring->batch_pool);
93b0a4e0 2030 ringbuf->size = 32 * PAGE_SIZE;
0c7dd53b 2031 ringbuf->ring = ring;
ebc348b2 2032 memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
e3efda49
CW
2033
2034 init_waitqueue_head(&ring->irq_queue);
2035
2036 if (I915_NEED_GFX_HWS(dev)) {
2037 ret = init_status_page(ring);
2038 if (ret)
8ee14975 2039 goto error;
e3efda49
CW
2040 } else {
2041 BUG_ON(ring->id != RCS);
2042 ret = init_phys_status_page(ring);
2043 if (ret)
8ee14975 2044 goto error;
e3efda49
CW
2045 }
2046
bfc882b4 2047 WARN_ON(ringbuf->obj);
7ba717cf 2048
bfc882b4
DV
2049 ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
2050 if (ret) {
2051 DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
2052 ring->name, ret);
2053 goto error;
2054 }
2055
2056 ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
2057 if (ret) {
2058 DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
2059 ring->name, ret);
2060 intel_destroy_ringbuffer_obj(ringbuf);
2061 goto error;
e3efda49 2062 }
62fdfeaf 2063
55249baa
CW
2064 /* Workaround an erratum on the i830 which causes a hang if
2065 * the TAIL pointer points to within the last 2 cachelines
2066 * of the buffer.
2067 */
93b0a4e0 2068 ringbuf->effective_size = ringbuf->size;
e3efda49 2069 if (IS_I830(dev) || IS_845G(dev))
93b0a4e0 2070 ringbuf->effective_size -= 2 * CACHELINE_BYTES;
55249baa 2071
44e895a8
BV
2072 ret = i915_cmd_parser_init_ring(ring);
2073 if (ret)
8ee14975
OM
2074 goto error;
2075
8ee14975 2076 return 0;
351e3db2 2077
8ee14975
OM
2078error:
2079 kfree(ringbuf);
2080 ring->buffer = NULL;
2081 return ret;
62fdfeaf
EA
2082}
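
Editorial illustration, not driver code: the i830/845G erratum handling above simply shrinks the usable portion of the ring by two cachelines so the TAIL register can never land in the last two cachelines of the buffer. A tiny sketch of that arithmetic, assuming the 32-page ring allocated above and a 64-byte cacheline (the value the driver's CACHELINE_BYTES uses):

#include <stdio.h>

#define PAGE_SIZE	4096
#define CACHELINE_BYTES	64

int main(void)
{
	unsigned int size = 32 * PAGE_SIZE;	/* ringbuf->size above */
	unsigned int effective_size = size;

	/* i830/845G erratum: keep TAIL out of the last two cachelines */
	effective_size -= 2 * CACHELINE_BYTES;

	printf("size=%u effective_size=%u (lost %u bytes)\n",
	       size, effective_size, size - effective_size);
	return 0;
}
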
2083
a4872ba6 2084void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
62fdfeaf 2085{
6402c330
JH
2086 struct drm_i915_private *dev_priv;
2087 struct intel_ringbuffer *ringbuf;
33626e6a 2088
93b0a4e0 2089 if (!intel_ring_initialized(ring))
62fdfeaf
EA
2090 return;
2091
6402c330
JH
2092 dev_priv = to_i915(ring->dev);
2093 ringbuf = ring->buffer;
2094
e3efda49 2095 intel_stop_ring_buffer(ring);
de8f0a50 2096 WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
33626e6a 2097
7ba717cf 2098 intel_unpin_ringbuffer_obj(ringbuf);
2919d291 2099 intel_destroy_ringbuffer_obj(ringbuf);
6259cead 2100 i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
78501eac 2101
8d19215b
ZN
2102 if (ring->cleanup)
2103 ring->cleanup(ring);
2104
78501eac 2105 cleanup_status_page(ring);
44e895a8
BV
2106
2107 i915_cmd_parser_fini_ring(ring);
06fbca71 2108 i915_gem_batch_pool_fini(&ring->batch_pool);
8ee14975 2109
93b0a4e0 2110 kfree(ringbuf);
8ee14975 2111 ring->buffer = NULL;
62fdfeaf
EA
2112}
2113
595e1eeb 2114static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
a71d8d94 2115{
93b0a4e0 2116 struct intel_ringbuffer *ringbuf = ring->buffer;
a71d8d94 2117 struct drm_i915_gem_request *request;
b4716185
CW
2118 unsigned space;
2119 int ret;
a71d8d94 2120
29b1b415
JH
2121 /* The whole point of reserving space is to not wait! */
2122 WARN_ON(ringbuf->reserved_in_use);
2123
ebd0fd4b
DG
2124 if (intel_ring_space(ringbuf) >= n)
2125 return 0;
a71d8d94
CW
2126
2127 list_for_each_entry(request, &ring->request_list, list) {
b4716185
CW
2128 space = __intel_ring_space(request->postfix, ringbuf->tail,
2129 ringbuf->size);
2130 if (space >= n)
a71d8d94 2131 break;
a71d8d94
CW
2132 }
2133
595e1eeb 2134 if (WARN_ON(&request->list == &ring->request_list))
a71d8d94
CW
2135 return -ENOSPC;
2136
a4b3a571 2137 ret = i915_wait_request(request);
a71d8d94
CW
2138 if (ret)
2139 return ret;
2140
b4716185 2141 ringbuf->space = space;
a71d8d94
CW
2142 return 0;
2143}
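
Not part of the driver, just an illustration of the search in ring_wait_for_space(): each outstanding request records the ring position its retirement frees up to (request->postfix), so the loop is a linear scan, oldest request first, using the wraparound distance from that postfix back to the current tail, minus the small reserve the ring always keeps free. A standalone sketch with made-up sizes and positions:

#include <stdio.h>

#define RING_SIZE	(32 * 4096)
#define RING_RESERVE	64	/* illustrative stand-in for the driver's reserve */

/* Free bytes once everything up to 'head' has been consumed. */
static int ring_space(int head, int tail, int size)
{
	int space = head - tail;

	if (space <= 0)
		space += size;
	return space - RING_RESERVE;
}

/* Index of the first request whose retirement frees >= n bytes, or -1. */
static int first_request_freeing(const int *postfix, int count, int tail, int n)
{
	int i;

	for (i = 0; i < count; i++)
		if (ring_space(postfix[i], tail, RING_SIZE) >= n)
			return i;
	return -1;
}

int main(void)
{
	int postfix[] = { 4096, 65536, 120000 };	/* oldest first */
	int tail = 130048;

	printf("wait on request #%d\n",
	       first_request_freeing(postfix, 3, tail, 8192));
	return 0;
}
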
2144
a4872ba6 2145static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
3e960501
CW
2146{
2147 uint32_t __iomem *virt;
93b0a4e0
OM
2148 struct intel_ringbuffer *ringbuf = ring->buffer;
2149 int rem = ringbuf->size - ringbuf->tail;
3e960501 2150
29b1b415
JH
2151 /* Can't wrap if space has already been reserved! */
2152 WARN_ON(ringbuf->reserved_in_use);
2153
93b0a4e0 2154 if (ringbuf->space < rem) {
3e960501
CW
2155 int ret = ring_wait_for_space(ring, rem);
2156 if (ret)
2157 return ret;
2158 }
2159
93b0a4e0 2160 virt = ringbuf->virtual_start + ringbuf->tail;
3e960501
CW
2161 rem /= 4;
2162 while (rem--)
2163 iowrite32(MI_NOOP, virt++);
2164
93b0a4e0 2165 ringbuf->tail = 0;
ebd0fd4b 2166 intel_ring_update_space(ringbuf);
3e960501
CW
2167
2168 return 0;
2169}
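
Again purely illustrative: wrapping works by padding the remainder of the buffer with MI_NOOP dwords and restarting the tail at zero, which is why the caller must first make sure the whole remainder counts as free space. A toy in-memory version, using a plain array instead of the write-combined GTT mapping and a placeholder NOOP value:

#include <stdint.h>
#include <stdio.h>

#define TOY_RING_DWORDS	16
#define TOY_MI_NOOP	0u	/* placeholder for the hardware MI_NOOP opcode */

struct toy_ring {
	uint32_t buf[TOY_RING_DWORDS];
	unsigned int tail;	/* in dwords here, in bytes in the driver */
};

/* Fill from tail to the end with NOOPs, then wrap tail back to 0. */
static void toy_ring_wrap(struct toy_ring *ring)
{
	while (ring->tail < TOY_RING_DWORDS)
		ring->buf[ring->tail++] = TOY_MI_NOOP;
	ring->tail = 0;
}

int main(void)
{
	struct toy_ring ring = { .tail = 13 };

	toy_ring_wrap(&ring);
	printf("tail after wrap: %u\n", ring.tail);	/* 0 */
	return 0;
}
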
2170
a4872ba6 2171int intel_ring_idle(struct intel_engine_cs *ring)
3e960501 2172{
a4b3a571 2173 struct drm_i915_gem_request *req;
3e960501
CW
2174
2175 /* We need to add any requests required to flush the objects and ring */
75289874 2176 WARN_ON(ring->outstanding_lazy_request);
bf7dc5b7 2177 if (ring->outstanding_lazy_request)
75289874 2178 i915_add_request(ring->outstanding_lazy_request);
3e960501
CW
2179
2180 /* Wait upon the last request to be completed */
2181 if (list_empty(&ring->request_list))
2182 return 0;
2183
a4b3a571 2184 req = list_entry(ring->request_list.prev,
b4716185
CW
2185 struct drm_i915_gem_request,
2186 list);
2187
2188 /* Make sure we do not trigger any retires */
2189 return __i915_wait_request(req,
2190 atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter),
2191 to_i915(ring->dev)->mm.interruptible,
2192 NULL, NULL);
3e960501
CW
2193}
2194
6689cb2b 2195int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
9d773091 2196{
6689cb2b 2197 request->ringbuf = request->ring->buffer;
9eba5d4a 2198 return 0;
9d773091
CW
2199}
2200
29b1b415
JH
2201void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size)
2202{
2203 /* NB: Until request management is fully tidied up and the OLR is
2204 * removed, there are too many ways to get false hits on this
2205 * anti-recursion check! */
2206 /*WARN_ON(ringbuf->reserved_size);*/
2207 WARN_ON(ringbuf->reserved_in_use);
2208
2209 ringbuf->reserved_size = size;
2210
2211 /*
2212 * Really need to call _begin() here, but that currently leads to
2213 * recursion problems! This will be fixed later; for now just
2214 * return and hope for the best. Note that there is only a real
2215 * problem if the creator of the request never actually calls _begin(),
2216 * but if they are not submitting any work then why did they create
2217 * the request in the first place?
2218 */
2219}
2220
2221void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf)
2222{
2223 WARN_ON(ringbuf->reserved_in_use);
2224
2225 ringbuf->reserved_size = 0;
2226 ringbuf->reserved_in_use = false;
2227}
2228
2229void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
2230{
2231 WARN_ON(ringbuf->reserved_in_use);
2232
2233 ringbuf->reserved_in_use = true;
2234 ringbuf->reserved_tail = ringbuf->tail;
2235}
2236
2237void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
2238{
2239 WARN_ON(!ringbuf->reserved_in_use);
2240 WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
2241 "request reserved size too small: %d vs %d!\n",
2242 ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
2243
2244 ringbuf->reserved_size = 0;
2245 ringbuf->reserved_in_use = false;
2246}
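
A sketch of how the four helpers above are meant to be paired over a request's lifetime, using a toy tracking struct rather than the real intel_ringbuffer. The point is only the ordering: reserve before any commands are emitted, use/end (or cancel) around the final emission, with end checking that the reservation was large enough:

#include <assert.h>
#include <stdio.h>

struct toy_rsvd {
	int tail;
	int reserved_size;
	int reserved_tail;
	int reserved_in_use;
};

static void rsvd_reserve(struct toy_rsvd *r, int size)
{
	assert(!r->reserved_in_use);
	r->reserved_size = size;
}

static void rsvd_use(struct toy_rsvd *r)
{
	assert(!r->reserved_in_use);
	r->reserved_in_use = 1;
	r->reserved_tail = r->tail;
}

static void rsvd_end(struct toy_rsvd *r)
{
	assert(r->reserved_in_use);
	if (r->tail - r->reserved_tail > r->reserved_size)
		printf("reservation too small: used %d, reserved %d\n",
		       r->tail - r->reserved_tail, r->reserved_size);
	r->reserved_size = 0;
	r->reserved_in_use = 0;
}

int main(void)
{
	struct toy_rsvd r = { 0 };

	rsvd_reserve(&r, 16);	/* at request creation                 */
	r.tail += 100;		/* ... normal command emission ...     */
	rsvd_use(&r);		/* about to emit the request postamble */
	r.tail += 12;		/* postamble fits in the reservation   */
	rsvd_end(&r);
	printf("ok, tail=%d\n", r.tail);
	return 0;
}
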
2247
2248static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
cbcc80df 2249{
93b0a4e0 2250 struct intel_ringbuffer *ringbuf = ring->buffer;
cbcc80df
MK
2251 int ret;
2252
29b1b415
JH
2253 /*
2254 * Add on the reserved size to the request to make sure that after
2255 * the intended commands have been emitted, there is guaranteed to
2256 * still be enough free space to send them to the hardware.
2257 */
2258 if (!ringbuf->reserved_in_use)
2259 bytes += ringbuf->reserved_size;
2260
93b0a4e0 2261 if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
cbcc80df
MK
2262 ret = intel_wrap_ring_buffer(ring);
2263 if (unlikely(ret))
2264 return ret;
29b1b415
JH
2265
2266 if (ringbuf->reserved_size) {
2267 uint32_t size = ringbuf->reserved_size;
2268
2269 intel_ring_reserved_space_cancel(ringbuf);
2270 intel_ring_reserved_space_reserve(ringbuf, size);
2271 }
cbcc80df
MK
2272 }
2273
93b0a4e0 2274 if (unlikely(ringbuf->space < bytes)) {
cbcc80df
MK
2275 ret = ring_wait_for_space(ring, bytes);
2276 if (unlikely(ret))
2277 return ret;
2278 }
2279
cbcc80df
MK
2280 return 0;
2281}
2282
a4872ba6 2283int intel_ring_begin(struct intel_engine_cs *ring,
e1f99ce6 2284 int num_dwords)
8187a2b7 2285{
217e46b5 2286 struct drm_i915_gem_request *req;
4640c4ff 2287 struct drm_i915_private *dev_priv = ring->dev->dev_private;
e1f99ce6 2288 int ret;
78501eac 2289
33196ded
DV
2290 ret = i915_gem_check_wedge(&dev_priv->gpu_error,
2291 dev_priv->mm.interruptible);
de2b9985
DV
2292 if (ret)
2293 return ret;
21dd3734 2294
304d695c
CW
2295 ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
2296 if (ret)
2297 return ret;
2298
9d773091 2299 /* Preallocate the olr before touching the ring */
217e46b5 2300 ret = i915_gem_request_alloc(ring, ring->default_context, &req);
9d773091
CW
2301 if (ret)
2302 return ret;
2303
ee1b1e5e 2304 ring->buffer->space -= num_dwords * sizeof(uint32_t);
304d695c 2305 return 0;
8187a2b7 2306}
78501eac 2307
753b1ad4 2308/* Align the ring tail to a cacheline boundary */
a4872ba6 2309int intel_ring_cacheline_align(struct intel_engine_cs *ring)
753b1ad4 2310{
ee1b1e5e 2311 int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
753b1ad4
VS
2312 int ret;
2313
2314 if (num_dwords == 0)
2315 return 0;
2316
18393f63 2317 num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
753b1ad4
VS
2318 ret = intel_ring_begin(ring, num_dwords);
2319 if (ret)
2320 return ret;
2321
2322 while (num_dwords--)
2323 intel_ring_emit(ring, MI_NOOP);
2324
2325 intel_ring_advance(ring);
2326
2327 return 0;
2328}
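
For reference, the padding arithmetic above in isolation: take the tail offset modulo the cacheline size, convert it to dwords, and emit enough MI_NOOPs to reach the next cacheline boundary (zero if already aligned). A standalone check of a few tail values, assuming the same 64-byte CACHELINE_BYTES:

#include <stdint.h>
#include <stdio.h>

#define CACHELINE_BYTES	64

/* Number of NOOP dwords needed to align a byte offset to a cacheline. */
static int cacheline_pad_dwords(unsigned int tail)
{
	int num_dwords = (tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);

	if (num_dwords == 0)
		return 0;
	return CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
}

int main(void)
{
	unsigned int tails[] = { 0, 4, 60, 64, 100 };
	unsigned int i;

	for (i = 0; i < sizeof(tails) / sizeof(tails[0]); i++)
		printf("tail=%3u -> pad %d dwords\n",
		       tails[i], cacheline_pad_dwords(tails[i]));
	return 0;
}
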
2329
a4872ba6 2330void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
498d2ac1 2331{
3b2cc8ab
OM
2332 struct drm_device *dev = ring->dev;
2333 struct drm_i915_private *dev_priv = dev->dev_private;
498d2ac1 2334
6259cead 2335 BUG_ON(ring->outstanding_lazy_request);
498d2ac1 2336
3b2cc8ab 2337 if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
f7e98ad4
MK
2338 I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
2339 I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
3b2cc8ab 2340 if (HAS_VEBOX(dev))
5020150b 2341 I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
e1f99ce6 2342 }
d97ed339 2343
f7e98ad4 2344 ring->set_seqno(ring, seqno);
92cab734 2345 ring->hangcheck.seqno = seqno;
8187a2b7 2346}
62fdfeaf 2347
a4872ba6 2348static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
297b0c5b 2349 u32 value)
881f47b6 2350{
4640c4ff 2351 struct drm_i915_private *dev_priv = ring->dev->dev_private;
881f47b6
XH
2352
2353 /* Every tail move must follow the sequence below */
12f55818
CW
2354
2355 /* Disable notification that the ring is IDLE. The GT
2356 * will then assume that it is busy and bring it out of rc6.
2357 */
0206e353 2358 I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
12f55818
CW
2359 _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2360
2361 /* Clear the context id. Here be magic! */
2362 I915_WRITE64(GEN6_BSD_RNCID, 0x0);
0206e353 2363
12f55818 2364 /* Wait for the ring not to be idle, i.e. for it to wake up. */
0206e353 2365 if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
12f55818
CW
2366 GEN6_BSD_SLEEP_INDICATOR) == 0,
2367 50))
2368 DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
0206e353 2369
12f55818 2370 /* Now that the ring is fully powered up, update the tail */
0206e353 2371 I915_WRITE_TAIL(ring, value);
12f55818
CW
2372 POSTING_READ(RING_TAIL(ring->mmio_base));
2373
2374 /* Let the ring send IDLE messages to the GT again,
2375 * and so let it sleep to conserve power when idle.
2376 */
0206e353 2377 I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
12f55818 2378 _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
881f47b6
XH
2379}
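
The wait_for((cond), 50) used above is the driver's poll-until-true-or-timeout helper. Roughly, and only as a hedged userspace approximation of that idea (not the real macro), it amounts to the loop below, polling once per millisecond and re-checking the condition one last time after the deadline; ring_awake() is a made-up stand-in for the SLEEP_INDICATOR check:

#include <errno.h>
#include <stdio.h>
#include <time.h>

/* Poll a condition for up to timeout_ms milliseconds. */
static int poll_until(int (*cond)(void), int timeout_ms)
{
	struct timespec delay = { .tv_nsec = 1000 * 1000 };	/* 1 ms */

	while (timeout_ms-- > 0) {
		if (cond())
			return 0;
		nanosleep(&delay, NULL);
	}
	return cond() ? 0 : -ETIMEDOUT;	/* final check after the deadline */
}

static int counter;
static int ring_awake(void)	/* hypothetical condition */
{
	return ++counter > 3;
}

int main(void)
{
	printf("poll_until() = %d\n", poll_until(ring_awake, 50));
	return 0;
}
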
2380
a84c3ae1 2381static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
ea251324 2382 u32 invalidate, u32 flush)
881f47b6 2383{
a84c3ae1 2384 struct intel_engine_cs *ring = req->ring;
71a77e07 2385 uint32_t cmd;
b72f3acb
CW
2386 int ret;
2387
b72f3acb
CW
2388 ret = intel_ring_begin(ring, 4);
2389 if (ret)
2390 return ret;
2391
71a77e07 2392 cmd = MI_FLUSH_DW;
075b3bba
BW
2393 if (INTEL_INFO(ring->dev)->gen >= 8)
2394 cmd += 1;
f0a1fb10
CW
2395
2396 /* We always require a command barrier so that subsequent
2397 * commands, such as breadcrumb interrupts, are strictly ordered
2398 * wrt the contents of the write cache being flushed to memory
2399 * (and thus being coherent from the CPU).
2400 */
2401 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2402
9a289771
JB
2403 /*
2404 * Bspec vol 1c.5 - video engine command streamer:
2405 * "If ENABLED, all TLBs will be invalidated once the flush
2406 * operation is complete. This bit is only valid when the
2407 * Post-Sync Operation field is a value of 1h or 3h."
2408 */
71a77e07 2409 if (invalidate & I915_GEM_GPU_DOMAINS)
f0a1fb10
CW
2410 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
2411
71a77e07 2412 intel_ring_emit(ring, cmd);
9a289771 2413 intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
075b3bba
BW
2414 if (INTEL_INFO(ring->dev)->gen >= 8) {
2415 intel_ring_emit(ring, 0); /* upper addr */
2416 intel_ring_emit(ring, 0); /* value */
2417 } else {
2418 intel_ring_emit(ring, 0);
2419 intel_ring_emit(ring, MI_NOOP);
2420 }
b72f3acb
CW
2421 intel_ring_advance(ring);
2422 return 0;
881f47b6
XH
2423}
2424
1c7a0623 2425static int
a4872ba6 2426gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
9bcb144c 2427 u64 offset, u32 len,
8e004efc 2428 unsigned dispatch_flags)
1c7a0623 2429{
8e004efc
JH
2430 bool ppgtt = USES_PPGTT(ring->dev) &&
2431 !(dispatch_flags & I915_DISPATCH_SECURE);
1c7a0623
BW
2432 int ret;
2433
2434 ret = intel_ring_begin(ring, 4);
2435 if (ret)
2436 return ret;
2437
2438 /* FIXME(BDW): Address space and security selectors. */
28cf5415 2439 intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
9bcb144c
BW
2440 intel_ring_emit(ring, lower_32_bits(offset));
2441 intel_ring_emit(ring, upper_32_bits(offset));
1c7a0623
BW
2442 intel_ring_emit(ring, MI_NOOP);
2443 intel_ring_advance(ring);
2444
2445 return 0;
2446}
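
On gen8 the batch start address is 64 bits wide and is emitted as two dwords; lower_32_bits()/upper_32_bits() are just the obvious split, as in this throwaway check with a made-up GTT offset:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t lo32(uint64_t v) { return (uint32_t)v; }
static uint32_t hi32(uint64_t v) { return (uint32_t)(v >> 32); }

int main(void)
{
	uint64_t offset = 0x0000000123456000ull;	/* illustrative only */

	printf("emit low  dword: 0x%08" PRIx32 "\n", lo32(offset));
	printf("emit high dword: 0x%08" PRIx32 "\n", hi32(offset));
	return 0;
}
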
2447
d7d4eedd 2448static int
a4872ba6 2449hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
8e004efc
JH
2450 u64 offset, u32 len,
2451 unsigned dispatch_flags)
d7d4eedd
CW
2452{
2453 int ret;
2454
2455 ret = intel_ring_begin(ring, 2);
2456 if (ret)
2457 return ret;
2458
2459 intel_ring_emit(ring,
77072258 2460 MI_BATCH_BUFFER_START |
8e004efc 2461 (dispatch_flags & I915_DISPATCH_SECURE ?
77072258 2462 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
d7d4eedd
CW
2463 /* bit0-7 is the length on GEN6+ */
2464 intel_ring_emit(ring, offset);
2465 intel_ring_advance(ring);
2466
2467 return 0;
2468}
2469
881f47b6 2470static int
a4872ba6 2471gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
9bcb144c 2472 u64 offset, u32 len,
8e004efc 2473 unsigned dispatch_flags)
881f47b6 2474{
0206e353 2475 int ret;
ab6f8e32 2476
0206e353
AJ
2477 ret = intel_ring_begin(ring, 2);
2478 if (ret)
2479 return ret;
e1f99ce6 2480
d7d4eedd
CW
2481 intel_ring_emit(ring,
2482 MI_BATCH_BUFFER_START |
8e004efc
JH
2483 (dispatch_flags & I915_DISPATCH_SECURE ?
2484 0 : MI_BATCH_NON_SECURE_I965));
0206e353
AJ
2485 /* bit0-7 is the length on GEN6+ */
2486 intel_ring_emit(ring, offset);
2487 intel_ring_advance(ring);
ab6f8e32 2488
0206e353 2489 return 0;
881f47b6
XH
2490}
2491
549f7365
CW
2492/* Blitter support (SandyBridge+) */
2493
a84c3ae1 2494static int gen6_ring_flush(struct drm_i915_gem_request *req,
ea251324 2495 u32 invalidate, u32 flush)
8d19215b 2496{
a84c3ae1 2497 struct intel_engine_cs *ring = req->ring;
fd3da6c9 2498 struct drm_device *dev = ring->dev;
71a77e07 2499 uint32_t cmd;
b72f3acb
CW
2500 int ret;
2501
6a233c78 2502 ret = intel_ring_begin(ring, 4);
b72f3acb
CW
2503 if (ret)
2504 return ret;
2505
71a77e07 2506 cmd = MI_FLUSH_DW;
dbef0f15 2507 if (INTEL_INFO(dev)->gen >= 8)
075b3bba 2508 cmd += 1;
f0a1fb10
CW
2509
2510 /* We always require a command barrier so that subsequent
2511 * commands, such as breadcrumb interrupts, are strictly ordered
2512 * wrt the contents of the write cache being flushed to memory
2513 * (and thus being coherent from the CPU).
2514 */
2515 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2516
9a289771
JB
2517 /*
2518 * Bspec vol 1c.3 - blitter engine command streamer:
2519 * "If ENABLED, all TLBs will be invalidated once the flush
2520 * operation is complete. This bit is only valid when the
2521 * Post-Sync Operation field is a value of 1h or 3h."
2522 */
71a77e07 2523 if (invalidate & I915_GEM_DOMAIN_RENDER)
f0a1fb10 2524 cmd |= MI_INVALIDATE_TLB;
71a77e07 2525 intel_ring_emit(ring, cmd);
9a289771 2526 intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
dbef0f15 2527 if (INTEL_INFO(dev)->gen >= 8) {
075b3bba
BW
2528 intel_ring_emit(ring, 0); /* upper addr */
2529 intel_ring_emit(ring, 0); /* value */
2530 } else {
2531 intel_ring_emit(ring, 0);
2532 intel_ring_emit(ring, MI_NOOP);
2533 }
b72f3acb 2534 intel_ring_advance(ring);
fd3da6c9 2535
b72f3acb 2536 return 0;
8d19215b
ZN
2537}
2538
5c1143bb
XH
2539int intel_init_render_ring_buffer(struct drm_device *dev)
2540{
4640c4ff 2541 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 2542 struct intel_engine_cs *ring = &dev_priv->ring[RCS];
3e78998a
BW
2543 struct drm_i915_gem_object *obj;
2544 int ret;
5c1143bb 2545
59465b5f
DV
2546 ring->name = "render ring";
2547 ring->id = RCS;
2548 ring->mmio_base = RENDER_RING_BASE;
2549
707d9cf9 2550 if (INTEL_INFO(dev)->gen >= 8) {
3e78998a
BW
2551 if (i915_semaphore_is_enabled(dev)) {
2552 obj = i915_gem_alloc_object(dev, 4096);
2553 if (obj == NULL) {
2554 DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
2555 i915.semaphores = 0;
2556 } else {
2557 i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
2558 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK);
2559 if (ret != 0) {
2560 drm_gem_object_unreference(&obj->base);
2561 DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
2562 i915.semaphores = 0;
2563 } else
2564 dev_priv->semaphore_obj = obj;
2565 }
2566 }
7225342a 2567
8f0e2b9d 2568 ring->init_context = intel_rcs_ctx_init;
707d9cf9
BW
2569 ring->add_request = gen6_add_request;
2570 ring->flush = gen8_render_ring_flush;
2571 ring->irq_get = gen8_ring_get_irq;
2572 ring->irq_put = gen8_ring_put_irq;
2573 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2574 ring->get_seqno = gen6_ring_get_seqno;
2575 ring->set_seqno = ring_set_seqno;
2576 if (i915_semaphore_is_enabled(dev)) {
3e78998a 2577 WARN_ON(!dev_priv->semaphore_obj);
5ee426ca 2578 ring->semaphore.sync_to = gen8_ring_sync;
3e78998a
BW
2579 ring->semaphore.signal = gen8_rcs_signal;
2580 GEN8_RING_SEMAPHORE_INIT;
707d9cf9
BW
2581 }
2582 } else if (INTEL_INFO(dev)->gen >= 6) {
1ec14ad3 2583 ring->add_request = gen6_add_request;
4772eaeb 2584 ring->flush = gen7_render_ring_flush;
6c6cf5aa 2585 if (INTEL_INFO(dev)->gen == 6)
b3111509 2586 ring->flush = gen6_render_ring_flush;
707d9cf9
BW
2587 ring->irq_get = gen6_ring_get_irq;
2588 ring->irq_put = gen6_ring_put_irq;
cc609d5d 2589 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
4cd53c0c 2590 ring->get_seqno = gen6_ring_get_seqno;
b70ec5bf 2591 ring->set_seqno = ring_set_seqno;
707d9cf9
BW
2592 if (i915_semaphore_is_enabled(dev)) {
2593 ring->semaphore.sync_to = gen6_ring_sync;
2594 ring->semaphore.signal = gen6_signal;
2595 /*
2596 * The current semaphore scheme is only used on pre-gen8
2597 * platforms, and there is no VCS2 ring on pre-gen8
2598 * platforms, so the semaphore between RCS and VCS2 is
2599 * initialized as INVALID. Gen8 will initialize the
2600 * semaphore between VCS2 and RCS later.
2601 */
2602 ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
2603 ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV;
2604 ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB;
2605 ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE;
2606 ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2607 ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
2608 ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC;
2609 ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC;
2610 ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC;
2611 ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2612 }
c6df541c
CW
2613 } else if (IS_GEN5(dev)) {
2614 ring->add_request = pc_render_add_request;
46f0f8d1 2615 ring->flush = gen4_render_ring_flush;
c6df541c 2616 ring->get_seqno = pc_render_get_seqno;
b70ec5bf 2617 ring->set_seqno = pc_render_set_seqno;
e48d8634
DV
2618 ring->irq_get = gen5_ring_get_irq;
2619 ring->irq_put = gen5_ring_put_irq;
cc609d5d
BW
2620 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
2621 GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
59465b5f 2622 } else {
8620a3a9 2623 ring->add_request = i9xx_add_request;
46f0f8d1
CW
2624 if (INTEL_INFO(dev)->gen < 4)
2625 ring->flush = gen2_render_ring_flush;
2626 else
2627 ring->flush = gen4_render_ring_flush;
59465b5f 2628 ring->get_seqno = ring_get_seqno;
b70ec5bf 2629 ring->set_seqno = ring_set_seqno;
c2798b19
CW
2630 if (IS_GEN2(dev)) {
2631 ring->irq_get = i8xx_ring_get_irq;
2632 ring->irq_put = i8xx_ring_put_irq;
2633 } else {
2634 ring->irq_get = i9xx_ring_get_irq;
2635 ring->irq_put = i9xx_ring_put_irq;
2636 }
e3670319 2637 ring->irq_enable_mask = I915_USER_INTERRUPT;
1ec14ad3 2638 }
59465b5f 2639 ring->write_tail = ring_write_tail;
707d9cf9 2640
d7d4eedd
CW
2641 if (IS_HASWELL(dev))
2642 ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
1c7a0623
BW
2643 else if (IS_GEN8(dev))
2644 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
d7d4eedd 2645 else if (INTEL_INFO(dev)->gen >= 6)
fb3256da
DV
2646 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
2647 else if (INTEL_INFO(dev)->gen >= 4)
2648 ring->dispatch_execbuffer = i965_dispatch_execbuffer;
2649 else if (IS_I830(dev) || IS_845G(dev))
2650 ring->dispatch_execbuffer = i830_dispatch_execbuffer;
2651 else
2652 ring->dispatch_execbuffer = i915_dispatch_execbuffer;
ecfe00d8 2653 ring->init_hw = init_render_ring;
59465b5f
DV
2654 ring->cleanup = render_ring_cleanup;
2655
b45305fc
DV
2656 /* Workaround batchbuffer to combat CS tlb bug. */
2657 if (HAS_BROKEN_CS_TLB(dev)) {
c4d69da1 2658 obj = i915_gem_alloc_object(dev, I830_WA_SIZE);
b45305fc
DV
2659 if (obj == NULL) {
2660 DRM_ERROR("Failed to allocate batch bo\n");
2661 return -ENOMEM;
2662 }
2663
be1fa129 2664 ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
b45305fc
DV
2665 if (ret != 0) {
2666 drm_gem_object_unreference(&obj->base);
2667 DRM_ERROR("Failed to pin batch bo\n");
2668 return ret;
2669 }
2670
0d1aacac
CW
2671 ring->scratch.obj = obj;
2672 ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
b45305fc
DV
2673 }
2674
99be1dfe
DV
2675 ret = intel_init_ring_buffer(dev, ring);
2676 if (ret)
2677 return ret;
2678
2679 if (INTEL_INFO(dev)->gen >= 5) {
2680 ret = intel_init_pipe_control(ring);
2681 if (ret)
2682 return ret;
2683 }
2684
2685 return 0;
5c1143bb
XH
2686}
2687
2688int intel_init_bsd_ring_buffer(struct drm_device *dev)
2689{
4640c4ff 2690 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 2691 struct intel_engine_cs *ring = &dev_priv->ring[VCS];
5c1143bb 2692
58fa3835
DV
2693 ring->name = "bsd ring";
2694 ring->id = VCS;
2695
0fd2c201 2696 ring->write_tail = ring_write_tail;
780f18c8 2697 if (INTEL_INFO(dev)->gen >= 6) {
58fa3835 2698 ring->mmio_base = GEN6_BSD_RING_BASE;
0fd2c201
DV
2699 /* gen6 bsd needs a special wa for tail updates */
2700 if (IS_GEN6(dev))
2701 ring->write_tail = gen6_bsd_ring_write_tail;
ea251324 2702 ring->flush = gen6_bsd_ring_flush;
58fa3835
DV
2703 ring->add_request = gen6_add_request;
2704 ring->get_seqno = gen6_ring_get_seqno;
b70ec5bf 2705 ring->set_seqno = ring_set_seqno;
abd58f01
BW
2706 if (INTEL_INFO(dev)->gen >= 8) {
2707 ring->irq_enable_mask =
2708 GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2709 ring->irq_get = gen8_ring_get_irq;
2710 ring->irq_put = gen8_ring_put_irq;
1c7a0623
BW
2711 ring->dispatch_execbuffer =
2712 gen8_ring_dispatch_execbuffer;
707d9cf9 2713 if (i915_semaphore_is_enabled(dev)) {
5ee426ca 2714 ring->semaphore.sync_to = gen8_ring_sync;
3e78998a
BW
2715 ring->semaphore.signal = gen8_xcs_signal;
2716 GEN8_RING_SEMAPHORE_INIT;
707d9cf9 2717 }
abd58f01
BW
2718 } else {
2719 ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
2720 ring->irq_get = gen6_ring_get_irq;
2721 ring->irq_put = gen6_ring_put_irq;
1c7a0623
BW
2722 ring->dispatch_execbuffer =
2723 gen6_ring_dispatch_execbuffer;
707d9cf9
BW
2724 if (i915_semaphore_is_enabled(dev)) {
2725 ring->semaphore.sync_to = gen6_ring_sync;
2726 ring->semaphore.signal = gen6_signal;
2727 ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR;
2728 ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
2729 ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB;
2730 ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE;
2731 ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2732 ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC;
2733 ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
2734 ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC;
2735 ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC;
2736 ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2737 }
abd58f01 2738 }
58fa3835
DV
2739 } else {
2740 ring->mmio_base = BSD_RING_BASE;
58fa3835 2741 ring->flush = bsd_ring_flush;
8620a3a9 2742 ring->add_request = i9xx_add_request;
58fa3835 2743 ring->get_seqno = ring_get_seqno;
b70ec5bf 2744 ring->set_seqno = ring_set_seqno;
e48d8634 2745 if (IS_GEN5(dev)) {
cc609d5d 2746 ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
e48d8634
DV
2747 ring->irq_get = gen5_ring_get_irq;
2748 ring->irq_put = gen5_ring_put_irq;
2749 } else {
e3670319 2750 ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
e48d8634
DV
2751 ring->irq_get = i9xx_ring_get_irq;
2752 ring->irq_put = i9xx_ring_put_irq;
2753 }
fb3256da 2754 ring->dispatch_execbuffer = i965_dispatch_execbuffer;
58fa3835 2755 }
ecfe00d8 2756 ring->init_hw = init_ring_common;
58fa3835 2757
1ec14ad3 2758 return intel_init_ring_buffer(dev, ring);
5c1143bb 2759}
549f7365 2760
845f74a7 2761/**
62659920 2762 * Initialize the second BSD ring (e.g. Broadwell GT3, Skylake GT3)
845f74a7
ZY
2763 */
2764int intel_init_bsd2_ring_buffer(struct drm_device *dev)
2765{
2766 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 2767 struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
845f74a7 2768
f7b64236 2769 ring->name = "bsd2 ring";
845f74a7
ZY
2770 ring->id = VCS2;
2771
2772 ring->write_tail = ring_write_tail;
2773 ring->mmio_base = GEN8_BSD2_RING_BASE;
2774 ring->flush = gen6_bsd_ring_flush;
2775 ring->add_request = gen6_add_request;
2776 ring->get_seqno = gen6_ring_get_seqno;
2777 ring->set_seqno = ring_set_seqno;
2778 ring->irq_enable_mask =
2779 GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
2780 ring->irq_get = gen8_ring_get_irq;
2781 ring->irq_put = gen8_ring_put_irq;
2782 ring->dispatch_execbuffer =
2783 gen8_ring_dispatch_execbuffer;
3e78998a 2784 if (i915_semaphore_is_enabled(dev)) {
5ee426ca 2785 ring->semaphore.sync_to = gen8_ring_sync;
3e78998a
BW
2786 ring->semaphore.signal = gen8_xcs_signal;
2787 GEN8_RING_SEMAPHORE_INIT;
2788 }
ecfe00d8 2789 ring->init_hw = init_ring_common;
845f74a7
ZY
2790
2791 return intel_init_ring_buffer(dev, ring);
2792}
2793
549f7365
CW
2794int intel_init_blt_ring_buffer(struct drm_device *dev)
2795{
4640c4ff 2796 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 2797 struct intel_engine_cs *ring = &dev_priv->ring[BCS];
549f7365 2798
3535d9dd
DV
2799 ring->name = "blitter ring";
2800 ring->id = BCS;
2801
2802 ring->mmio_base = BLT_RING_BASE;
2803 ring->write_tail = ring_write_tail;
ea251324 2804 ring->flush = gen6_ring_flush;
3535d9dd
DV
2805 ring->add_request = gen6_add_request;
2806 ring->get_seqno = gen6_ring_get_seqno;
b70ec5bf 2807 ring->set_seqno = ring_set_seqno;
abd58f01
BW
2808 if (INTEL_INFO(dev)->gen >= 8) {
2809 ring->irq_enable_mask =
2810 GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
2811 ring->irq_get = gen8_ring_get_irq;
2812 ring->irq_put = gen8_ring_put_irq;
1c7a0623 2813 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
707d9cf9 2814 if (i915_semaphore_is_enabled(dev)) {
5ee426ca 2815 ring->semaphore.sync_to = gen8_ring_sync;
3e78998a
BW
2816 ring->semaphore.signal = gen8_xcs_signal;
2817 GEN8_RING_SEMAPHORE_INIT;
707d9cf9 2818 }
abd58f01
BW
2819 } else {
2820 ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
2821 ring->irq_get = gen6_ring_get_irq;
2822 ring->irq_put = gen6_ring_put_irq;
1c7a0623 2823 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
707d9cf9
BW
2824 if (i915_semaphore_is_enabled(dev)) {
2825 ring->semaphore.signal = gen6_signal;
2826 ring->semaphore.sync_to = gen6_ring_sync;
2827 /*
2828 * The current semaphore scheme is only used on pre-gen8
2829 * platforms, and there is no VCS2 ring on pre-gen8
2830 * platforms, so the semaphore between BCS and VCS2 is
2831 * initialized as INVALID. Gen8 will initialize the
2832 * semaphore between BCS and VCS2 later.
2833 */
2834 ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR;
2835 ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV;
2836 ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
2837 ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE;
2838 ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2839 ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC;
2840 ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC;
2841 ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
2842 ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
2843 ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2844 }
abd58f01 2845 }
ecfe00d8 2846 ring->init_hw = init_ring_common;
549f7365 2847
1ec14ad3 2848 return intel_init_ring_buffer(dev, ring);
549f7365 2849}
a7b9761d 2850
9a8a2213
BW
2851int intel_init_vebox_ring_buffer(struct drm_device *dev)
2852{
4640c4ff 2853 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 2854 struct intel_engine_cs *ring = &dev_priv->ring[VECS];
9a8a2213
BW
2855
2856 ring->name = "video enhancement ring";
2857 ring->id = VECS;
2858
2859 ring->mmio_base = VEBOX_RING_BASE;
2860 ring->write_tail = ring_write_tail;
2861 ring->flush = gen6_ring_flush;
2862 ring->add_request = gen6_add_request;
2863 ring->get_seqno = gen6_ring_get_seqno;
2864 ring->set_seqno = ring_set_seqno;
abd58f01
BW
2865
2866 if (INTEL_INFO(dev)->gen >= 8) {
2867 ring->irq_enable_mask =
40c499f9 2868 GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
abd58f01
BW
2869 ring->irq_get = gen8_ring_get_irq;
2870 ring->irq_put = gen8_ring_put_irq;
1c7a0623 2871 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
707d9cf9 2872 if (i915_semaphore_is_enabled(dev)) {
5ee426ca 2873 ring->semaphore.sync_to = gen8_ring_sync;
3e78998a
BW
2874 ring->semaphore.signal = gen8_xcs_signal;
2875 GEN8_RING_SEMAPHORE_INIT;
707d9cf9 2876 }
abd58f01
BW
2877 } else {
2878 ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
2879 ring->irq_get = hsw_vebox_get_irq;
2880 ring->irq_put = hsw_vebox_put_irq;
1c7a0623 2881 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
707d9cf9
BW
2882 if (i915_semaphore_is_enabled(dev)) {
2883 ring->semaphore.sync_to = gen6_ring_sync;
2884 ring->semaphore.signal = gen6_signal;
2885 ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER;
2886 ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV;
2887 ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB;
2888 ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
2889 ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2890 ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC;
2891 ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC;
2892 ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
2893 ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
2894 ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2895 }
abd58f01 2896 }
ecfe00d8 2897 ring->init_hw = init_ring_common;
9a8a2213
BW
2898
2899 return intel_init_ring_buffer(dev, ring);
2900}
2901
a7b9761d 2902int
4866d729 2903intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
a7b9761d 2904{
4866d729 2905 struct intel_engine_cs *ring = req->ring;
a7b9761d
CW
2906 int ret;
2907
2908 if (!ring->gpu_caches_dirty)
2909 return 0;
2910
a84c3ae1 2911 ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);
a7b9761d
CW
2912 if (ret)
2913 return ret;
2914
a84c3ae1 2915 trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
a7b9761d
CW
2916
2917 ring->gpu_caches_dirty = false;
2918 return 0;
2919}
2920
2921int
2f20055d 2922intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
a7b9761d 2923{
2f20055d 2924 struct intel_engine_cs *ring = req->ring;
a7b9761d
CW
2925 uint32_t flush_domains;
2926 int ret;
2927
2928 flush_domains = 0;
2929 if (ring->gpu_caches_dirty)
2930 flush_domains = I915_GEM_GPU_DOMAINS;
2931
a84c3ae1 2932 ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
a7b9761d
CW
2933 if (ret)
2934 return ret;
2935
a84c3ae1 2936 trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
a7b9761d
CW
2937
2938 ring->gpu_caches_dirty = false;
2939 return 0;
2940}
e3efda49
CW
2941
2942void
a4872ba6 2943intel_stop_ring_buffer(struct intel_engine_cs *ring)
e3efda49
CW
2944{
2945 int ret;
2946
2947 if (!intel_ring_initialized(ring))
2948 return;
2949
2950 ret = intel_ring_idle(ring);
2951 if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
2952 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
2953 ring->name, ret);
2954
2955 stop_ring(ring);
2956}