]>
Commit | Line | Data |
---|---|---|
7d3c425f OM |
1 | /* |
2 | * SPDX-License-Identifier: MIT | |
3 | * | |
4 | * Copyright © 2014-2018 Intel Corporation | |
5 | */ | |
6 | ||
7 | #include "i915_drv.h" | |
10be98a7 | 8 | #include "intel_context.h" |
de5825be | 9 | #include "intel_engine_pm.h" |
d10cfee4 | 10 | #include "intel_gt.h" |
2871ea85 | 11 | #include "intel_ring.h" |
7d3c425f OM |
12 | #include "intel_workarounds.h" |
13 | ||
14 | /** | |
15 | * DOC: Hardware workarounds | |
16 | * | |
17 | * This file is intended as a central place to implement most [1]_ of the | |
18 | * required workarounds for hardware to work as originally intended. They fall | |
19 | * in five basic categories depending on how/when they are applied: | |
20 | * | |
21 | * - Workarounds that touch registers that are saved/restored to/from the HW | |
22 | * context image. The list is emitted (via Load Register Immediate commands) | |
23 | every time a new context is created. | |
24 | * - GT workarounds. The list of these WAs is applied whenever these registers | |
25 | * revert to default values (on GPU reset, suspend/resume [2]_, etc..). | |
26 | * - Display workarounds. The list is applied during display clock-gating | |
27 | * initialization. | |
28 | * - Workarounds that whitelist a privileged register, so that UMDs can manage | |
29 | them directly. This is just a special case of an MMIO workaround (as we | |
30 | write the list of these to-be-whitelisted registers to some special HW | |
31 | * registers). | |
32 | * - Workaround batchbuffers, that get executed automatically by the hardware | |
33 | * on every HW context restore. | |
34 | * | |
35 | * .. [1] Please notice that there are other WAs that, due to their nature, | |
36 | * cannot be applied from a central place. Those are peppered around the rest | |
37 | * of the code, as needed. | |
38 | * | |
39 | * .. [2] Technically, some registers are powercontext saved & restored, so they | |
40 | * survive a suspend/resume. In practice, writing them again is not too | |
41 | * costly and simplifies things. We can revisit this in the future. | |
42 | * | |
43 | * Layout | |
551bd336 | 44 | * ~~~~~~ |
7d3c425f OM |
45 | * |
46 | * Keep things in this file ordered by WA type, as per the above (context, GT, | |
47 | * display, register whitelist, batchbuffer). Then, inside each type, keep the | |
48 | * following order: | |
49 | * | |
50 | * - Infrastructure functions and macros | |
51 | * - WAs per platform in standard gen/chrono order | |
52 | * - Public functions to init or apply the given workaround type. | |
53 | */ | |
54 | ||
3e1f0a51 | 55 | static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name) |
25d140fa TU |
56 | { |
57 | wal->name = name; | |
3e1f0a51 | 58 | wal->engine_name = engine_name; |
25d140fa TU |
59 | } |
60 | ||
4d8d9fc7 TU |
61 | #define WA_LIST_CHUNK (1 << 4) |
62 | ||
25d140fa TU |
63 | static void wa_init_finish(struct i915_wa_list *wal) |
64 | { | |
4d8d9fc7 TU |
65 | /* Trim unused entries. */ |
66 | if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) { | |
67 | struct i915_wa *list = kmemdup(wal->list, | |
68 | wal->count * sizeof(*list), | |
69 | GFP_KERNEL); | |
70 | ||
71 | if (list) { | |
72 | kfree(wal->list); | |
73 | wal->list = list; | |
74 | } | |
75 | } | |
76 | ||
25d140fa TU |
77 | if (!wal->count) |
78 | return; | |
79 | ||
3e1f0a51 JH |
80 | DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n", |
81 | wal->wa_count, wal->name, wal->engine_name); | |
25d140fa TU |
82 | } |
83 | ||
452420d2 | 84 | static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) |
7d3c425f | 85 | { |
452420d2 TU |
86 | unsigned int addr = i915_mmio_reg_offset(wa->reg); |
87 | unsigned int start = 0, end = wal->count; | |
4d8d9fc7 | 88 | const unsigned int grow = WA_LIST_CHUNK; |
452420d2 TU |
89 | struct i915_wa *wa_; |
90 | ||
91 | GEM_BUG_ON(!is_power_of_2(grow)); | |
92 | ||
93 | if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */ | |
94 | struct i915_wa *list; | |
95 | ||
96 | list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa), | |
97 | GFP_KERNEL); | |
98 | if (!list) { | |
99 | DRM_ERROR("No space for workaround init!\n"); | |
100 | return; | |
101 | } | |
102 | ||
103 | if (wal->list) | |
104 | memcpy(list, wal->list, sizeof(*wa) * wal->count); | |
105 | ||
106 | wal->list = list; | |
107 | } | |
548764bb CW |
108 | |
109 | while (start < end) { | |
110 | unsigned int mid = start + (end - start) / 2; | |
111 | ||
452420d2 | 112 | if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) { |
548764bb | 113 | start = mid + 1; |
452420d2 | 114 | } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) { |
548764bb CW |
115 | end = mid; |
116 | } else { | |
452420d2 | 117 | wa_ = &wal->list[mid]; |
548764bb | 118 | |
452420d2 | 119 | if ((wa->mask & ~wa_->mask) == 0) { |
548764bb | 120 | DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n", |
452420d2 TU |
121 | i915_mmio_reg_offset(wa_->reg), |
122 | wa_->mask, wa_->val); | |
548764bb | 123 | |
452420d2 | 124 | wa_->val &= ~wa->mask; |
548764bb CW |
125 | } |
126 | ||
452420d2 TU |
127 | wal->wa_count++; |
128 | wa_->val |= wa->val; | |
129 | wa_->mask |= wa->mask; | |
769f0dab | 130 | wa_->read |= wa->read; |
548764bb CW |
131 | return; |
132 | } | |
133 | } | |
7d3c425f | 134 | |
452420d2 TU |
135 | wal->wa_count++; |
136 | wa_ = &wal->list[wal->count++]; | |
137 | *wa_ = *wa; | |
7d3c425f | 138 | |
452420d2 TU |
139 | while (wa_-- > wal->list) { |
140 | GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) == | |
141 | i915_mmio_reg_offset(wa_[1].reg)); | |
142 | if (i915_mmio_reg_offset(wa_[1].reg) > | |
143 | i915_mmio_reg_offset(wa_[0].reg)) | |
548764bb | 144 | break; |
7d3c425f | 145 | |
452420d2 | 146 | swap(wa_[1], wa_[0]); |
548764bb | 147 | } |
7d3c425f OM |
148 | } |
149 | ||
ff690b21 MT |
150 | static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, |
151 | u32 val, u32 read_mask) | |
452420d2 TU |
152 | { |
153 | struct i915_wa wa = { | |
769f0dab | 154 | .reg = reg, |
452420d2 | 155 | .mask = mask, |
769f0dab | 156 | .val = val, |
ff690b21 | 157 | .read = read_mask, |
452420d2 TU |
158 | }; |
159 | ||
160 | _wa_add(wal, &wa); | |
161 | } | |
162 | ||
ff690b21 MT |
163 | static void |
164 | wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, | |
165 | u32 val) | |
166 | { | |
167 | wa_add(wal, reg, mask, val, mask); | |
168 | } | |
169 | ||
69b768f2 TU |
170 | static void |
171 | wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) | |
172 | { | |
ae598b0d | 173 | wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val)); |
69b768f2 TU |
174 | } |
175 | ||
176 | static void | |
177 | wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) | |
178 | { | |
179 | wa_write_masked_or(wal, reg, ~0, val); | |
180 | } | |
181 | ||
182 | static void | |
183 | wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) | |
184 | { | |
185 | wa_write_masked_or(wal, reg, val, val); | |
186 | } | |
187 | ||
7d3c425f | 188 | #define WA_SET_BIT_MASKED(addr, mask) \ |
ae598b0d | 189 | wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask)) |
7d3c425f OM |
190 | |
191 | #define WA_CLR_BIT_MASKED(addr, mask) \ | |
ae598b0d | 192 | wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask)) |
7d3c425f OM |
193 | |
194 | #define WA_SET_FIELD_MASKED(addr, mask, value) \ | |
ae598b0d | 195 | wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value))) |
7d3c425f | 196 | |
fde93886 TU |
197 | static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, |
198 | struct i915_wa_list *wal) | |
7d3c425f | 199 | { |
7d3c425f OM |
200 | WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); |
201 | ||
202 | /* WaDisableAsyncFlipPerfMode:bdw,chv */ | |
203 | WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); | |
204 | ||
205 | /* WaDisablePartialInstShootdown:bdw,chv */ | |
206 | WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, | |
207 | PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); | |
208 | ||
209 | /* Use Force Non-Coherent whenever executing a 3D context. This is a | |
210 | * workaround for a possible hang in the unlikely event a TLB | |
211 | * invalidation occurs during a PSD flush. | |
212 | */ | |
213 | /* WaForceEnableNonCoherent:bdw,chv */ | |
214 | /* WaHdcDisableFetchWhenMasked:bdw,chv */ | |
215 | WA_SET_BIT_MASKED(HDC_CHICKEN0, | |
216 | HDC_DONOT_FETCH_MEM_WHEN_MASKED | | |
217 | HDC_FORCE_NON_COHERENT); | |
218 | ||
219 | /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: | |
220 | * "The Hierarchical Z RAW Stall Optimization allows non-overlapping | |
221 | * polygons in the same 8x4 pixel/sample area to be processed without | |
222 | * stalling waiting for the earlier ones to write to Hierarchical Z | |
223 | * buffer." | |
224 | * | |
225 | * This optimization is off by default for BDW and CHV; turn it on. | |
226 | */ | |
227 | WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); | |
228 | ||
229 | /* Wa4x4STCOptimizationDisable:bdw,chv */ | |
230 | WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); | |
231 | ||
232 | /* | |
233 | * BSpec recommends 8x4 when MSAA is used, | |
234 | * however in practice 16x4 seems fastest. | |
235 | * | |
236 | * Note that PS/WM thread counts depend on the WIZ hashing | |
237 | * disable bit, which we don't touch here, but it's good | |
238 | * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). | |
239 | */ | |
240 | WA_SET_FIELD_MASKED(GEN7_GT_MODE, | |
241 | GEN6_WIZ_HASHING_MASK, | |
242 | GEN6_WIZ_HASHING_16x4); | |
7d3c425f OM |
243 | } |
244 | ||
fde93886 TU |
245 | static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, |
246 | struct i915_wa_list *wal) | |
7d3c425f | 247 | { |
452420d2 | 248 | struct drm_i915_private *i915 = engine->i915; |
7d3c425f | 249 | |
fde93886 | 250 | gen8_ctx_workarounds_init(engine, wal); |
7d3c425f OM |
251 | |
252 | /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ | |
253 | WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); | |
254 | ||
255 | /* WaDisableDopClockGating:bdw | |
256 | * | |
72588ffd | 257 | * Also see the related UCGTCL1 write in bdw_init_clock_gating() |
7d3c425f OM |
258 | * to disable EUTC clock gating. |
259 | */ | |
260 | WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, | |
261 | DOP_CLOCK_GATING_DISABLE); | |
262 | ||
263 | WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, | |
264 | GEN8_SAMPLER_POWER_BYPASS_DIS); | |
265 | ||
266 | WA_SET_BIT_MASKED(HDC_CHICKEN0, | |
267 | /* WaForceContextSaveRestoreNonCoherent:bdw */ | |
268 | HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | | |
269 | /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ | |
452420d2 | 270 | (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); |
7d3c425f OM |
271 | } |
272 | ||
fde93886 TU |
273 | static void chv_ctx_workarounds_init(struct intel_engine_cs *engine, |
274 | struct i915_wa_list *wal) | |
7d3c425f | 275 | { |
fde93886 | 276 | gen8_ctx_workarounds_init(engine, wal); |
7d3c425f OM |
277 | |
278 | /* WaDisableThreadStallDopClockGating:chv */ | |
279 | WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); | |
280 | ||
281 | /* Improve HiZ throughput on CHV. */ | |
282 | WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); | |
7d3c425f OM |
283 | } |
284 | ||
fde93886 TU |
285 | static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, |
286 | struct i915_wa_list *wal) | |
7d3c425f | 287 | { |
452420d2 | 288 | struct drm_i915_private *i915 = engine->i915; |
452420d2 TU |
289 | |
290 | if (HAS_LLC(i915)) { | |
7d3c425f OM |
291 | /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl |
292 | * | |
293 | * Must match Display Engine. See | |
294 | * WaCompressedResourceDisplayNewHashMode. | |
295 | */ | |
296 | WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, | |
297 | GEN9_PBE_COMPRESSED_HASH_SELECTION); | |
298 | WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, | |
299 | GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); | |
7d3c425f OM |
300 | } |
301 | ||
302 | /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ | |
303 | /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ | |
304 | WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, | |
305 | FLOW_CONTROL_ENABLE | | |
306 | PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); | |
307 | ||
7d3c425f OM |
308 | /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ |
309 | /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ | |
310 | WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, | |
311 | GEN9_ENABLE_YV12_BUGFIX | | |
312 | GEN9_ENABLE_GPGPU_PREEMPTION); | |
313 | ||
314 | /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ | |
315 | /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ | |
316 | WA_SET_BIT_MASKED(CACHE_MODE_1, | |
317 | GEN8_4x4_STC_OPTIMIZATION_DISABLE | | |
318 | GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); | |
319 | ||
320 | /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ | |
321 | WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, | |
322 | GEN9_CCS_TLB_PREFETCH_ENABLE); | |
323 | ||
324 | /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ | |
325 | WA_SET_BIT_MASKED(HDC_CHICKEN0, | |
326 | HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | | |
327 | HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); | |
328 | ||
329 | /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are | |
330 | * both tied to WaForceContextSaveRestoreNonCoherent | |
331 | * in some hsds for skl. We keep the tie for all gen9. The | |
332 | * documentation is a bit hazy and so we want to get common behaviour, | |
333 | * even though there is no clear evidence we would need both on kbl/bxt. | |
334 | * This area has been source of system hangs so we play it safe | |
335 | * and mimic the skl regardless of what bspec says. | |
336 | * | |
337 | * Use Force Non-Coherent whenever executing a 3D context. This | |
338 | * is a workaround for a possible hang in the unlikely event | |
339 | * a TLB invalidation occurs during a PSD flush. | |
340 | */ | |
341 | ||
342 | /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */ | |
343 | WA_SET_BIT_MASKED(HDC_CHICKEN0, | |
344 | HDC_FORCE_NON_COHERENT); | |
345 | ||
7d3c425f | 346 | /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ |
452420d2 | 347 | if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) |
7d3c425f OM |
348 | WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, |
349 | GEN8_SAMPLER_POWER_BYPASS_DIS); | |
350 | ||
351 | /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ | |
352 | WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); | |
353 | ||
7d3c425f OM |
354 | /* |
355 | * Supporting preemption with fine-granularity requires changes in the | |
356 | * batch buffer programming. Since we can't break old userspace, we | |
357 | * need to set our default preemption level to safe value. Userspace is | |
358 | * still able to use more fine-grained preemption levels, since in | |
359 | * WaEnablePreemptionGranularityControlByUMD we're whitelisting the | |
360 | * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are | |
361 | * not real HW workarounds, but merely a way to start using preemption | |
362 | * while maintaining old contract with userspace. | |
363 | */ | |
364 | ||
365 | /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ | |
366 | WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); | |
367 | ||
368 | /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ | |
369 | WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, | |
370 | GEN9_PREEMPT_GPGPU_LEVEL_MASK, | |
371 | GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); | |
372 | ||
0c79f9cb | 373 | /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ |
452420d2 | 374 | if (IS_GEN9_LP(i915)) |
0c79f9cb | 375 | WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); |
7d3c425f OM |
376 | } |
377 | ||
fde93886 TU |
378 | static void skl_tune_iz_hashing(struct intel_engine_cs *engine, |
379 | struct i915_wa_list *wal) | |
7d3c425f | 380 | { |
452420d2 | 381 | struct drm_i915_private *i915 = engine->i915; |
7d3c425f OM |
382 | u8 vals[3] = { 0, 0, 0 }; |
383 | unsigned int i; | |
384 | ||
385 | for (i = 0; i < 3; i++) { | |
386 | u8 ss; | |
387 | ||
388 | /* | |
389 | * Only consider slices where one, and only one, subslice has 7 | |
390 | * EUs | |
391 | */ | |
0258404f | 392 | if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i])) |
7d3c425f OM |
393 | continue; |
394 | ||
395 | /* | |
396 | * subslice_7eu[i] != 0 (because of the check above) and | |
397 | * ss_max == 4 (maximum number of subslices possible per slice) | |
398 | * | |
399 | * -> 0 <= ss <= 3; | |
400 | */ | |
0258404f | 401 | ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1; |
7d3c425f OM |
402 | vals[i] = 3 - ss; |
403 | } | |
404 | ||
405 | if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) | |
452420d2 | 406 | return; |
7d3c425f OM |
407 | |
408 | /* Tune IZ hashing. See intel_device_info_runtime_init() */ | |
409 | WA_SET_FIELD_MASKED(GEN7_GT_MODE, | |
410 | GEN9_IZ_HASHING_MASK(2) | | |
411 | GEN9_IZ_HASHING_MASK(1) | | |
412 | GEN9_IZ_HASHING_MASK(0), | |
413 | GEN9_IZ_HASHING(2, vals[2]) | | |
414 | GEN9_IZ_HASHING(1, vals[1]) | | |
415 | GEN9_IZ_HASHING(0, vals[0])); | |
7d3c425f OM |
416 | } |
417 | ||
fde93886 TU |
418 | static void skl_ctx_workarounds_init(struct intel_engine_cs *engine, |
419 | struct i915_wa_list *wal) | |
7d3c425f | 420 | { |
fde93886 TU |
421 | gen9_ctx_workarounds_init(engine, wal); |
422 | skl_tune_iz_hashing(engine, wal); | |
59b449d5 | 423 | } |
7d3c425f | 424 | |
fde93886 TU |
425 | static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine, |
426 | struct i915_wa_list *wal) | |
59b449d5 | 427 | { |
fde93886 | 428 | gen9_ctx_workarounds_init(engine, wal); |
7d3c425f | 429 | |
59b449d5 OM |
430 | /* WaDisableThreadStallDopClockGating:bxt */ |
431 | WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, | |
432 | STALL_DOP_GATING_DISABLE); | |
433 | ||
434 | /* WaToEnableHwFixForPushConstHWBug:bxt */ | |
435 | WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, | |
436 | GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); | |
7d3c425f OM |
437 | } |
438 | ||
fde93886 TU |
439 | static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, |
440 | struct i915_wa_list *wal) | |
7d3c425f | 441 | { |
452420d2 | 442 | struct drm_i915_private *i915 = engine->i915; |
7d3c425f | 443 | |
fde93886 | 444 | gen9_ctx_workarounds_init(engine, wal); |
7d3c425f | 445 | |
59b449d5 | 446 | /* WaToEnableHwFixForPushConstHWBug:kbl */ |
452420d2 | 447 | if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER)) |
59b449d5 OM |
448 | WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, |
449 | GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); | |
7d3c425f | 450 | |
59b449d5 OM |
451 | /* WaDisableSbeCacheDispatchPortSharing:kbl */ |
452 | WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, | |
453 | GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); | |
59b449d5 OM |
454 | } |
455 | ||
fde93886 TU |
456 | static void glk_ctx_workarounds_init(struct intel_engine_cs *engine, |
457 | struct i915_wa_list *wal) | |
59b449d5 | 458 | { |
fde93886 | 459 | gen9_ctx_workarounds_init(engine, wal); |
59b449d5 OM |
460 | |
461 | /* WaToEnableHwFixForPushConstHWBug:glk */ | |
7d3c425f OM |
462 | WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, |
463 | GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); | |
7d3c425f OM |
464 | } |
465 | ||
fde93886 TU |
466 | static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, |
467 | struct i915_wa_list *wal) | |
7d3c425f | 468 | { |
fde93886 | 469 | gen9_ctx_workarounds_init(engine, wal); |
59b449d5 OM |
470 | |
471 | /* WaToEnableHwFixForPushConstHWBug:cfl */ | |
472 | WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, | |
473 | GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); | |
7d3c425f | 474 | |
59b449d5 OM |
475 | /* WaDisableSbeCacheDispatchPortSharing:cfl */ |
476 | WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, | |
477 | GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); | |
59b449d5 OM |
478 | } |
479 | ||
fde93886 TU |
480 | static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine, |
481 | struct i915_wa_list *wal) | |
59b449d5 | 482 | { |
452420d2 | 483 | struct drm_i915_private *i915 = engine->i915; |
452420d2 | 484 | |
7d3c425f OM |
485 | /* WaForceContextSaveRestoreNonCoherent:cnl */ |
486 | WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, | |
487 | HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); | |
488 | ||
489 | /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */ | |
452420d2 | 490 | if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0)) |
7d3c425f OM |
491 | WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5); |
492 | ||
493 | /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ | |
494 | WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, | |
495 | GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); | |
496 | ||
497 | /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */ | |
452420d2 | 498 | if (IS_CNL_REVID(i915, 0, CNL_REVID_B0)) |
7d3c425f OM |
499 | WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, |
500 | GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE); | |
501 | ||
7d3c425f OM |
502 | /* WaPushConstantDereferenceHoldDisable:cnl */ |
503 | WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); | |
504 | ||
59b449d5 | 505 | /* FtrEnableFastAnisoL1BankingFix:cnl */ |
7d3c425f OM |
506 | WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); |
507 | ||
508 | /* WaDisable3DMidCmdPreemption:cnl */ | |
509 | WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); | |
510 | ||
511 | /* WaDisableGPGPUMidCmdPreemption:cnl */ | |
512 | WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, | |
513 | GEN9_PREEMPT_GPGPU_LEVEL_MASK, | |
514 | GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); | |
515 | ||
7d3c425f OM |
516 | /* WaDisableEarlyEOT:cnl */ |
517 | WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); | |
7d3c425f OM |
518 | } |
519 | ||
fde93886 TU |
520 | static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, |
521 | struct i915_wa_list *wal) | |
cc38cae7 | 522 | { |
452420d2 | 523 | struct drm_i915_private *i915 = engine->i915; |
452420d2 | 524 | |
cbe3e1d1 TU |
525 | /* WaDisableBankHangMode:icl */ |
526 | wa_write(wal, | |
527 | GEN8_L3CNTLREG, | |
528 | intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) | | |
529 | GEN8_ERRDETBCTRL); | |
530 | ||
cc38cae7 OM |
531 | /* Wa_1604370585:icl (pre-prod) |
532 | * Formerly known as WaPushConstantDereferenceHoldDisable | |
533 | */ | |
452420d2 | 534 | if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) |
cc38cae7 OM |
535 | WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, |
536 | PUSH_CONSTANT_DEREF_DISABLE); | |
537 | ||
538 | /* WaForceEnableNonCoherent:icl | |
539 | * This is not the same workaround as in early Gen9 platforms, where | |
540 | * lacking this could cause system hangs, but coherency performance | |
541 | * overhead is high and only a few compute workloads really need it | |
542 | * (the register is whitelisted in hardware now, so UMDs can opt in | |
543 | * for coherency if they have a good reason). | |
544 | */ | |
545 | WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); | |
546 | ||
3c7ab278 OM |
547 | /* Wa_2006611047:icl (pre-prod) |
548 | * Formerly known as WaDisableImprovedTdlClkGating | |
549 | */ | |
452420d2 | 550 | if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) |
3c7ab278 OM |
551 | WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, |
552 | GEN11_TDL_CLOCK_GATING_FIX_DISABLE); | |
553 | ||
b1f88820 | 554 | /* Wa_2006665173:icl (pre-prod) */ |
452420d2 | 555 | if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) |
b1f88820 OM |
556 | WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, |
557 | GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); | |
0b904c89 TN |
558 | |
559 | /* WaEnableFloatBlendOptimization:icl */ | |
560 | wa_write_masked_or(wal, | |
561 | GEN10_CACHE_MODE_SS, | |
562 | 0, /* write-only, so skip validation */ | |
563 | _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE)); | |
d846325a MW |
564 | |
565 | /* WaDisableGPGPUMidThreadPreemption:icl */ | |
566 | WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, | |
567 | GEN9_PREEMPT_GPGPU_LEVEL_MASK, | |
568 | GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); | |
397049a0 DK |
569 | |
570 | /* allow headerless messages for preemptible GPGPU context */ | |
571 | WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE, | |
572 | GEN11_SAMPLER_ENABLE_HEADLESS_MSG); | |
cc38cae7 OM |
573 | } |
574 | ||
13e53c5c LDM |
575 | static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, |
576 | struct i915_wa_list *wal) | |
577 | { | |
99739f94 | 578 | /* Wa_1409142259:tgl */ |
1c757497 RS |
579 | WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, |
580 | GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); | |
ff690b21 | 581 | |
ff690b21 | 582 | /* |
eddf309a LDM |
583 | * Wa_1604555607:gen12 and Wa_1608008084:gen12 |
584 | * FF_MODE2 register will return the wrong value when read. The default | |
585 | * value for this register is zero for all fields and there are no bit | |
586 | * masks. So instead of doing a RMW we should just write the TDS timer | |
587 | * value for Wa_1604555607. | |
ff690b21 | 588 | */ |
eddf309a LDM |
589 | wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, |
590 | FF_MODE2_TDS_TIMER_128, 0); | |
13e53c5c LDM |
591 | } |
592 | ||
fde93886 TU |
593 | static void |
594 | __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, | |
595 | struct i915_wa_list *wal, | |
596 | const char *name) | |
7d3c425f | 597 | { |
452420d2 | 598 | struct drm_i915_private *i915 = engine->i915; |
452420d2 | 599 | |
fde93886 TU |
600 | if (engine->class != RENDER_CLASS) |
601 | return; | |
602 | ||
3e1f0a51 | 603 | wa_init_start(wal, name, engine->name); |
452420d2 | 604 | |
13e53c5c LDM |
605 | if (IS_GEN(i915, 12)) |
606 | tgl_ctx_workarounds_init(engine, wal); | |
607 | else if (IS_GEN(i915, 11)) | |
fde93886 | 608 | icl_ctx_workarounds_init(engine, wal); |
452420d2 | 609 | else if (IS_CANNONLAKE(i915)) |
fde93886 | 610 | cnl_ctx_workarounds_init(engine, wal); |
9e01d944 | 611 | else if (IS_COFFEELAKE(i915)) |
fde93886 | 612 | cfl_ctx_workarounds_init(engine, wal); |
9e01d944 | 613 | else if (IS_GEMINILAKE(i915)) |
fde93886 | 614 | glk_ctx_workarounds_init(engine, wal); |
9e01d944 | 615 | else if (IS_KABYLAKE(i915)) |
fde93886 | 616 | kbl_ctx_workarounds_init(engine, wal); |
9e01d944 | 617 | else if (IS_BROXTON(i915)) |
fde93886 | 618 | bxt_ctx_workarounds_init(engine, wal); |
9e01d944 | 619 | else if (IS_SKYLAKE(i915)) |
fde93886 | 620 | skl_ctx_workarounds_init(engine, wal); |
9e01d944 | 621 | else if (IS_CHERRYVIEW(i915)) |
fde93886 | 622 | chv_ctx_workarounds_init(engine, wal); |
9e01d944 | 623 | else if (IS_BROADWELL(i915)) |
fde93886 | 624 | bdw_ctx_workarounds_init(engine, wal); |
9e01d944 RV |
625 | else if (INTEL_GEN(i915) < 8) |
626 | return; | |
59b449d5 | 627 | else |
452420d2 | 628 | MISSING_CASE(INTEL_GEN(i915)); |
59b449d5 | 629 | |
452420d2 | 630 | wa_init_finish(wal); |
59b449d5 OM |
631 | } |
632 | ||
fde93886 TU |
633 | void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) |
634 | { | |
635 | __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context"); | |
636 | } | |
637 | ||
452420d2 | 638 | int intel_engine_emit_ctx_wa(struct i915_request *rq) |
59b449d5 | 639 | { |
452420d2 TU |
640 | struct i915_wa_list *wal = &rq->engine->ctx_wa_list; |
641 | struct i915_wa *wa; | |
642 | unsigned int i; | |
59b449d5 | 643 | u32 *cs; |
452420d2 | 644 | int ret; |
59b449d5 | 645 | |
452420d2 | 646 | if (wal->count == 0) |
59b449d5 OM |
647 | return 0; |
648 | ||
649 | ret = rq->engine->emit_flush(rq, EMIT_BARRIER); | |
7d3c425f OM |
650 | if (ret) |
651 | return ret; | |
652 | ||
452420d2 | 653 | cs = intel_ring_begin(rq, (wal->count * 2 + 2)); |
59b449d5 OM |
654 | if (IS_ERR(cs)) |
655 | return PTR_ERR(cs); | |
656 | ||
452420d2 TU |
657 | *cs++ = MI_LOAD_REGISTER_IMM(wal->count); |
658 | for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { | |
659 | *cs++ = i915_mmio_reg_offset(wa->reg); | |
660 | *cs++ = wa->val; | |
59b449d5 OM |
661 | } |
662 | *cs++ = MI_NOOP; | |
663 | ||
664 | intel_ring_advance(rq, cs); | |
665 | ||
666 | ret = rq->engine->emit_flush(rq, EMIT_BARRIER); | |
667 | if (ret) | |
668 | return ret; | |
669 | ||
670 | return 0; | |
671 | } | |
672 | ||
f663b0ca DCS |
673 | static void |
674 | gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) | |
25d140fa | 675 | { |
59b449d5 | 676 | /* WaDisableKillLogic:bxt,skl,kbl */ |
25d140fa TU |
677 | if (!IS_COFFEELAKE(i915)) |
678 | wa_write_or(wal, | |
679 | GAM_ECOCHK, | |
680 | ECOCHK_DIS_TLB); | |
59b449d5 | 681 | |
25d140fa | 682 | if (HAS_LLC(i915)) { |
59b449d5 OM |
683 | /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl |
684 | * | |
685 | * Must match Display Engine. See | |
686 | * WaCompressedResourceDisplayNewHashMode. | |
687 | */ | |
25d140fa TU |
688 | wa_write_or(wal, |
689 | MMCD_MISC_CTRL, | |
690 | MMCD_PCLA | MMCD_HOTSPOT_EN); | |
59b449d5 OM |
691 | } |
692 | ||
693 | /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */ | |
25d140fa TU |
694 | wa_write_or(wal, |
695 | GAM_ECOCHK, | |
696 | BDW_DISABLE_HDC_INVALIDATION); | |
59b449d5 OM |
697 | } |
698 | ||
f663b0ca DCS |
699 | static void |
700 | skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) | |
59b449d5 | 701 | { |
f663b0ca | 702 | gen9_gt_workarounds_init(i915, wal); |
59b449d5 | 703 | |
59b449d5 | 704 | /* WaDisableGafsUnitClkGating:skl */ |
25d140fa TU |
705 | wa_write_or(wal, |
706 | GEN7_UCGCTL4, | |
707 | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); | |
59b449d5 OM |
708 | |
709 | /* WaInPlaceDecompressionHang:skl */ | |
25d140fa TU |
710 | if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER)) |
711 | wa_write_or(wal, | |
712 | GEN9_GAMT_ECO_REG_RW_IA, | |
713 | GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); | |
59b449d5 OM |
714 | } |
715 | ||
f663b0ca DCS |
716 | static void |
717 | bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) | |
59b449d5 | 718 | { |
f663b0ca | 719 | gen9_gt_workarounds_init(i915, wal); |
59b449d5 | 720 | |
59b449d5 | 721 | /* WaInPlaceDecompressionHang:bxt */ |
25d140fa TU |
722 | wa_write_or(wal, |
723 | GEN9_GAMT_ECO_REG_RW_IA, | |
724 | GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); | |
59b449d5 OM |
725 | } |
726 | ||
f663b0ca DCS |
727 | static void |
728 | kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) | |
59b449d5 | 729 | { |
f663b0ca | 730 | gen9_gt_workarounds_init(i915, wal); |
59b449d5 | 731 | |
7d3c425f | 732 | /* WaDisableDynamicCreditSharing:kbl */ |
25d140fa TU |
733 | if (IS_KBL_REVID(i915, 0, KBL_REVID_B0)) |
734 | wa_write_or(wal, | |
735 | GAMT_CHKN_BIT_REG, | |
736 | GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); | |
7d3c425f | 737 | |
59b449d5 | 738 | /* WaDisableGafsUnitClkGating:kbl */ |
25d140fa TU |
739 | wa_write_or(wal, |
740 | GEN7_UCGCTL4, | |
741 | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); | |
7d3c425f | 742 | |
59b449d5 | 743 | /* WaInPlaceDecompressionHang:kbl */ |
25d140fa TU |
744 | wa_write_or(wal, |
745 | GEN9_GAMT_ECO_REG_RW_IA, | |
746 | GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); | |
59b449d5 | 747 | } |
7d3c425f | 748 | |
f663b0ca DCS |
749 | static void |
750 | glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) | |
59b449d5 | 751 | { |
f663b0ca | 752 | gen9_gt_workarounds_init(i915, wal); |
59b449d5 OM |
753 | } |
754 | ||
f663b0ca DCS |
755 | static void |
756 | cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) | |
59b449d5 | 757 | { |
f663b0ca | 758 | gen9_gt_workarounds_init(i915, wal); |
59b449d5 | 759 | |
59b449d5 | 760 | /* WaDisableGafsUnitClkGating:cfl */ |
25d140fa TU |
761 | wa_write_or(wal, |
762 | GEN7_UCGCTL4, | |
763 | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); | |
7d3c425f | 764 | |
59b449d5 | 765 | /* WaInPlaceDecompressionHang:cfl */ |
25d140fa TU |
766 | wa_write_or(wal, |
767 | GEN9_GAMT_ECO_REG_RW_IA, | |
768 | GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); | |
59b449d5 | 769 | } |
7d3c425f | 770 | |
/*
 * Pick a steering target for the multicast register (MCR) selector: find a
 * slice/subslice index that is both fused-in and (on gen10+) backed by an
 * enabled L3 bank, then record the GEN8_MCR_SELECTOR write in @wal.
 */
static void
wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
	unsigned int slice, subslice;
	u32 l3_en, mcr, mcr_mask;

	GEM_BUG_ON(INTEL_GEN(i915) < 10);

	/*
	 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
	 * L3Banks could be fused off in single slice scenario. If that is
	 * the case, we might need to program MCR select to a valid L3Bank
	 * by default, to make sure we correctly read certain registers
	 * later on (in the range 0xB100 - 0xB3FF).
	 *
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
	 * Before any MMIO read into slice/subslice specific registers, MCR
	 * packet control register needs to be programmed to point to any
	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
	 * This means each subsequent MMIO read will be forwarded to an
	 * specific s/ss combination, but this is OK since these registers
	 * are consistent across s/ss in almost all cases. In the rare
	 * occasions, such as INSTDONE, where this value is dependent
	 * on s/ss combo, the read should be done with read_subslice_reg.
	 *
	 * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both
	 * to which subslice, or to which L3 bank, the respective mmio reads
	 * will go, we have to find a common index which works for both
	 * accesses.
	 *
	 * Case where we cannot find a common index fortunately should not
	 * happen in production hardware, so we only emit a warning instead of
	 * implementing something more complex that requires checking the range
	 * of every MMIO read.
	 */

	if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) {
		/* Fuse register reports *disabled* banks; invert to get enabled. */
		u32 l3_fuse =
			intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
			GEN10_L3BANK_MASK;

		DRM_DEBUG_DRIVER("L3 fuse = %x\n", l3_fuse);
		l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
	} else {
		l3_en = ~0;
	}

	/* Highest enabled slice, then highest subslice also covered by l3_en. */
	slice = fls(sseu->slice_mask) - 1;
	subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
	if (!subslice) {
		DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
			 intel_sseu_get_subslices(sseu, slice), l3_en);
		subslice = fls(l3_en);
		WARN_ON(!subslice);
	}
	subslice--;

	/* Bit layout of the selector differs between gen11+ and gen10. */
	if (INTEL_GEN(i915) >= 11) {
		mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
	} else {
		mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
	}

	DRM_DEBUG_DRIVER("MCR slice/subslice = %x\n", mcr);

	wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
}
841 | ||
/* Build the GT workaround list for Cannonlake. */
static void
cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
		wa_write_or(wal,
			    GAMT_CHKN_BIT_REG,
			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

	/* WaInPlaceDecompressionHang:cnl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}
858 | ||
/* Build the GT workaround list for Icelake. */
static void
icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* WaInPlaceDecompressionHang:icl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaModifyGamTlbPartitioning:icl */
	wa_write_masked_or(wal,
			   GEN11_GACB_PERF_CTRL,
			   GEN11_HASH_CTRL_MASK,
			   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

	/* Wa_1405766107:icl
	 * Formerly known as WaCL2SFHalfMaxAlloc
	 */
	wa_write_or(wal,
		    GEN11_LSN_UNSLCVC,
		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

	/* Wa_220166154:icl
	 * Formerly known as WaDisCtxReload
	 */
	wa_write_or(wal,
		    GEN8_GAMW_ECO_DEV_RW_IA,
		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

	/* Wa_1405779004:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		wa_write_or(wal,
			    SLICE_UNIT_LEVEL_CLKGATE,
			    MSCUNIT_CLKGATE_DIS);

	/* Wa_1406680159:icl */
	wa_write_or(wal,
		    SUBSLICE_UNIT_LEVEL_CLKGATE,
		    GWUNIT_CLKGATE_DIS);

	/* Wa_1406838659:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
		wa_write_or(wal,
			    INF_UNIT_LEVEL_CLKGATE,
			    CGPSF_CLKGATE_DIS);

	/* Wa_1406463099:icl
	 * Formerly known as WaGamTlbPendError
	 */
	wa_write_or(wal,
		    GAMT_CHKN_BIT_REG,
		    GAMT_CHKN_DISABLE_L3_COH_PIPE);

	/* Wa_1607087056:icl */
	wa_write_or(wal,
		    SLICE_UNIT_LEVEL_CLKGATE,
		    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
}
919 | ||
/* Build the GT workaround list for Tigerlake (A0-stepping only so far). */
static void
tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	/* Wa_1409420604:tgl */
	if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
		wa_write_or(wal,
			    SUBSLICE_UNIT_LEVEL_CLKGATE2,
			    CPSSUNIT_CLKGATE_DIS);

	/* Wa_1409180338:tgl */
	if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
		wa_write_or(wal,
			    SLICE_UNIT_LEVEL_CLKGATE,
			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
}
935 | ||
/*
 * Dispatch to the platform-specific GT workaround builder. Platforms at or
 * below gen8 have no GT-list workarounds here; an unknown newer platform
 * trips MISSING_CASE.
 */
static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	if (IS_GEN(i915, 12))
		tgl_gt_workarounds_init(i915, wal);
	else if (IS_GEN(i915, 11))
		icl_gt_workarounds_init(i915, wal);
	else if (IS_CANNONLAKE(i915))
		cnl_gt_workarounds_init(i915, wal);
	else if (IS_COFFEELAKE(i915))
		cfl_gt_workarounds_init(i915, wal);
	else if (IS_GEMINILAKE(i915))
		glk_gt_workarounds_init(i915, wal);
	else if (IS_KABYLAKE(i915))
		kbl_gt_workarounds_init(i915, wal);
	else if (IS_BROXTON(i915))
		bxt_gt_workarounds_init(i915, wal);
	else if (IS_SKYLAKE(i915))
		skl_gt_workarounds_init(i915, wal);
	else if (INTEL_GEN(i915) <= 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));
}
960 | ||
/* Populate and seal the device-global GT workaround list (i915->gt_wa_list). */
void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
	struct i915_wa_list *wal = &i915->gt_wa_list;

	wa_init_start(wal, "GT", "global");
	gt_init_workarounds(i915, wal);
	wa_init_finish(wal);
}
969 | ||
/*
 * Accumulate the union of forcewake domains needed to read-modify-write
 * every register in @wal, so the whole list can be applied under one
 * forcewake grab.
 */
static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw = 0;
	struct i915_wa *wa;
	unsigned int i;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		fw |= intel_uncore_forcewake_for_reg(uncore,
						     wa->reg,
						     FW_REG_READ |
						     FW_REG_WRITE);

	return fw;
}
985 | ||
/*
 * Check that the bits we care about (wa->read) in the current register
 * value @cur match the expected workaround value; log and return false
 * on mismatch ("workaround lost").
 */
static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{
	if ((cur ^ wa->val) & wa->read) {
		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
			  name, from, i915_mmio_reg_offset(wa->reg),
			  cur, cur & wa->read,
			  wa->val, wa->mask);

		return false;
	}

	return true;
}
1000 | ||
/*
 * Write every workaround in @wal to the hardware via RMW, holding the
 * uncore lock and the combined forcewake domains for the whole list.
 * With CONFIG_DRM_I915_DEBUG_GEM, each write is read back and verified.
 */
static void
wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw;
	unsigned long flags;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	fw = wal_get_fw_for_rmw(uncore, wal);

	spin_lock_irqsave(&uncore->lock, flags);
	intel_uncore_forcewake_get__locked(uncore, fw);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
			wa_verify(wa,
				  intel_uncore_read_fw(uncore, wa->reg),
				  wal->name, "application");
	}

	intel_uncore_forcewake_put__locked(uncore, fw);
	spin_unlock_irqrestore(&uncore->lock, flags);
}
1028 | ||
/* Apply the device-global GT workaround list to the hardware. */
void intel_gt_apply_workarounds(struct intel_gt *gt)
{
	wa_list_apply(gt->uncore, &gt->i915->gt_wa_list);
}
1033 | ||
/*
 * Verify every entry of @wal against current MMIO values (CPU-side reads).
 * Returns true only if all entries still hold their workaround values.
 */
static bool wa_list_verify(struct intel_uncore *uncore,
			   const struct i915_wa_list *wal,
			   const char *from)
{
	struct i915_wa *wa;
	unsigned int i;
	bool ok = true;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		ok &= wa_verify(wa,
				intel_uncore_read(uncore, wa->reg),
				wal->name, from);

	return ok;
}
1049 | ||
/* Verify the device-global GT workaround list; @from tags the log output. */
bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
{
	return wa_list_verify(gt->uncore, &gt->i915->gt_wa_list, from);
}
1054 | ||
1e2b7f49 JH |
1055 | static inline bool is_nonpriv_flags_valid(u32 flags) |
1056 | { | |
1057 | /* Check only valid flag bits are set */ | |
1058 | if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID) | |
1059 | return false; | |
1060 | ||
1061 | /* NB: Only 3 out of 4 enum values are valid for access field */ | |
1062 | if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) == | |
1063 | RING_FORCE_TO_NONPRIV_ACCESS_INVALID) | |
1064 | return false; | |
1065 | ||
1066 | return true; | |
1067 | } | |
1068 | ||
/*
 * Add @reg to the whitelist @wal with the given FORCE_TO_NONPRIV @flags
 * OR-ed into the register offset. Silently drops the entry (with a debug
 * warning) if the hardware slot limit is reached or the flags are invalid.
 */
static void
whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
{
	struct i915_wa wa = {
		.reg = reg
	};

	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
		return;

	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
		return;

	/* Flags live in the low bits of the (dword-aligned) register offset. */
	wa.reg.reg |= flags;
	_wa_add(wal, &wa);
}
1085 | ||
/* Whitelist @reg with the default read/write access flags. */
static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
}
1091 | ||
/* Common gen9 register whitelist shared by skl/bxt/kbl/glk/cfl builders. */
static void gen9_whitelist_build(struct i915_wa_list *w)
{
	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	whitelist_reg(w, GEN8_CS_CHICKEN1);

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
	whitelist_reg(w, GEN8_HDC_CHICKEN1);

	/* WaSendPushConstantsFromMMIO:skl,bxt */
	whitelist_reg(w, COMMON_SLICE_CHICKEN2);
}
1106 | ||
/* Skylake whitelist: gen9 common set plus one SKL-only register (render only). */
static void skl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:skl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}
1119 | ||
/* Broxton whitelist: just the gen9 common set (render engine only). */
static void bxt_whitelist_build(struct intel_engine_cs *engine)
{
	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(&engine->whitelist);
}
1127 | ||
/* Kaby Lake whitelist: gen9 common set plus one KBL-only register (render only). */
static void kbl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:kbl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}
1140 | ||
/* Geminilake whitelist: gen9 common set plus a barrier-mode register (render only). */
static void glk_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}
7d3c425f | 1153 | |
/* Coffee Lake whitelist: gen9 common set plus read-only PS statistics range. */
static void cfl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/*
	 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
	 *
	 * This covers 4 register which are next to one another :
	 *   - PS_INVOCATION_COUNT
	 *   - PS_INVOCATION_COUNT_UDW
	 *   - PS_DEPTH_COUNT
	 *   - PS_DEPTH_COUNT_UDW
	 */
	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
			  RING_FORCE_TO_NONPRIV_RANGE_4);
}
1176 | ||
/* Cannonlake whitelist: a single register, render engine only. */
static void cnl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	/* WaEnablePreemptionGranularityControlByUMD:cnl */
	whitelist_reg(w, GEN8_CS_CHICKEN1);
}
1187 | ||
/*
 * Icelake whitelist: differs per engine class — render gets chicken/statistics
 * registers, video-decode gets read-only HuC status registers.
 */
static void icl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	switch (engine->class) {
	case RENDER_CLASS:
		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
		whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);

		/* WaAllowUMDToModifySamplerMode:icl */
		whitelist_reg(w, GEN10_SAMPLER_MODE);

		/* WaEnableStateCacheRedirectToCS:icl */
		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);

		/*
		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
		 *
		 * This covers 4 register which are next to one another :
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
				  RING_FORCE_TO_NONPRIV_RANGE_4);
		break;

	case VIDEO_DECODE_CLASS:
		/* hucStatusRegOffset */
		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		/* hucUKernelHdrInfoRegOffset */
		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		/* hucStatus2RegOffset */
		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		break;

	default:
		break;
	}
}
1233 | ||
/* Tigerlake whitelist: read-only PS statistics range on the render engine. */
static void tgl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	switch (engine->class) {
	case RENDER_CLASS:
		/*
		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
		 *
		 * This covers 4 registers which are next to one another :
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
				  RING_FORCE_TO_NONPRIV_RANGE_4);
		break;
	default:
		break;
	}
}
1257 | ||
/*
 * Build the per-engine register whitelist for the engine's platform.
 * Gen8 and earlier have no whitelist; unknown newer platforms trip
 * MISSING_CASE.
 */
void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *w = &engine->whitelist;

	wa_init_start(w, "whitelist", engine->name);

	if (IS_GEN(i915, 12))
		tgl_whitelist_build(engine);
	else if (IS_GEN(i915, 11))
		icl_whitelist_build(engine);
	else if (IS_CANNONLAKE(i915))
		cnl_whitelist_build(engine);
	else if (IS_COFFEELAKE(i915))
		cfl_whitelist_build(engine);
	else if (IS_GEMINILAKE(i915))
		glk_whitelist_build(engine);
	else if (IS_KABYLAKE(i915))
		kbl_whitelist_build(engine);
	else if (IS_BROXTON(i915))
		bxt_whitelist_build(engine);
	else if (IS_SKYLAKE(i915))
		skl_whitelist_build(engine);
	else if (INTEL_GEN(i915) <= 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));

	wa_init_finish(w);
}
1288 | ||
/*
 * Program the engine's FORCE_TO_NONPRIV slots from its whitelist, then
 * point any unused slots at RING_NOPID so stale entries cannot expose
 * arbitrary registers.
 */
void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
{
	const struct i915_wa_list *wal = &engine->whitelist;
	struct intel_uncore *uncore = engine->uncore;
	const u32 base = engine->mmio_base;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(wa->reg));

	/* And clear the rest just in case of garbage */
	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(RING_NOPID(base)));
}
1311 | ||
/*
 * Build the render-engine workaround list. Entries are grouped by platform
 * generation, newest first; a device may match several groups (e.g. gen11
 * also matches the 9-11 range check).
 */
static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
		/* Wa_1606700617:tgl */
		wa_masked_en(wal,
			     GEN9_CS_DEBUG_MODE1,
			     FF_DOP_CLOCK_GATE_DISABLE);

		/* Wa_1607138336:tgl */
		wa_write_or(wal,
			    GEN9_CTX_PREEMPT_REG,
			    GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);

		/* Wa_1607030317:tgl */
		/* Wa_1607186500:tgl */
		/* Wa_1607297627:tgl */
		wa_masked_en(wal,
			     GEN6_RC_SLEEP_PSMI_CONTROL,
			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
			     GEN8_RC_SEMA_IDLE_MSG_DISABLE);

		/*
		 * Wa_1606679103:tgl
		 * (see also Wa_1606682166:icl)
		 */
		wa_write_or(wal,
			    GEN7_SARCHKMD,
			    GEN7_DISABLE_SAMPLER_PREFETCH);
	}

	if (IS_GEN(i915, 11)) {
		/* This is not a Wa. Enable for better image quality */
		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);

		/* WaPipelineFlushCoherentLines:icl */
		wa_write_or(wal,
			    GEN8_L3SQCREG4,
			    GEN8_LQSC_FLUSH_COHERENT_LINES);

		/*
		 * Wa_1405543622:icl
		 * Formerly known as WaGAPZPriorityScheme
		 */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN11_ARBITRATION_PRIO_ORDER_MASK);

		/*
		 * Wa_1604223664:icl
		 * Formerly known as WaL3BankAddressHashing
		 */
		wa_write_masked_or(wal,
				   GEN8_GARBCNTL,
				   GEN11_HASH_CTRL_EXCL_MASK,
				   GEN11_HASH_CTRL_EXCL_BIT0);
		wa_write_masked_or(wal,
				   GEN11_GLBLINVL,
				   GEN11_BANK_HASH_ADDR_EXCL_MASK,
				   GEN11_BANK_HASH_ADDR_EXCL_BIT0);

		/*
		 * Wa_1405733216:icl
		 * Formerly known as WaDisableCleanEvicts
		 */
		wa_write_or(wal,
			    GEN8_L3SQCREG4,
			    GEN11_LQSC_CLEAN_EVICT_DISABLE);

		/* WaForwardProgressSoftReset:icl */
		wa_write_or(wal,
			    GEN10_SCRATCH_LNCF2,
			    PMFLUSHDONE_LNICRSDROP |
			    PMFLUSH_GAPL3UNBLOCK |
			    PMFLUSHDONE_LNEBLK);

		/* Wa_1406609255:icl (pre-prod) */
		if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
			wa_write_or(wal,
				    GEN7_SARCHKMD,
				    GEN7_DISABLE_DEMAND_PREFETCH);

		/* Wa_1606682166:icl */
		wa_write_or(wal,
			    GEN7_SARCHKMD,
			    GEN7_DISABLE_SAMPLER_PREFETCH);

		/* Wa_1409178092:icl */
		wa_write_masked_or(wal,
				   GEN11_SCRATCH2,
				   GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
				   0);
	}

	if (IS_GEN_RANGE(i915, 9, 11)) {
		/* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
		wa_masked_en(wal,
			     GEN7_FF_SLICE_CS_CHICKEN1,
			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
	}

	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
		/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN9_GAPS_TSV_CREDIT_DISABLE);
	}

	if (IS_BROXTON(i915)) {
		/* WaDisablePooledEuLoadBalancingFix:bxt */
		wa_masked_en(wal,
			     FF_SLICE_CS_CHICKEN2,
			     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
	}

	if (IS_GEN(i915, 9)) {
		/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
		wa_masked_en(wal,
			     GEN9_CSFE_CHICKEN1_RCS,
			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);

		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
		wa_write_or(wal,
			    BDW_SCRATCH1,
			    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
		if (IS_GEN9_LP(i915))
			wa_write_masked_or(wal,
					   GEN8_L3SQCREG1,
					   L3_PRIO_CREDITS_MASK,
					   L3_GENERAL_PRIO_CREDITS(62) |
					   L3_HIGH_PRIO_CREDITS(2));

		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
		wa_write_or(wal,
			    GEN8_L3SQCREG4,
			    GEN8_LQSC_FLUSH_COHERENT_LINES);
	}
}
1456 | ||
/* Build the workaround list for non-render engines. */
static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaKBLVECSSemaphoreWaitPoll:kbl */
	if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
		wa_write(wal,
			 RING_SEMA_WAIT_POLL(engine->mmio_base),
			 1);
	}
}
1469 | ||
/*
 * Split per-engine workaround building between the render path and all
 * other engine classes. The selftest-only guard tolerates mock engines
 * that report a pre-gen8 device.
 */
static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
		return;

	if (engine->class == RENDER_CLASS)
		rcs_engine_wa_init(engine, wal);
	else
		xcs_engine_wa_init(engine, wal);
}
1481 | ||
/* Populate and seal the per-engine workaround list (no-op before gen8). */
void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->wa_list;

	if (INTEL_GEN(engine->i915) < 8)
		return;

	wa_init_start(wal, "engine", engine->name);
	engine_init_workarounds(engine, wal);
	wa_init_finish(wal);
}
1493 | ||
/* Apply the engine's workaround list to the hardware via its uncore. */
void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
	wa_list_apply(engine->uncore, &engine->wa_list);
}
1498 | ||
/*
 * Allocate and pin an internal GEM object big enough to hold @count u32
 * results, mapped into @vm. Returns the pinned vma or an ERR_PTR; on
 * success the caller owns the vma (unpin + put when done).
 */
static struct i915_vma *
create_scratch(struct i915_address_space *vm, int count)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned int size;
	int err;

	size = round_up(count * sizeof(u32), PAGE_SIZE);
	obj = i915_gem_object_create_internal(vm->i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err)
		goto err_obj;

	return vma;

err_obj:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}
1531 | ||
/*
 * Gen8+ MMIO ranges steered by the MCR selector; the table is terminated
 * by the zeroed sentinel entry (start == 0).
 */
static const struct {
	u32 start;
	u32 end;
} mcr_ranges_gen8[] = {
	{ .start = 0x5500, .end = 0x55ff },
	{ .start = 0x7000, .end = 0x7fff },
	{ .start = 0x9400, .end = 0x97ff },
	{ .start = 0xb000, .end = 0xb3ff },
	{ .start = 0xe000, .end = 0xe7ff },
	{},
};
1543 | ||
/* Return true if @offset falls in an MCR-steered range (gen8+ only). */
static bool mcr_range(struct drm_i915_private *i915, u32 offset)
{
	int i;

	if (INTEL_GEN(i915) < 8)
		return false;

	/*
	 * Registers in these ranges are affected by the MCR selector
	 * which only controls CPU initiated MMIO. Routing does not
	 * work for CS access so we cannot verify them on this path.
	 */
	for (i = 0; mcr_ranges_gen8[i].start; i++)
		if (offset >= mcr_ranges_gen8[i].start &&
		    offset <= mcr_ranges_gen8[i].end)
			return true;

	return false;
}
1563 | ||
/*
 * Emit SRM (store-register-to-memory) commands into @rq that dump every
 * non-MCR workaround register of @wal into @vma. Result slot i matches
 * list index i; MCR-steered registers are skipped (CS reads would be
 * misrouted) so their slots are simply left unwritten.
 */
static int
wa_list_srm(struct i915_request *rq,
	    const struct i915_wa_list *wal,
	    struct i915_vma *vma)
{
	struct drm_i915_private *i915 = rq->i915;
	unsigned int i, count = 0;
	const struct i915_wa *wa;
	u32 srm, *cs;

	/* Gen8+ SRM takes a 64-bit address, i.e. one extra dword. */
	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	if (INTEL_GEN(i915) >= 8)
		srm++;

	/* Pre-count the registers we will actually emit. */
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
			count++;
	}

	cs = intel_ring_begin(rq, 4 * count);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		u32 offset = i915_mmio_reg_offset(wa->reg);

		if (mcr_range(i915, offset))
			continue;

		*cs++ = srm;
		*cs++ = offset;
		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
		*cs++ = 0;
	}
	intel_ring_advance(rq, cs);

	return 0;
}
1602 | ||
/*
 * Verify @wal from the GPU's point of view: submit a request on @ce that
 * stores each (non-MCR) workaround register into a scratch buffer, wait
 * for it, then compare the captured values with wa_verify(). Returns 0 on
 * success, -ETIME if the request does not complete, -ENXIO on mismatch,
 * or a setup error.
 */
static int engine_wa_list_verify(struct intel_context *ce,
				 const struct i915_wa_list * const wal,
				 const char *from)
{
	const struct i915_wa *wa;
	struct i915_request *rq;
	struct i915_vma *vma;
	unsigned int i;
	u32 *results;
	int err;

	if (!wal->count)
		return 0;

	vma = create_scratch(&ce->engine->gt->ggtt->vm, wal->count);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/* Hold engine wakeref across request creation (may power up the engine). */
	intel_engine_pm_get(ce->engine);
	rq = intel_context_create_request(ce);
	intel_engine_pm_put(ce->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_vma;
	}

	err = wa_list_srm(rq, wal, vma);
	if (err)
		/*
		 * NOTE(review): on this path the request is neither added nor
		 * put here — confirm rq lifetime handling against later
		 * upstream revisions of this function.
		 */
		goto err_vma;

	i915_request_get(rq);
	i915_request_add(rq);
	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
	if (IS_ERR(results)) {
		err = PTR_ERR(results);
		goto err_rq;
	}

	err = 0;
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		/* MCR-steered slots were never written by the SRM batch. */
		if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg)))
			continue;

		if (!wa_verify(wa, results[i], wal->name, from))
			err = -ENXIO;
	}

	i915_gem_object_unpin_map(vma->obj);

err_rq:
	i915_request_put(rq);
err_vma:
	i915_vma_unpin(vma);
	i915_vma_put(vma);
	return err;
}
1664 | ||
/* Verify the engine's workaround list from the GPU using its kernel context. */
int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
				    const char *from)
{
	return engine_wa_list_verify(engine->kernel_context,
				     &engine->wa_list,
				     from);
}
1672 | ||
f4ecfbfc | 1673 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
112ed2d3 | 1674 | #include "selftest_workarounds.c" |
f4ecfbfc | 1675 | #endif |