/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
                                          bool enable);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
        u32 temp;
        int actual_temp = 0;

        temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
                CTF_TEMP_SHIFT;

        if (temp & 0x200)
                actual_temp = 255;
        else
                actual_temp = temp & 0x1ff;

        actual_temp = actual_temp * 1000;

        return actual_temp;
}
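/*
 * Worked example (illustrative numbers, not from hardware docs): a raw
 * CTF_TEMP field of 0x063 has bit 0x200 clear, so actual_temp =
 * 0x063 & 0x1ff = 99 and the function returns 99 * 1000 = 99000
 * millidegrees (99 C); a field with bit 0x200 set is clamped to 255 C
 * before scaling.
 */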

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
        u32 temp;
        int actual_temp = 0;

        temp = RREG32_SMC(0xC0300E0C);

        if (temp)
                actual_temp = (temp / 8) - 49;
        else
                actual_temp = 0;

        actual_temp = actual_temp * 1000;

        return actual_temp;
}
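/*
 * Worked example (illustrative): an SMC reading of 0x1f0 (496) yields
 * actual_temp = 496 / 8 - 49 = 13, i.e. 13000 millidegrees (13 C); a
 * zero reading is reported as 0 rather than -49 C.
 */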

/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
        unsigned long flags;
        u32 r;

        spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        r = RREG32(PCIE_DATA);
        spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
        return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
        unsigned long flags;

        spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        WREG32(PCIE_DATA, v);
        (void)RREG32(PCIE_DATA);
        spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

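/*
 * Usage sketch (assumes the RREG32_PCIE_PORT()/WREG32_PCIE_PORT()
 * wrappers from radeon.h, which dispatch to the accessors above; the
 * register/field names here are only illustrative):
 *
 *        u32 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL);
 *        tmp |= LC_L1_INACTIVITY(7);
 *        WREG32_PCIE_PORT(PCIE_LC_CNTL, tmp);
 *
 * The dummy reads of PCIE_INDEX/PCIE_DATA flush the posted writes so
 * the index is guaranteed to land before the data access, and the
 * spinlock keeps each index/data pair atomic against other users.
 */
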
static const u32 spectre_rlc_save_restore_register_list[] =
{
        (0x0e00 << 16) | (0xc12c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc140 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc150 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc15c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc168 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc170 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc178 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc204 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2b4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2b8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2bc >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2c0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8228 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x829c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x869c >> 2),
        0x00000000,
        (0x0600 << 16) | (0x98f4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x98f8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x9900 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc260 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x90e8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3c000 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3c00c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8c1c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x9700 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xcd20 >> 2),
        0x00000000,
        (0x4e00 << 16) | (0xcd20 >> 2),
        0x00000000,
        (0x5e00 << 16) | (0xcd20 >> 2),
        0x00000000,
        (0x6e00 << 16) | (0xcd20 >> 2),
        0x00000000,
        (0x7e00 << 16) | (0xcd20 >> 2),
        0x00000000,
        (0x8e00 << 16) | (0xcd20 >> 2),
        0x00000000,
        (0x9e00 << 16) | (0xcd20 >> 2),
        0x00000000,
        (0xae00 << 16) | (0xcd20 >> 2),
        0x00000000,
        (0xbe00 << 16) | (0xcd20 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x89bc >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8900 >> 2),
        0x00000000,
        0x3,
        (0x0e00 << 16) | (0xc130 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc134 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc1fc >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc208 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc264 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc268 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc26c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc270 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc274 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc278 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc27c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc280 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc284 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc288 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc28c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc290 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc294 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc298 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc29c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2a0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2a4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2a8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2ac >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2b0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x301d0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30238 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30250 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30254 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30258 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3025c >> 2),
        0x00000000,
        (0x4e00 << 16) | (0xc900 >> 2),
        0x00000000,
        (0x5e00 << 16) | (0xc900 >> 2),
        0x00000000,
        (0x6e00 << 16) | (0xc900 >> 2),
        0x00000000,
        (0x7e00 << 16) | (0xc900 >> 2),
        0x00000000,
        (0x8e00 << 16) | (0xc900 >> 2),
        0x00000000,
        (0x9e00 << 16) | (0xc900 >> 2),
        0x00000000,
        (0xae00 << 16) | (0xc900 >> 2),
        0x00000000,
        (0xbe00 << 16) | (0xc900 >> 2),
        0x00000000,
        (0x4e00 << 16) | (0xc904 >> 2),
        0x00000000,
        (0x5e00 << 16) | (0xc904 >> 2),
        0x00000000,
        (0x6e00 << 16) | (0xc904 >> 2),
        0x00000000,
        (0x7e00 << 16) | (0xc904 >> 2),
        0x00000000,
        (0x8e00 << 16) | (0xc904 >> 2),
        0x00000000,
        (0x9e00 << 16) | (0xc904 >> 2),
        0x00000000,
        (0xae00 << 16) | (0xc904 >> 2),
        0x00000000,
        (0xbe00 << 16) | (0xc904 >> 2),
        0x00000000,
        (0x4e00 << 16) | (0xc908 >> 2),
        0x00000000,
        (0x5e00 << 16) | (0xc908 >> 2),
        0x00000000,
        (0x6e00 << 16) | (0xc908 >> 2),
        0x00000000,
        (0x7e00 << 16) | (0xc908 >> 2),
        0x00000000,
        (0x8e00 << 16) | (0xc908 >> 2),
        0x00000000,
        (0x9e00 << 16) | (0xc908 >> 2),
        0x00000000,
        (0xae00 << 16) | (0xc908 >> 2),
        0x00000000,
        (0xbe00 << 16) | (0xc908 >> 2),
        0x00000000,
        (0x4e00 << 16) | (0xc90c >> 2),
        0x00000000,
        (0x5e00 << 16) | (0xc90c >> 2),
        0x00000000,
        (0x6e00 << 16) | (0xc90c >> 2),
        0x00000000,
        (0x7e00 << 16) | (0xc90c >> 2),
        0x00000000,
        (0x8e00 << 16) | (0xc90c >> 2),
        0x00000000,
        (0x9e00 << 16) | (0xc90c >> 2),
        0x00000000,
        (0xae00 << 16) | (0xc90c >> 2),
        0x00000000,
        (0xbe00 << 16) | (0xc90c >> 2),
        0x00000000,
        (0x4e00 << 16) | (0xc910 >> 2),
        0x00000000,
        (0x5e00 << 16) | (0xc910 >> 2),
        0x00000000,
        (0x6e00 << 16) | (0xc910 >> 2),
        0x00000000,
        (0x7e00 << 16) | (0xc910 >> 2),
        0x00000000,
        (0x8e00 << 16) | (0xc910 >> 2),
        0x00000000,
        (0x9e00 << 16) | (0xc910 >> 2),
        0x00000000,
        (0xae00 << 16) | (0xc910 >> 2),
        0x00000000,
        (0xbe00 << 16) | (0xc910 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc99c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x9834 >> 2),
        0x00000000,
        (0x0000 << 16) | (0x30f00 >> 2),
        0x00000000,
        (0x0001 << 16) | (0x30f00 >> 2),
        0x00000000,
        (0x0000 << 16) | (0x30f04 >> 2),
        0x00000000,
        (0x0001 << 16) | (0x30f04 >> 2),
        0x00000000,
        (0x0000 << 16) | (0x30f08 >> 2),
        0x00000000,
        (0x0001 << 16) | (0x30f08 >> 2),
        0x00000000,
        (0x0000 << 16) | (0x30f0c >> 2),
        0x00000000,
        (0x0001 << 16) | (0x30f0c >> 2),
        0x00000000,
        (0x0600 << 16) | (0x9b7c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8a14 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8a18 >> 2),
        0x00000000,
        (0x0600 << 16) | (0x30a00 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8bf0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8bcc >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8b24 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30a04 >> 2),
        0x00000000,
        (0x0600 << 16) | (0x30a10 >> 2),
        0x00000000,
        (0x0600 << 16) | (0x30a14 >> 2),
        0x00000000,
        (0x0600 << 16) | (0x30a18 >> 2),
        0x00000000,
        (0x0600 << 16) | (0x30a2c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc700 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc704 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc708 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc768 >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc770 >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc774 >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc778 >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc77c >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc780 >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc784 >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc788 >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc78c >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc798 >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc79c >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc7a0 >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc7a4 >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc7a8 >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc7ac >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc7b0 >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc7b4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x9100 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3c010 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x92a8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x92ac >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x92b4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x92b8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x92bc >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x92c0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x92c4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x92c8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x92cc >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x92d0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8c00 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8c04 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8c20 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8c38 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8c3c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xae00 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x9604 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac08 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac0c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac10 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac14 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac58 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac68 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac6c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac70 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac74 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac78 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac7c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac80 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac84 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac88 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac8c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x970c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x9714 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x9718 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x971c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x31068 >> 2),
        0x00000000,
        (0x4e00 << 16) | (0x31068 >> 2),
        0x00000000,
        (0x5e00 << 16) | (0x31068 >> 2),
        0x00000000,
        (0x6e00 << 16) | (0x31068 >> 2),
        0x00000000,
        (0x7e00 << 16) | (0x31068 >> 2),
        0x00000000,
        (0x8e00 << 16) | (0x31068 >> 2),
        0x00000000,
        (0x9e00 << 16) | (0x31068 >> 2),
        0x00000000,
        (0xae00 << 16) | (0x31068 >> 2),
        0x00000000,
        (0xbe00 << 16) | (0x31068 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xcd10 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xcd14 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88b0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88b4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88b8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88bc >> 2),
        0x00000000,
        (0x0400 << 16) | (0x89c0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88c4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88c8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88d0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88d4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88d8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8980 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30938 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3093c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30940 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x89a0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30900 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30904 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x89b4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3c210 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3c214 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3c218 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8904 >> 2),
        0x00000000,
        0x5,
        (0x0e00 << 16) | (0x8c28 >> 2),
        (0x0e00 << 16) | (0x8c2c >> 2),
        (0x0e00 << 16) | (0x8c30 >> 2),
        (0x0e00 << 16) | (0x8c34 >> 2),
        (0x0e00 << 16) | (0x9600 >> 2),
};
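
/*
 * Format note (inferred from the layout of these tables, not from
 * documentation): most entries in the save/restore lists above and
 * below are pairs of ((instance select << 16) | (register byte offset
 * >> 2)) followed by a zero scratch word; 0x0e00 in the high half
 * appears to be a broadcast select, while 0x4e00..0xbe00 address
 * individual instances.  The bare literals 0x3 and 0x5 delimit the
 * trailing sub-lists, the last of which carries no scratch words.
 */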

static const u32 kalindi_rlc_save_restore_register_list[] =
{
        (0x0e00 << 16) | (0xc12c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc140 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc150 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc15c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc168 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc170 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc204 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2b4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2b8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2bc >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2c0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8228 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x829c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x869c >> 2),
        0x00000000,
        (0x0600 << 16) | (0x98f4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x98f8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x9900 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc260 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x90e8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3c000 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3c00c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8c1c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x9700 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xcd20 >> 2),
        0x00000000,
        (0x4e00 << 16) | (0xcd20 >> 2),
        0x00000000,
        (0x5e00 << 16) | (0xcd20 >> 2),
        0x00000000,
        (0x6e00 << 16) | (0xcd20 >> 2),
        0x00000000,
        (0x7e00 << 16) | (0xcd20 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x89bc >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8900 >> 2),
        0x00000000,
        0x3,
        (0x0e00 << 16) | (0xc130 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc134 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc1fc >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc208 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc264 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc268 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc26c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc270 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc274 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc28c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc290 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc294 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc298 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2a0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2a4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2a8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc2ac >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x301d0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30238 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30250 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30254 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30258 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3025c >> 2),
        0x00000000,
        (0x4e00 << 16) | (0xc900 >> 2),
        0x00000000,
        (0x5e00 << 16) | (0xc900 >> 2),
        0x00000000,
        (0x6e00 << 16) | (0xc900 >> 2),
        0x00000000,
        (0x7e00 << 16) | (0xc900 >> 2),
        0x00000000,
        (0x4e00 << 16) | (0xc904 >> 2),
        0x00000000,
        (0x5e00 << 16) | (0xc904 >> 2),
        0x00000000,
        (0x6e00 << 16) | (0xc904 >> 2),
        0x00000000,
        (0x7e00 << 16) | (0xc904 >> 2),
        0x00000000,
        (0x4e00 << 16) | (0xc908 >> 2),
        0x00000000,
        (0x5e00 << 16) | (0xc908 >> 2),
        0x00000000,
        (0x6e00 << 16) | (0xc908 >> 2),
        0x00000000,
        (0x7e00 << 16) | (0xc908 >> 2),
        0x00000000,
        (0x4e00 << 16) | (0xc90c >> 2),
        0x00000000,
        (0x5e00 << 16) | (0xc90c >> 2),
        0x00000000,
        (0x6e00 << 16) | (0xc90c >> 2),
        0x00000000,
        (0x7e00 << 16) | (0xc90c >> 2),
        0x00000000,
        (0x4e00 << 16) | (0xc910 >> 2),
        0x00000000,
        (0x5e00 << 16) | (0xc910 >> 2),
        0x00000000,
        (0x6e00 << 16) | (0xc910 >> 2),
        0x00000000,
        (0x7e00 << 16) | (0xc910 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc99c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x9834 >> 2),
        0x00000000,
        (0x0000 << 16) | (0x30f00 >> 2),
        0x00000000,
        (0x0000 << 16) | (0x30f04 >> 2),
        0x00000000,
        (0x0000 << 16) | (0x30f08 >> 2),
        0x00000000,
        (0x0000 << 16) | (0x30f0c >> 2),
        0x00000000,
        (0x0600 << 16) | (0x9b7c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8a14 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8a18 >> 2),
        0x00000000,
        (0x0600 << 16) | (0x30a00 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8bf0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8bcc >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8b24 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30a04 >> 2),
        0x00000000,
        (0x0600 << 16) | (0x30a10 >> 2),
        0x00000000,
        (0x0600 << 16) | (0x30a14 >> 2),
        0x00000000,
        (0x0600 << 16) | (0x30a18 >> 2),
        0x00000000,
        (0x0600 << 16) | (0x30a2c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc700 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc704 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc708 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xc768 >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc770 >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc774 >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc798 >> 2),
        0x00000000,
        (0x0400 << 16) | (0xc79c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x9100 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3c010 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8c00 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8c04 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8c20 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8c38 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8c3c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xae00 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x9604 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac08 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac0c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac10 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac14 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac58 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac68 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac6c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac70 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac74 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac78 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac7c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac80 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac84 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac88 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xac8c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x970c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x9714 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x9718 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x971c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x31068 >> 2),
        0x00000000,
        (0x4e00 << 16) | (0x31068 >> 2),
        0x00000000,
        (0x5e00 << 16) | (0x31068 >> 2),
        0x00000000,
        (0x6e00 << 16) | (0x31068 >> 2),
        0x00000000,
        (0x7e00 << 16) | (0x31068 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xcd10 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0xcd14 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88b0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88b4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88b8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88bc >> 2),
        0x00000000,
        (0x0400 << 16) | (0x89c0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88c4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88c8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88d0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88d4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x88d8 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8980 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30938 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3093c >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30940 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x89a0 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30900 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x30904 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x89b4 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3e1fc >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3c210 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3c214 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x3c218 >> 2),
        0x00000000,
        (0x0e00 << 16) | (0x8904 >> 2),
        0x00000000,
        0x5,
        (0x0e00 << 16) | (0x8c28 >> 2),
        (0x0e00 << 16) | (0x8c2c >> 2),
        (0x0e00 << 16) | (0x8c30 >> 2),
        (0x0e00 << 16) | (0x8c34 >> 2),
        (0x0e00 << 16) | (0x9600 >> 2),
};

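/*
 * The golden/init tables below are (reg, and_mask, or_mask) triples.
 * radeon_program_register_sequence() consumes them roughly as in this
 * sketch (a paraphrase of the helper in radeon_device.c):
 *
 *        for (i = 0; i < array_size; i += 3) {
 *                reg = registers[i + 0];
 *                and_mask = registers[i + 1];
 *                or_mask = registers[i + 2];
 *                if (and_mask == 0xffffffff) {
 *                        tmp = or_mask;
 *                } else {
 *                        tmp = RREG32(reg);
 *                        tmp &= ~and_mask;
 *                        tmp |= or_mask;
 *                }
 *                WREG32(reg, tmp);
 *        }
 *
 * i.e. a mask of 0xffffffff writes the value verbatim; anything else
 * is a read-modify-write of only the masked bits.
 */
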
static const u32 bonaire_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x3350, 0x000c0fc0, 0x00040200,
        0x9a10, 0x00010000, 0x00058208,
        0x3c000, 0xffff1fff, 0x00140000,
        0x3c200, 0xfdfc0fff, 0x00000100,
        0x3c234, 0x40000000, 0x40000200,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x220c, 0x00007fb6, 0x0021a1b1,
        0x2210, 0x00007fb6, 0x002021b1,
        0x2180, 0x00007fb6, 0x00002191,
        0x2218, 0x00007fb6, 0x002121b1,
        0x221c, 0x00007fb6, 0x002021b1,
        0x21dc, 0x00007fb6, 0x00002191,
        0x21e0, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000003f, 0x00000007,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0x9100, 0x03000000, 0x0362c688,
        0x8c00, 0x000000ff, 0x00000001,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0xc0000100,
        0x3c2c8, 0xffffffff, 0xc0000100,
        0x3c2c4, 0xffffffff, 0xc0000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
        0x3c000, 0xffff1fff, 0x96940200,
        0x3c00c, 0xffff0001, 0xff000000,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0xfffffffc, 0x00020200,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x2f48, 0x73773777, 0x12010001,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x28354, 0x0000003f, 0x00000000,
        0x3e78, 0x00000001, 0x00000002,
        0x913c, 0xffff03df, 0x00000004,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000008ff, 0x00000800,
        0x9508, 0x00010000, 0x00010000,
        0xac0c, 0xffffffff, 0x54763210,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
        0x3c000, 0xffffdfff, 0x6e944040,
        0x55e4, 0xff607fff, 0xfc000100,
        0x3c220, 0xff000fff, 0x00000100,
        0x3c224, 0xff000fff, 0x00000100,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ffcfff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000000ff, 0x00000003,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
        0x30800, 0xffffffff, 0xe0000000,
        0x28350, 0xffffffff, 0x3a00161a,
        0x28354, 0xffffffff, 0x0000002e,
        0x9a10, 0xffffffff, 0x00018208,
        0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x9a10, 0x00010000, 0x00058208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x2120, 0x0000007f, 0x0000001b,
        0x21dc, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0xc770, 0x00000f00, 0x00000800,
        0xc774, 0x00000f00, 0x00000800,
        0xc798, 0x00ffffff, 0x00ff7fbf,
        0xc79c, 0x00ffffff, 0x00ff7faf,
        0x8c00, 0x000000ff, 0x00000800,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xae00, 0x00100000, 0x000ff07c,
        0xac14, 0x000003ff, 0x0000000f,
        0xac10, 0xffffffff, 0x7564fdec,
        0xac0c, 0xffffffff, 0x3120b9a8,
        0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffd,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00200100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c0c0, 0xffffffff, 0x00010000,
        0x3c0c4, 0xffffffff, 0x00030002,
        0x3c0c8, 0xffffffff, 0x00040007,
        0x3c0cc, 0xffffffff, 0x00060005,
        0x3c0d0, 0xffffffff, 0x00090008,
        0x3c0d4, 0xffffffff, 0x00010000,
        0x3c0d8, 0xffffffff, 0x00030002,
        0x3c0dc, 0xffffffff, 0x00040007,
        0x3c0e0, 0xffffffff, 0x00060005,
        0x3c0e4, 0xffffffff, 0x00090008,
        0x3c0e8, 0xffffffff, 0x00010000,
        0x3c0ec, 0xffffffff, 0x00030002,
        0x3c0f0, 0xffffffff, 0x00040007,
        0x3c0f4, 0xffffffff, 0x00060005,
        0x3c0f8, 0xffffffff, 0x00090008,
        0xc318, 0xffffffff, 0x00020200,
        0x3350, 0xffffffff, 0x00000200,
        0x15c0, 0xffffffff, 0x00000400,
        0x55e8, 0xffffffff, 0x00000000,
        0x2f50, 0xffffffff, 0x00000902,
        0x3c000, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xc060000c,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
        0x55e4, 0xff607fff, 0xfc000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x98302, 0xf00fffff, 0x00000400,
        0x6130, 0xffffffff, 0x00010000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ff0fff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0xd014, 0x00010000, 0x00810001,
        0xd814, 0x00010000, 0x00810001,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0xc770, 0x00000f00, 0x00000800,
        0xc774, 0x00000f00, 0x00000800,
        0xc798, 0x00ffffff, 0x00ff7fbf,
        0xc79c, 0x00ffffff, 0x00ff7faf,
        0x8c00, 0x000000ff, 0x00000001,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
        switch (rdev->family) {
        case CHIP_BONAIRE:
                radeon_program_register_sequence(rdev,
                        bonaire_mgcg_cgcg_init,
                        (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                        bonaire_golden_registers,
                        (const u32)ARRAY_SIZE(bonaire_golden_registers));
                radeon_program_register_sequence(rdev,
                        bonaire_golden_common_registers,
                        (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
                radeon_program_register_sequence(rdev,
                        bonaire_golden_spm_registers,
                        (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
                break;
        case CHIP_KABINI:
                radeon_program_register_sequence(rdev,
                        kalindi_mgcg_cgcg_init,
                        (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                        kalindi_golden_registers,
                        (const u32)ARRAY_SIZE(kalindi_golden_registers));
                radeon_program_register_sequence(rdev,
                        kalindi_golden_common_registers,
                        (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
                radeon_program_register_sequence(rdev,
                        kalindi_golden_spm_registers,
                        (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
                break;
        case CHIP_MULLINS:
                radeon_program_register_sequence(rdev,
                        kalindi_mgcg_cgcg_init,
                        (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                        godavari_golden_registers,
                        (const u32)ARRAY_SIZE(godavari_golden_registers));
                radeon_program_register_sequence(rdev,
                        kalindi_golden_common_registers,
                        (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
                radeon_program_register_sequence(rdev,
                        kalindi_golden_spm_registers,
                        (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
                break;
        case CHIP_KAVERI:
                radeon_program_register_sequence(rdev,
                        spectre_mgcg_cgcg_init,
                        (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                        spectre_golden_registers,
                        (const u32)ARRAY_SIZE(spectre_golden_registers));
                radeon_program_register_sequence(rdev,
                        spectre_golden_common_registers,
                        (const u32)ARRAY_SIZE(spectre_golden_common_registers));
                radeon_program_register_sequence(rdev,
                        spectre_golden_spm_registers,
                        (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
                break;
        case CHIP_HAWAII:
                radeon_program_register_sequence(rdev,
                        hawaii_mgcg_cgcg_init,
                        (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                        hawaii_golden_registers,
                        (const u32)ARRAY_SIZE(hawaii_golden_registers));
                radeon_program_register_sequence(rdev,
                        hawaii_golden_common_registers,
                        (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
                radeon_program_register_sequence(rdev,
                        hawaii_golden_spm_registers,
                        (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
                break;
        default:
                break;
        }
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
        u32 reference_clock = rdev->clock.spll.reference_freq;

        if (rdev->flags & RADEON_IS_IGP) {
                if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
                        return reference_clock / 2;
        } else {
                if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
                        return reference_clock / 4;
        }
        return reference_clock;
}
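/*
 * Note: as with the other radeon clock interfaces, the value is in
 * units of 10 kHz (an assumption based on how spll.reference_freq is
 * used elsewhere in the driver).  For example, a 100 MHz reference
 * (reference_freq == 10000) on a dGPU with XTALIN_DIVIDE set would be
 * reported as 2500, i.e. 25 MHz.
 */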

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
        if (index < rdev->doorbell.num_doorbells) {
                return readl(rdev->doorbell.ptr + index);
        } else {
                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
                return 0;
        }
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
        if (index < rdev->doorbell.num_doorbells) {
                writel(v, rdev->doorbell.ptr + index);
        } else {
                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
        }
}
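/*
 * Usage sketch: radeon.h wraps these accessors in the RDOORBELL32()/
 * WDOORBELL32() macros, and the compute ring code kicks the CP by
 * writing the new write pointer through the ring's doorbell, roughly:
 *
 *        WDOORBELL32(ring->doorbell_index, ring->wptr);
 */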

#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
        {0x00000070, 0x04400000},
        {0x00000071, 0x80c01803},
        {0x00000072, 0x00004004},
        {0x00000073, 0x00000100},
        {0x00000074, 0x00ff0000},
        {0x00000075, 0x34000000},
        {0x00000076, 0x08000014},
        {0x00000077, 0x00cc08ec},
        {0x00000078, 0x00000400},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x04090000},
        {0x0000007c, 0x00000000},
        {0x0000007e, 0x4408a8e8},
        {0x0000007f, 0x00000304},
        {0x00000080, 0x00000000},
        {0x00000082, 0x00000001},
        {0x00000083, 0x00000002},
        {0x00000084, 0xf3e4f400},
        {0x00000085, 0x052024e3},
        {0x00000087, 0x00000000},
        {0x00000088, 0x01000000},
        {0x0000008a, 0x1c0a0000},
        {0x0000008b, 0xff010000},
        {0x0000008d, 0xffffefff},
        {0x0000008e, 0xfff3efff},
        {0x0000008f, 0xfff3efbf},
        {0x00000092, 0xf7ffffff},
        {0x00000093, 0xffffff7f},
        {0x00000095, 0x00101101},
        {0x00000096, 0x00000fff},
        {0x00000097, 0x00116fff},
        {0x00000098, 0x60010000},
        {0x00000099, 0x10010000},
        {0x0000009a, 0x00006000},
        {0x0000009b, 0x00001000},
        {0x0000009f, 0x00b48000}
};

#define HAWAII_IO_MC_REGS_SIZE 22

static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
        {0x0000007d, 0x40000000},
        {0x0000007e, 0x40180304},
        {0x0000007f, 0x0000ff00},
        {0x00000081, 0x00000000},
        {0x00000083, 0x00000800},
        {0x00000086, 0x00000000},
        {0x00000087, 0x00000100},
        {0x00000088, 0x00020100},
        {0x00000089, 0x00000000},
        {0x0000008b, 0x00040000},
        {0x0000008c, 0x00000100},
        {0x0000008e, 0xff010000},
        {0x00000090, 0xffffefff},
        {0x00000091, 0xfff3efff},
        {0x00000092, 0xfff3efbf},
        {0x00000093, 0xf7ffffff},
        {0x00000094, 0xffffff7f},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x0000009f, 0x00c79000}
};
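/*
 * Each row in the two tables above is an (index, data) pair:
 * ci_mc_load_microcode() below writes the first word to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA,
 * patching the MC IO sequencer before the ucode itself is streamed in.
 */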


/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances. Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
                            u32 me, u32 pipe, u32 queue, u32 vmid)
{
        u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
                             MEID(me & 0x3) |
                             VMID(vmid & 0xf) |
                             QUEUEID(queue & 0x7));
        WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
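/*
 * Usage sketch (the pattern used by the compute queue setup later in
 * this file): callers bracket access to instanced registers with a
 * select/deselect pair while holding rdev->srbm_mutex so concurrent
 * selections cannot interleave:
 *
 *        mutex_lock(&rdev->srbm_mutex);
 *        cik_srbm_select(rdev, me, pipe, queue, 0);
 *        ... program per-queue registers (e.g. the CP_HQD_* block) ...
 *        cik_srbm_select(rdev, 0, 0, 0, 0);
 *        mutex_unlock(&rdev->srbm_mutex);
 */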

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
        const __be32 *fw_data;
        u32 running, blackout = 0;
        u32 *io_mc_regs;
        int i, regs_size, ucode_size;

        if (!rdev->mc_fw)
                return -EINVAL;

        ucode_size = rdev->mc_fw->size / 4;

        switch (rdev->family) {
        case CHIP_BONAIRE:
                io_mc_regs = (u32 *)&bonaire_io_mc_regs;
                regs_size = BONAIRE_IO_MC_REGS_SIZE;
                break;
        case CHIP_HAWAII:
                io_mc_regs = (u32 *)&hawaii_io_mc_regs;
                regs_size = HAWAII_IO_MC_REGS_SIZE;
                break;
        default:
                return -EINVAL;
        }

        running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

        if (running == 0) {
                /*
                 * Note: within this branch 'running' is always zero, so
                 * this blackout save is dead code, as is the matching
                 * restore at the end of the branch.
                 */
                if (running) {
                        blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
                        WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
                }

                /* reset the engine and set to writable */
                WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

                /* load mc io regs */
                for (i = 0; i < regs_size; i++) {
                        WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
                        WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
                }
                /* load the MC ucode */
                fw_data = (const __be32 *)rdev->mc_fw->data;
                for (i = 0; i < ucode_size; i++)
                        WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

                /* put the engine back into the active state */
                WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

                /* wait for training to complete */
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
                                break;
                        udelay(1);
                }
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
                                break;
                        udelay(1);
                }

                if (running)
                        WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
        }

        return 0;
}
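/*
 * Usage sketch (matches the startup pattern in this driver): the MC
 * ucode is loaded before the memory controller is brought up, e.g.
 *
 *        r = ci_mc_load_microcode(rdev);
 *        if (r) {
 *                DRM_ERROR("Failed to load MC firmware!\n");
 *                return r;
 *        }
 */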

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
        const char *chip_name;
        size_t pfp_req_size, me_req_size, ce_req_size,
                mec_req_size, rlc_req_size, mc_req_size = 0,
                sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
        char fw_name[30];
        int err;

        DRM_DEBUG("\n");

        switch (rdev->family) {
        case CHIP_BONAIRE:
                chip_name = "BONAIRE";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
                mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
                mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
                break;
        case CHIP_HAWAII:
                chip_name = "HAWAII";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
                mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
                mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
                break;
        case CHIP_KAVERI:
                chip_name = "KAVERI";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = KV_RLC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                break;
        case CHIP_KABINI:
                chip_name = "KABINI";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = KB_RLC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                break;
        case CHIP_MULLINS:
                chip_name = "MULLINS";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = ML_RLC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                break;
        default: BUG();
        }

        DRM_INFO("Loading %s Microcode\n", chip_name);

        snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
        err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->pfp_fw->size != pfp_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->pfp_fw->size, fw_name);
                err = -EINVAL;
                goto out;
        }

        snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
        err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
        if (err)
                goto out;
1924 if (rdev->me_fw->size != me_req_size) {
1925 printk(KERN_ERR
1926 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1927 rdev->me_fw->size, fw_name);
1928 err = -EINVAL;
goto out;
1929 }
1930
1931 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1932 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1933 if (err)
1934 goto out;
1935 if (rdev->ce_fw->size != ce_req_size) {
1936 printk(KERN_ERR
1937 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1938 rdev->ce_fw->size, fw_name);
1939 err = -EINVAL;
goto out;
1940 }
1941
1942 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1943 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1944 if (err)
1945 goto out;
1946 if (rdev->mec_fw->size != mec_req_size) {
1947 printk(KERN_ERR
1948 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1949 rdev->mec_fw->size, fw_name);
1950 err = -EINVAL;
goto out;
1951 }
1952
1953 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1954 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1955 if (err)
1956 goto out;
1957 if (rdev->rlc_fw->size != rlc_req_size) {
1958 printk(KERN_ERR
1959 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1960 rdev->rlc_fw->size, fw_name);
1961 err = -EINVAL;
goto out;
1962 }
1963
1964 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1965 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1966 if (err)
1967 goto out;
1968 if (rdev->sdma_fw->size != sdma_req_size) {
1969 printk(KERN_ERR
1970 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1971 rdev->sdma_fw->size, fw_name);
1972 err = -EINVAL;
goto out;
1973 }
1974
1975 /* No SMC, MC ucode on APUs */
1976 if (!(rdev->flags & RADEON_IS_IGP)) {
1977 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1978 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1979 if (err) {
1980 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1981 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1982 if (err)
1983 goto out;
1984 }
1985 if ((rdev->mc_fw->size != mc_req_size) &&
1986 (rdev->mc_fw->size != mc2_req_size)) {
1987 printk(KERN_ERR
1988 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1989 rdev->mc_fw->size, fw_name);
1990 err = -EINVAL;
goto out;
1991 }
1992 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1993
1994 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1995 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1996 if (err) {
1997 printk(KERN_ERR
1998 "smc: error loading firmware \"%s\"\n",
1999 fw_name);
2000 release_firmware(rdev->smc_fw);
2001 rdev->smc_fw = NULL;
2002 err = 0;
2003 } else if (rdev->smc_fw->size != smc_req_size) {
2004 printk(KERN_ERR
2005 "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2006 rdev->smc_fw->size, fw_name);
2007 err = -EINVAL;
2008 }
2009 }
2010
2011 out:
2012 if (err) {
2013 if (err != -EINVAL)
2014 printk(KERN_ERR
2015 "cik_cp: Failed to load firmware \"%s\"\n",
2016 fw_name);
2017 release_firmware(rdev->pfp_fw);
2018 rdev->pfp_fw = NULL;
2019 release_firmware(rdev->me_fw);
2020 rdev->me_fw = NULL;
2021 release_firmware(rdev->ce_fw);
2022 rdev->ce_fw = NULL;
2023 release_firmware(rdev->rlc_fw);
2024 rdev->rlc_fw = NULL;
2025 release_firmware(rdev->mc_fw);
2026 rdev->mc_fw = NULL;
2027 release_firmware(rdev->smc_fw);
2028 rdev->smc_fw = NULL;
2029 }
2030 return err;
2031 }
2032
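/*
 * Sketch of what the function above resolves for a Bonaire board (names
 * are built by the snprintf() calls above; required sizes are in bytes,
 * i.e. 32-bit ucode words * 4):
 *
 *	radeon/BONAIRE_pfp.bin -> rdev->pfp_fw  (CIK_PFP_UCODE_SIZE * 4)
 *	radeon/BONAIRE_mc2.bin -> rdev->mc_fw, falling back to
 *	radeon/BONAIRE_mc.bin  when no mc2 image is installed
 *	radeon/BONAIRE_smc.bin -> rdev->smc_fw, treated as optional (on
 *	failure the error is cleared and rdev->smc_fw stays NULL)
 */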
2033 /*
2034 * Core functions
2035 */
2036 /**
2037 * cik_tiling_mode_table_init - init the hw tiling table
2038 *
2039 * @rdev: radeon_device pointer
2040 *
2041 * Starting with SI, the tiling setup is done globally in a
2042 * set of 32 tiling modes. Rather than selecting each set of
2043 * parameters per surface as on older asics, we just select
2044 * which index in the tiling table we want to use, and the
2045 * surface uses those parameters (CIK).
2046 */
2047 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2048 {
2049 const u32 num_tile_mode_states = 32;
2050 const u32 num_secondary_tile_mode_states = 16;
2051 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2052 u32 num_pipe_configs;
2053 u32 num_rbs = rdev->config.cik.max_backends_per_se *
2054 rdev->config.cik.max_shader_engines;
2055
2056 switch (rdev->config.cik.mem_row_size_in_kb) {
2057 case 1:
2058 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2059 break;
2060 case 2:
2061 default:
2062 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2063 break;
2064 case 4:
2065 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2066 break;
2067 }
2068
2069 num_pipe_configs = rdev->config.cik.max_tile_pipes;
2070 if (num_pipe_configs > 8)
2071 num_pipe_configs = 16;
2072
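/* only 2/4/8/16-pipe tables exist below; Hawaii reports more than
 * 8 tile pipes, so anything above 8 selects the 16-pipe tables */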
2073 if (num_pipe_configs == 16) {
2074 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2075 switch (reg_offset) {
2076 case 0:
2077 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2078 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2079 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2080 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2081 break;
2082 case 1:
2083 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2084 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2085 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2086 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2087 break;
2088 case 2:
2089 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2090 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2091 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2092 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2093 break;
2094 case 3:
2095 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2096 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2097 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2098 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2099 break;
2100 case 4:
2101 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2102 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2103 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2104 TILE_SPLIT(split_equal_to_row_size));
2105 break;
2106 case 5:
2107 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2108 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2109 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2110 break;
2111 case 6:
2112 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2113 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2114 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2115 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2116 break;
2117 case 7:
2118 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2119 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2120 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2121 TILE_SPLIT(split_equal_to_row_size));
2122 break;
2123 case 8:
2124 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2125 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2126 break;
2127 case 9:
2128 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2129 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2130 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2131 break;
2132 case 10:
2133 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2134 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2135 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2136 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2137 break;
2138 case 11:
2139 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2140 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2141 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2142 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2143 break;
2144 case 12:
2145 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2146 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2147 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2148 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2149 break;
2150 case 13:
2151 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2152 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2153 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2154 break;
2155 case 14:
2156 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2158 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2159 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2160 break;
2161 case 16:
2162 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2163 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2164 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2165 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2166 break;
2167 case 17:
2168 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2169 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2170 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2171 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172 break;
2173 case 27:
2174 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2175 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2176 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2177 break;
2178 case 28:
2179 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2180 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2181 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2182 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2183 break;
2184 case 29:
2185 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2186 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2187 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2188 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2189 break;
2190 case 30:
2191 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2192 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2193 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2194 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2195 break;
2196 default:
2197 gb_tile_moden = 0;
2198 break;
2199 }
2200 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2201 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2202 }
2203 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2204 switch (reg_offset) {
2205 case 0:
2206 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2208 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2209 NUM_BANKS(ADDR_SURF_16_BANK));
2210 break;
2211 case 1:
2212 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2213 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2214 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2215 NUM_BANKS(ADDR_SURF_16_BANK));
2216 break;
2217 case 2:
2218 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2219 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2220 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2221 NUM_BANKS(ADDR_SURF_16_BANK));
2222 break;
2223 case 3:
2224 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2225 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2226 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2227 NUM_BANKS(ADDR_SURF_16_BANK));
2228 break;
2229 case 4:
2230 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2231 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2232 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2233 NUM_BANKS(ADDR_SURF_8_BANK));
2234 break;
2235 case 5:
2236 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2237 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2238 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2239 NUM_BANKS(ADDR_SURF_4_BANK));
2240 break;
2241 case 6:
2242 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2245 NUM_BANKS(ADDR_SURF_2_BANK));
2246 break;
2247 case 8:
2248 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2249 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2250 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2251 NUM_BANKS(ADDR_SURF_16_BANK));
2252 break;
2253 case 9:
2254 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257 NUM_BANKS(ADDR_SURF_16_BANK));
2258 break;
2259 case 10:
2260 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2263 NUM_BANKS(ADDR_SURF_16_BANK));
2264 break;
2265 case 11:
2266 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2268 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2269 NUM_BANKS(ADDR_SURF_8_BANK));
2270 break;
2271 case 12:
2272 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2275 NUM_BANKS(ADDR_SURF_4_BANK));
2276 break;
2277 case 13:
2278 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2280 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2281 NUM_BANKS(ADDR_SURF_2_BANK));
2282 break;
2283 case 14:
2284 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2285 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2286 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2287 NUM_BANKS(ADDR_SURF_2_BANK));
2288 break;
2289 default:
2290 gb_tile_moden = 0;
2291 break;
2292 }
rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2293 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2294 }
2295 } else if (num_pipe_configs == 8) {
2296 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2297 switch (reg_offset) {
2298 case 0:
2299 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2300 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2301 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2302 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2303 break;
2304 case 1:
2305 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2306 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2307 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2308 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2309 break;
2310 case 2:
2311 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2312 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2313 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2314 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2315 break;
2316 case 3:
2317 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2319 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2320 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2321 break;
2322 case 4:
2323 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2325 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2326 TILE_SPLIT(split_equal_to_row_size));
2327 break;
2328 case 5:
2329 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2330 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2331 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2332 break;
2333 case 6:
2334 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2335 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2336 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2337 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2338 break;
2339 case 7:
2340 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2341 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2342 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2343 TILE_SPLIT(split_equal_to_row_size));
2344 break;
2345 case 8:
2346 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2347 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2348 break;
2349 case 9:
2350 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2351 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2352 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2353 break;
2354 case 10:
2355 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2356 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2357 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359 break;
2360 case 11:
2361 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2362 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2363 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2364 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2365 break;
2366 case 12:
2367 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2368 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2369 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2370 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2371 break;
2372 case 13:
2373 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2374 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2375 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2376 break;
2377 case 14:
2378 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2381 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2382 break;
2383 case 16:
2384 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2385 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2386 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2387 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388 break;
2389 case 17:
2390 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2391 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2392 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2393 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2394 break;
2395 case 27:
2396 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2397 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2398 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2399 break;
2400 case 28:
2401 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2403 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2404 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405 break;
2406 case 29:
2407 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2408 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2409 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2410 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2411 break;
2412 case 30:
2413 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2414 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2415 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2416 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417 break;
2418 default:
2419 gb_tile_moden = 0;
2420 break;
2421 }
2422 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2423 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2424 }
2425 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2426 switch (reg_offset) {
2427 case 0:
2428 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2430 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2431 NUM_BANKS(ADDR_SURF_16_BANK));
2432 break;
2433 case 1:
2434 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2436 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2437 NUM_BANKS(ADDR_SURF_16_BANK));
2438 break;
2439 case 2:
2440 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2442 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2443 NUM_BANKS(ADDR_SURF_16_BANK));
2444 break;
2445 case 3:
2446 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2448 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449 NUM_BANKS(ADDR_SURF_16_BANK));
2450 break;
2451 case 4:
2452 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2455 NUM_BANKS(ADDR_SURF_8_BANK));
2456 break;
2457 case 5:
2458 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2460 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2461 NUM_BANKS(ADDR_SURF_4_BANK));
2462 break;
2463 case 6:
2464 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2467 NUM_BANKS(ADDR_SURF_2_BANK));
2468 break;
2469 case 8:
2470 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2472 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2473 NUM_BANKS(ADDR_SURF_16_BANK));
2474 break;
2475 case 9:
2476 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2479 NUM_BANKS(ADDR_SURF_16_BANK));
2480 break;
2481 case 10:
2482 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2484 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2485 NUM_BANKS(ADDR_SURF_16_BANK));
2486 break;
2487 case 11:
2488 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2491 NUM_BANKS(ADDR_SURF_16_BANK));
2492 break;
2493 case 12:
2494 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2497 NUM_BANKS(ADDR_SURF_8_BANK));
2498 break;
2499 case 13:
2500 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503 NUM_BANKS(ADDR_SURF_4_BANK));
2504 break;
2505 case 14:
2506 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2508 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2509 NUM_BANKS(ADDR_SURF_2_BANK));
2510 break;
2511 default:
2512 gb_tile_moden = 0;
2513 break;
2514 }
2515 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2516 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2517 }
2518 } else if (num_pipe_configs == 4) {
2519 if (num_rbs == 4) {
2520 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2521 switch (reg_offset) {
2522 case 0:
2523 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2525 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2526 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2527 break;
2528 case 1:
2529 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2531 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2532 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2533 break;
2534 case 2:
2535 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2536 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2537 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2538 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2539 break;
2540 case 3:
2541 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2543 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2544 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2545 break;
2546 case 4:
2547 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2548 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2549 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2550 TILE_SPLIT(split_equal_to_row_size));
2551 break;
2552 case 5:
2553 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2554 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2555 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2556 break;
2557 case 6:
2558 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2559 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2560 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2561 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2562 break;
2563 case 7:
2564 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2565 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2567 TILE_SPLIT(split_equal_to_row_size));
2568 break;
2569 case 8:
2570 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2571 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2572 break;
2573 case 9:
2574 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2575 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2576 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2577 break;
2578 case 10:
2579 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2581 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2582 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583 break;
2584 case 11:
2585 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2586 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2587 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2588 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2589 break;
2590 case 12:
2591 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2592 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2593 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2594 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2595 break;
2596 case 13:
2597 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2598 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2599 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2600 break;
2601 case 14:
2602 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2603 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2604 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2605 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606 break;
2607 case 16:
2608 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2609 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2610 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2611 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2612 break;
2613 case 17:
2614 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2615 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2616 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2617 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2618 break;
2619 case 27:
2620 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2621 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2622 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2623 break;
2624 case 28:
2625 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2626 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2627 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2628 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2629 break;
2630 case 29:
2631 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2632 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2633 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2635 break;
2636 case 30:
2637 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2638 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2639 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2640 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2641 break;
2642 default:
2643 gb_tile_moden = 0;
2644 break;
2645 }
2646 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2647 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2648 }
2649 } else if (num_rbs < 4) {
2650 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2651 switch (reg_offset) {
2652 case 0:
2653 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2655 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2656 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2657 break;
2658 case 1:
2659 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2661 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2662 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2663 break;
2664 case 2:
2665 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2668 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2669 break;
2670 case 3:
2671 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2673 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2674 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2675 break;
2676 case 4:
2677 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2679 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2680 TILE_SPLIT(split_equal_to_row_size));
2681 break;
2682 case 5:
2683 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2684 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2685 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2686 break;
2687 case 6:
2688 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2690 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2691 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2692 break;
2693 case 7:
2694 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2695 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2696 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2697 TILE_SPLIT(split_equal_to_row_size));
2698 break;
2699 case 8:
2700 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2701 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2702 break;
2703 case 9:
2704 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2705 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2706 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2707 break;
2708 case 10:
2709 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2710 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2711 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2712 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2713 break;
2714 case 11:
2715 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2716 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2717 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2718 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719 break;
2720 case 12:
2721 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2722 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2723 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2724 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2725 break;
2726 case 13:
2727 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2728 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2729 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2730 break;
2731 case 14:
2732 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2735 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736 break;
2737 case 16:
2738 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2739 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2740 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2741 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742 break;
2743 case 17:
2744 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2745 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2746 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2747 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2748 break;
2749 case 27:
2750 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2751 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2753 break;
2754 case 28:
2755 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2756 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2757 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2758 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2759 break;
2760 case 29:
2761 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2762 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2763 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2764 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2765 break;
2766 case 30:
2767 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2768 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2769 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2770 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2771 break;
2772 default:
2773 gb_tile_moden = 0;
2774 break;
2775 }
2776 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2777 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2778 }
2779 }
2780 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2781 switch (reg_offset) {
2782 case 0:
2783 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2785 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2786 NUM_BANKS(ADDR_SURF_16_BANK));
2787 break;
2788 case 1:
2789 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2791 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2792 NUM_BANKS(ADDR_SURF_16_BANK));
2793 break;
2794 case 2:
2795 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2796 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2797 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2798 NUM_BANKS(ADDR_SURF_16_BANK));
2799 break;
2800 case 3:
2801 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2802 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2803 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2804 NUM_BANKS(ADDR_SURF_16_BANK));
2805 break;
2806 case 4:
2807 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2809 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2810 NUM_BANKS(ADDR_SURF_16_BANK));
2811 break;
2812 case 5:
2813 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2815 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2816 NUM_BANKS(ADDR_SURF_8_BANK));
2817 break;
2818 case 6:
2819 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2820 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2821 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2822 NUM_BANKS(ADDR_SURF_4_BANK));
2823 break;
2824 case 8:
2825 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2826 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2827 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2828 NUM_BANKS(ADDR_SURF_16_BANK));
2829 break;
2830 case 9:
2831 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2832 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2833 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2834 NUM_BANKS(ADDR_SURF_16_BANK));
2835 break;
2836 case 10:
2837 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2839 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2840 NUM_BANKS(ADDR_SURF_16_BANK));
2841 break;
2842 case 11:
2843 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2845 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846 NUM_BANKS(ADDR_SURF_16_BANK));
2847 break;
2848 case 12:
2849 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2852 NUM_BANKS(ADDR_SURF_16_BANK));
2853 break;
2854 case 13:
2855 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2856 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2857 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2858 NUM_BANKS(ADDR_SURF_8_BANK));
2859 break;
2860 case 14:
2861 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2864 NUM_BANKS(ADDR_SURF_4_BANK));
2865 break;
2866 default:
2867 gb_tile_moden = 0;
2868 break;
2869 }
2870 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2871 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2872 }
2873 } else if (num_pipe_configs == 2) {
2874 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2875 switch (reg_offset) {
2876 case 0:
2877 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2879 PIPE_CONFIG(ADDR_SURF_P2) |
2880 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2881 break;
2882 case 1:
2883 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2885 PIPE_CONFIG(ADDR_SURF_P2) |
2886 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2887 break;
2888 case 2:
2889 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891 PIPE_CONFIG(ADDR_SURF_P2) |
2892 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2893 break;
2894 case 3:
2895 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2897 PIPE_CONFIG(ADDR_SURF_P2) |
2898 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2899 break;
2900 case 4:
2901 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2903 PIPE_CONFIG(ADDR_SURF_P2) |
2904 TILE_SPLIT(split_equal_to_row_size));
2905 break;
2906 case 5:
2907 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2908 PIPE_CONFIG(ADDR_SURF_P2) |
2909 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2910 break;
2911 case 6:
2912 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2914 PIPE_CONFIG(ADDR_SURF_P2) |
2915 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2916 break;
2917 case 7:
2918 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2919 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2920 PIPE_CONFIG(ADDR_SURF_P2) |
2921 TILE_SPLIT(split_equal_to_row_size));
2922 break;
2923 case 8:
2924 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2925 PIPE_CONFIG(ADDR_SURF_P2);
2926 break;
2927 case 9:
2928 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2929 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2930 PIPE_CONFIG(ADDR_SURF_P2));
2931 break;
2932 case 10:
2933 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2934 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2935 PIPE_CONFIG(ADDR_SURF_P2) |
2936 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2937 break;
2938 case 11:
2939 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2940 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2941 PIPE_CONFIG(ADDR_SURF_P2) |
2942 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2943 break;
2944 case 12:
2945 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2946 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2947 PIPE_CONFIG(ADDR_SURF_P2) |
2948 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949 break;
2950 case 13:
2951 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2952 PIPE_CONFIG(ADDR_SURF_P2) |
2953 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2954 break;
2955 case 14:
2956 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2957 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958 PIPE_CONFIG(ADDR_SURF_P2) |
2959 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2960 break;
2961 case 16:
2962 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2964 PIPE_CONFIG(ADDR_SURF_P2) |
2965 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2966 break;
2967 case 17:
2968 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2969 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2970 PIPE_CONFIG(ADDR_SURF_P2) |
2971 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2972 break;
2973 case 27:
2974 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2975 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2976 PIPE_CONFIG(ADDR_SURF_P2));
2977 break;
2978 case 28:
2979 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2980 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2981 PIPE_CONFIG(ADDR_SURF_P2) |
2982 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2983 break;
2984 case 29:
2985 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2986 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2987 PIPE_CONFIG(ADDR_SURF_P2) |
2988 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2989 break;
2990 case 30:
2991 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2992 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2993 PIPE_CONFIG(ADDR_SURF_P2) |
2994 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2995 break;
2996 default:
2997 gb_tile_moden = 0;
2998 break;
2999 }
3000 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3001 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3002 }
3003 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3004 switch (reg_offset) {
3005 case 0:
3006 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3007 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3008 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3009 NUM_BANKS(ADDR_SURF_16_BANK));
3010 break;
3011 case 1:
3012 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3013 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3014 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3015 NUM_BANKS(ADDR_SURF_16_BANK));
3016 break;
3017 case 2:
3018 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3019 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3020 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3021 NUM_BANKS(ADDR_SURF_16_BANK));
3022 break;
3023 case 3:
3024 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3026 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027 NUM_BANKS(ADDR_SURF_16_BANK));
3028 break;
3029 case 4:
3030 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3032 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3033 NUM_BANKS(ADDR_SURF_16_BANK));
3034 break;
3035 case 5:
3036 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3038 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3039 NUM_BANKS(ADDR_SURF_16_BANK));
3040 break;
3041 case 6:
3042 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3043 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3044 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3045 NUM_BANKS(ADDR_SURF_8_BANK));
3046 break;
3047 case 8:
3048 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3049 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3050 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3051 NUM_BANKS(ADDR_SURF_16_BANK));
3052 break;
3053 case 9:
3054 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3055 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3056 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3057 NUM_BANKS(ADDR_SURF_16_BANK));
3058 break;
3059 case 10:
3060 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3061 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3062 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3063 NUM_BANKS(ADDR_SURF_16_BANK));
3064 break;
3065 case 11:
3066 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3067 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3068 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3069 NUM_BANKS(ADDR_SURF_16_BANK));
3070 break;
3071 case 12:
3072 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3073 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3074 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3075 NUM_BANKS(ADDR_SURF_16_BANK));
3076 break;
3077 case 13:
3078 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3079 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3080 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3081 NUM_BANKS(ADDR_SURF_16_BANK));
3082 break;
3083 case 14:
3084 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3085 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3086 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3087 NUM_BANKS(ADDR_SURF_8_BANK));
3088 break;
3089 default:
3090 gb_tile_moden = 0;
3091 break;
3092 }
3093 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3094 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3095 }
3096 } else
3097 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3098 }
3099
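/*
 * The tile_mode_array/macrotile_mode_array values cached above are what
 * userspace ultimately consumes: a CIK surface carries only an index into
 * these tables, which mesa reads back through the RADEON_INFO ioctl
 * (the tile mode array / CIK macrotile mode array queries).  Rough
 * consumer sketch:
 *
 *	u32 moden = tile_mode_array[tile_index];
 *	decode moden with the PIPE_CONFIG()/TILE_SPLIT()/... fields in cikd.h
 */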
3100 /**
3101 * cik_select_se_sh - select which SE, SH to address
3102 *
3103 * @rdev: radeon_device pointer
3104 * @se_num: shader engine to address
3105 * @sh_num: sh block to address
3106 *
3107 * Select which SE, SH combinations to address. Certain
3108 * registers are instanced per SE or SH. 0xffffffff means
3109 * broadcast to all SEs or SHs (CIK).
3110 */
3111 static void cik_select_se_sh(struct radeon_device *rdev,
3112 u32 se_num, u32 sh_num)
3113 {
3114 u32 data = INSTANCE_BROADCAST_WRITES;
3115
3116 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3117 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3118 else if (se_num == 0xffffffff)
3119 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3120 else if (sh_num == 0xffffffff)
3121 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3122 else
3123 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3124 WREG32(GRBM_GFX_INDEX, data);
3125 }
3126
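/*
 * Typical use (see cik_setup_rb() below): select one instance, read or
 * write the instanced register, then drop back to broadcast so later
 * writes hit every SE/SH again:
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	data = RREG32(CC_RB_BACKEND_DISABLE);
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */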
3127 /**
3128 * cik_create_bitmask - create a bitmask
3129 *
3130 * @bit_width: length of the mask
3131 *
3132 * create a variable length bit mask (CIK).
3133 * Returns the bitmask.
3134 */
3135 static u32 cik_create_bitmask(u32 bit_width)
3136 {
3137 u32 i, mask = 0;
3138
3139 for (i = 0; i < bit_width; i++) {
3140 mask <<= 1;
3141 mask |= 1;
3142 }
3143 return mask;
3144 }
3145
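/*
 * For reference, for 0 < bit_width < 32 the loop above is equivalent to
 * ((1u << bit_width) - 1), e.g. cik_create_bitmask(4) == 0xf; the loop
 * form also yields 0xffffffff for bit_width == 32 without relying on an
 * undefined 32-bit shift.
 */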
3146 /**
3147 * cik_get_rb_disabled - computes the mask of disabled RBs
3148 *
3149 * @rdev: radeon_device pointer
3150 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3151 * @sh_per_se: number of SH blocks per SE for the asic
3153 *
3154 * Calculates the bitmask of disabled RBs (CIK).
3155 * Returns the disabled RB bitmask.
3156 */
3157 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3158 u32 max_rb_num_per_se,
3159 u32 sh_per_se)
3160 {
3161 u32 data, mask;
3162
3163 data = RREG32(CC_RB_BACKEND_DISABLE);
3164 if (data & 1)
3165 data &= BACKEND_DISABLE_MASK;
3166 else
3167 data = 0;
3168 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3169
3170 data >>= BACKEND_DISABLE_SHIFT;
3171
3172 mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3173
3174 return data & mask;
3175 }
3176
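/*
 * Worked example (hypothetical fuse values): with 4 RBs per SE and 1 SH
 * per SE the mask is cik_create_bitmask(4 / 1) == 0xf; if the harvest
 * fuses disable RB2, the combined BACKEND_DISABLE field reads 0x4 and the
 * function returns 0x4, i.e. one set bit per *disabled* backend.
 */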
3177 /**
3178 * cik_setup_rb - setup the RBs on the asic
3179 *
3180 * @rdev: radeon_device pointer
3181 * @se_num: number of SEs (shader engines) for the asic
3182 * @sh_per_se: number of SH blocks per SE for the asic
3183 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3184 *
3185 * Configures per-SE/SH RB registers (CIK).
3186 */
3187 static void cik_setup_rb(struct radeon_device *rdev,
3188 u32 se_num, u32 sh_per_se,
3189 u32 max_rb_num_per_se)
3190 {
3191 int i, j;
3192 u32 data, mask;
3193 u32 disabled_rbs = 0;
3194 u32 enabled_rbs = 0;
3195
3196 for (i = 0; i < se_num; i++) {
3197 for (j = 0; j < sh_per_se; j++) {
3198 cik_select_se_sh(rdev, i, j);
3199 data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3200 if (rdev->family == CHIP_HAWAII)
3201 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3202 else
3203 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3204 }
3205 }
3206 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3207
3208 mask = 1;
3209 for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3210 if (!(disabled_rbs & mask))
3211 enabled_rbs |= mask;
3212 mask <<= 1;
3213 }
3214
3215 rdev->config.cik.backend_enable_mask = enabled_rbs;
3216
3217 for (i = 0; i < se_num; i++) {
3218 cik_select_se_sh(rdev, i, 0xffffffff);
3219 data = 0;
3220 for (j = 0; j < sh_per_se; j++) {
3221 switch (enabled_rbs & 3) {
3222 case 0:
3223 if (j == 0)
3224 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3225 else
3226 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3227 break;
3228 case 1:
3229 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3230 break;
3231 case 2:
3232 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3233 break;
3234 case 3:
3235 default:
3236 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3237 break;
3238 }
3239 enabled_rbs >>= 2;
3240 }
3241 WREG32(PA_SC_RASTER_CONFIG, data);
3242 }
3243 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3244 }
3245
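/*
 * Continuing the example above (hypothetical Hawaii-like layout: 2 SEs,
 * 1 SH per SE, 4 RBs per SE, bitmap width 4 per SH): with RB2 of SE0
 * fused off, disabled_rbs == 0x004, so the loop above yields
 * enabled_rbs == 0xfb over the 8 populated RB bits, and that is the
 * value recorded in rdev->config.cik.backend_enable_mask.
 */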
3246 /**
3247 * cik_gpu_init - setup the 3D engine
3248 *
3249 * @rdev: radeon_device pointer
3250 *
3251 * Configures the 3D engine and tiling configuration
3252 * registers so that the 3D engine is usable.
3253 */
3254 static void cik_gpu_init(struct radeon_device *rdev)
3255 {
3256 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3257 u32 mc_shared_chmap, mc_arb_ramcfg;
3258 u32 hdp_host_path_cntl;
3259 u32 tmp;
3260 int i, j;
3261
3262 switch (rdev->family) {
3263 case CHIP_BONAIRE:
3264 rdev->config.cik.max_shader_engines = 2;
3265 rdev->config.cik.max_tile_pipes = 4;
3266 rdev->config.cik.max_cu_per_sh = 7;
3267 rdev->config.cik.max_sh_per_se = 1;
3268 rdev->config.cik.max_backends_per_se = 2;
3269 rdev->config.cik.max_texture_channel_caches = 4;
3270 rdev->config.cik.max_gprs = 256;
3271 rdev->config.cik.max_gs_threads = 32;
3272 rdev->config.cik.max_hw_contexts = 8;
3273
3274 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3275 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3276 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3277 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3278 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3279 break;
3280 case CHIP_HAWAII:
3281 rdev->config.cik.max_shader_engines = 4;
3282 rdev->config.cik.max_tile_pipes = 16;
3283 rdev->config.cik.max_cu_per_sh = 11;
3284 rdev->config.cik.max_sh_per_se = 1;
3285 rdev->config.cik.max_backends_per_se = 4;
3286 rdev->config.cik.max_texture_channel_caches = 16;
3287 rdev->config.cik.max_gprs = 256;
3288 rdev->config.cik.max_gs_threads = 32;
3289 rdev->config.cik.max_hw_contexts = 8;
3290
3291 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3292 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3293 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3294 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3295 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3296 break;
3297 case CHIP_KAVERI:
3298 rdev->config.cik.max_shader_engines = 1;
3299 rdev->config.cik.max_tile_pipes = 4;
3300 if ((rdev->pdev->device == 0x1304) ||
3301 (rdev->pdev->device == 0x1305) ||
3302 (rdev->pdev->device == 0x130C) ||
3303 (rdev->pdev->device == 0x130F) ||
3304 (rdev->pdev->device == 0x1310) ||
3305 (rdev->pdev->device == 0x1311) ||
3306 (rdev->pdev->device == 0x131C)) {
3307 rdev->config.cik.max_cu_per_sh = 8;
3308 rdev->config.cik.max_backends_per_se = 2;
3309 } else if ((rdev->pdev->device == 0x1309) ||
3310 (rdev->pdev->device == 0x130A) ||
3311 (rdev->pdev->device == 0x130D) ||
3312 (rdev->pdev->device == 0x1313) ||
3313 (rdev->pdev->device == 0x131D)) {
3314 rdev->config.cik.max_cu_per_sh = 6;
3315 rdev->config.cik.max_backends_per_se = 2;
3316 } else if ((rdev->pdev->device == 0x1306) ||
3317 (rdev->pdev->device == 0x1307) ||
3318 (rdev->pdev->device == 0x130B) ||
3319 (rdev->pdev->device == 0x130E) ||
3320 (rdev->pdev->device == 0x1315) ||
3321 (rdev->pdev->device == 0x131B)) {
3322 rdev->config.cik.max_cu_per_sh = 4;
3323 rdev->config.cik.max_backends_per_se = 1;
3324 } else {
3325 rdev->config.cik.max_cu_per_sh = 3;
3326 rdev->config.cik.max_backends_per_se = 1;
3327 }
3328 rdev->config.cik.max_sh_per_se = 1;
3329 rdev->config.cik.max_texture_channel_caches = 4;
3330 rdev->config.cik.max_gprs = 256;
3331 rdev->config.cik.max_gs_threads = 16;
3332 rdev->config.cik.max_hw_contexts = 8;
3333
3334 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3335 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3336 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3337 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3338 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3339 break;
3340 case CHIP_KABINI:
3341 case CHIP_MULLINS:
3342 default:
3343 rdev->config.cik.max_shader_engines = 1;
3344 rdev->config.cik.max_tile_pipes = 2;
3345 rdev->config.cik.max_cu_per_sh = 2;
3346 rdev->config.cik.max_sh_per_se = 1;
3347 rdev->config.cik.max_backends_per_se = 1;
3348 rdev->config.cik.max_texture_channel_caches = 2;
3349 rdev->config.cik.max_gprs = 256;
3350 rdev->config.cik.max_gs_threads = 16;
3351 rdev->config.cik.max_hw_contexts = 8;
3352
3353 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3354 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3355 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3356 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3357 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3358 break;
3359 }
3360
3361 /* Initialize HDP */
3362 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3363 WREG32((0x2c14 + j), 0x00000000);
3364 WREG32((0x2c18 + j), 0x00000000);
3365 WREG32((0x2c1c + j), 0x00000000);
3366 WREG32((0x2c20 + j), 0x00000000);
3367 WREG32((0x2c24 + j), 0x00000000);
3368 }
3369
3370 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3371
3372 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3373
3374 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3375 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3376
3377 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3378 rdev->config.cik.mem_max_burst_length_bytes = 256;
3379 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3380 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3381 if (rdev->config.cik.mem_row_size_in_kb > 4)
3382 rdev->config.cik.mem_row_size_in_kb = 4;
3383 /* XXX use MC settings? */
3384 rdev->config.cik.shader_engine_tile_size = 32;
3385 rdev->config.cik.num_gpus = 1;
3386 rdev->config.cik.multi_gpu_tile_size = 64;
3387
3388 /* fix up row size */
3389 gb_addr_config &= ~ROW_SIZE_MASK;
3390 switch (rdev->config.cik.mem_row_size_in_kb) {
3391 case 1:
3392 default:
3393 gb_addr_config |= ROW_SIZE(0);
3394 break;
3395 case 2:
3396 gb_addr_config |= ROW_SIZE(1);
3397 break;
3398 case 4:
3399 gb_addr_config |= ROW_SIZE(2);
3400 break;
3401 }
3402
3403 /* setup tiling info dword. gb_addr_config is not adequate since it does
3404 * not have bank info, so create a custom tiling dword.
3405 * bits 3:0 num_pipes
3406 * bits 7:4 num_banks
3407 * bits 11:8 group_size
3408 * bits 15:12 row_size
3409 */
3410 rdev->config.cik.tile_config = 0;
3411 switch (rdev->config.cik.num_tile_pipes) {
3412 case 1:
3413 rdev->config.cik.tile_config |= (0 << 0);
3414 break;
3415 case 2:
3416 rdev->config.cik.tile_config |= (1 << 0);
3417 break;
3418 case 4:
3419 rdev->config.cik.tile_config |= (2 << 0);
3420 break;
3421 case 8:
3422 default:
3423 /* XXX what about 12? */
3424 rdev->config.cik.tile_config |= (3 << 0);
3425 break;
3426 }
3427 rdev->config.cik.tile_config |=
3428 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3429 rdev->config.cik.tile_config |=
3430 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3431 rdev->config.cik.tile_config |=
3432 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3433
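/*
 * Example decode (values hypothetical): tile_config == 0x3102 would
 * mean 4 pipes (bits 3:0 == 2), NOOFBANK field 0 (bits 7:4),
 * PIPE_INTERLEAVE field 1 (bits 11:8) and ROW_SIZE field 3 (bits
 * 15:12); userspace splits the dword back apart the same way.
 */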
3434 WREG32(GB_ADDR_CONFIG, gb_addr_config);
3435 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3436 WREG32(DMIF_ADDR_CALC, gb_addr_config);
3437 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3438 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3439 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3440 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3441 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3442
3443 cik_tiling_mode_table_init(rdev);
3444
3445 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3446 rdev->config.cik.max_sh_per_se,
3447 rdev->config.cik.max_backends_per_se);
3448
3449 /* set HW defaults for 3D engine */
3450 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3451
3452 WREG32(SX_DEBUG_1, 0x20);
3453
3454 WREG32(TA_CNTL_AUX, 0x00010000);
3455
3456 tmp = RREG32(SPI_CONFIG_CNTL);
3457 tmp |= 0x03000000;
3458 WREG32(SPI_CONFIG_CNTL, tmp);
3459
3460 WREG32(SQ_CONFIG, 1);
3461
3462 WREG32(DB_DEBUG, 0);
3463
3464 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3465 tmp |= 0x00000400;
3466 WREG32(DB_DEBUG2, tmp);
3467
3468 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3469 tmp |= 0x00020200;
3470 WREG32(DB_DEBUG3, tmp);
3471
3472 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3473 tmp |= 0x00018208;
3474 WREG32(CB_HW_CONTROL, tmp);
3475
3476 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3477
3478 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3479 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3480 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3481 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3482
3483 WREG32(VGT_NUM_INSTANCES, 1);
3484
3485 WREG32(CP_PERFMON_CNTL, 0);
3486
3487 WREG32(SQ_CONFIG, 0);
3488
3489 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3490 FORCE_EOV_MAX_REZ_CNT(255)));
3491
3492 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3493 AUTO_INVLD_EN(ES_AND_GS_AUTO));
3494
3495 WREG32(VGT_GS_VERTEX_REUSE, 16);
3496 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3497
3498 tmp = RREG32(HDP_MISC_CNTL);
3499 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3500 WREG32(HDP_MISC_CNTL, tmp);
3501
3502 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3503 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3504
3505 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3506 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3507
3508 udelay(50);
3509 }
3510
3511 /*
3512 * GPU scratch registers helpers function.
3513 */
3514 /**
3515 * cik_scratch_init - setup driver info for CP scratch regs
3516 *
3517 * @rdev: radeon_device pointer
3518 *
3519 * Set up the number and offset of the CP scratch registers.
3520 * NOTE: use of CP scratch registers is a legacy interface and
3521 * is not used by default on newer asics (r6xx+). On newer asics,
3522 * memory buffers are used for fences rather than scratch regs.
3523 */
3524 static void cik_scratch_init(struct radeon_device *rdev)
3525 {
3526 int i;
3527
3528 rdev->scratch.num_reg = 7;
3529 rdev->scratch.reg_base = SCRATCH_REG0;
3530 for (i = 0; i < rdev->scratch.num_reg; i++) {
3531 rdev->scratch.free[i] = true;
3532 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3533 }
3534 }
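/* with the setup above the driver hands out SCRATCH_REG0..SCRATCH_REG6
 * (reg_base + 0, 4, ..., 24); cik_ring_test() below is the typical
 * get/write/poll/free user of one of these registers
 */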
3535
3536 /**
3537 * cik_ring_test - basic gfx ring test
3538 *
3539 * @rdev: radeon_device pointer
3540 * @ring: radeon_ring structure holding ring information
3541 *
3542 * Allocate a scratch register and write to it using the gfx ring (CIK).
3543 * Provides a basic gfx ring test to verify that the ring is working.
3544 * Used by cik_cp_gfx_resume().
3545 * Returns 0 on success, error on failure.
3546 */
3547 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3548 {
3549 uint32_t scratch;
3550 uint32_t tmp = 0;
3551 unsigned i;
3552 int r;
3553
3554 r = radeon_scratch_get(rdev, &scratch);
3555 if (r) {
3556 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3557 return r;
3558 }
3559 WREG32(scratch, 0xCAFEDEAD);
3560 r = radeon_ring_lock(rdev, ring, 3);
3561 if (r) {
3562 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3563 radeon_scratch_free(rdev, scratch);
3564 return r;
3565 }
3566 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3567 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3568 radeon_ring_write(ring, 0xDEADBEEF);
3569 radeon_ring_unlock_commit(rdev, ring);
3570
3571 for (i = 0; i < rdev->usec_timeout; i++) {
3572 tmp = RREG32(scratch);
3573 if (tmp == 0xDEADBEEF)
3574 break;
3575 DRM_UDELAY(1);
3576 }
3577 if (i < rdev->usec_timeout) {
3578 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3579 } else {
3580 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3581 ring->idx, scratch, tmp);
3582 r = -EINVAL;
3583 }
3584 radeon_scratch_free(rdev, scratch);
3585 return r;
3586 }
3587
3588 /**
3589 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3590 *
3591 * @rdev: radeon_device pointer
3592 * @ridx: radeon ring index
3593 *
3594 * Emits an hdp flush on the cp.
3595 */
3596 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3597 int ridx)
3598 {
3599 struct radeon_ring *ring = &rdev->ring[ridx];
3600 u32 ref_and_mask;
3601
3602 switch (ring->idx) {
3603 case CAYMAN_RING_TYPE_CP1_INDEX:
3604 case CAYMAN_RING_TYPE_CP2_INDEX:
3605 default:
3606 switch (ring->me) {
3607 case 0:
3608 ref_and_mask = CP2 << ring->pipe;
3609 break;
3610 case 1:
3611 ref_and_mask = CP6 << ring->pipe;
3612 break;
3613 default:
3614 return;
3615 }
3616 break;
3617 case RADEON_RING_TYPE_GFX_INDEX:
3618 ref_and_mask = CP0;
3619 break;
3620 }
3621
3622 radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3623 radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3624 WAIT_REG_MEM_FUNCTION(3) | /* == */
3625 WAIT_REG_MEM_ENGINE(1))); /* pfp */
3626 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3627 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3628 radeon_ring_write(ring, ref_and_mask);
3629 radeon_ring_write(ring, ref_and_mask);
3630 radeon_ring_write(ring, 0x20); /* poll interval */
3631 }
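/* worked example of the wait programmed above (an illustrative reading,
 * no new logic): the gfx ring writes the CP0 bit to GPU_HDP_FLUSH_REQ and
 * the CP polls GPU_HDP_FLUSH_DONE until (done & CP0) == CP0; a compute
 * ring on ME 0, pipe 1 would use CP2 << 1 as the ref/mask instead
 */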
3632
3633 /**
3634 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3635 *
3636 * @rdev: radeon_device pointer
3637 * @fence: radeon fence object
3638 *
3639 * Emits a fence sequence number on the gfx ring and flushes
3640 * GPU caches.
3641 */
3642 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3643 struct radeon_fence *fence)
3644 {
3645 struct radeon_ring *ring = &rdev->ring[fence->ring];
3646 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3647
3648 /* EVENT_WRITE_EOP - flush caches, send int */
3649 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3650 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3651 EOP_TC_ACTION_EN |
3652 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3653 EVENT_INDEX(5)));
3654 radeon_ring_write(ring, addr & 0xfffffffc);
3655 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3656 radeon_ring_write(ring, fence->seq);
3657 radeon_ring_write(ring, 0);
3658 /* HDP flush */
3659 cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3660 }
3661
3662 /**
3663 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3664 *
3665 * @rdev: radeon_device pointer
3666 * @fence: radeon fence object
3667 *
3668 * Emits a fence sequence number on the compute ring and flushes
3669 * GPU caches.
3670 */
3671 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3672 struct radeon_fence *fence)
3673 {
3674 struct radeon_ring *ring = &rdev->ring[fence->ring];
3675 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3676
3677 /* RELEASE_MEM - flush caches, send int */
3678 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3679 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3680 EOP_TC_ACTION_EN |
3681 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3682 EVENT_INDEX(5)));
3683 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3684 radeon_ring_write(ring, addr & 0xfffffffc);
3685 radeon_ring_write(ring, upper_32_bits(addr));
3686 radeon_ring_write(ring, fence->seq);
3687 radeon_ring_write(ring, 0);
3688 /* HDP flush */
3689 cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3690 }
3691
3692 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3693 struct radeon_ring *ring,
3694 struct radeon_semaphore *semaphore,
3695 bool emit_wait)
3696 {
3697 uint64_t addr = semaphore->gpu_addr;
3698 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3699
3700 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3701 radeon_ring_write(ring, lower_32_bits(addr));
3702 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3703
3704 return true;
3705 }
3706
3707 /**
3708 * cik_copy_cpdma - copy pages using the CP DMA engine
3709 *
3710 * @rdev: radeon_device pointer
3711 * @src_offset: src GPU address
3712 * @dst_offset: dst GPU address
3713 * @num_gpu_pages: number of GPU pages to xfer
3714 * @fence: radeon fence object
3715 *
3716 * Copy GPU pages using the CP DMA engine (CIK+).
3717 * Used by the radeon ttm implementation to move pages if
3718 * registered as the asic copy callback.
3719 */
3720 int cik_copy_cpdma(struct radeon_device *rdev,
3721 uint64_t src_offset, uint64_t dst_offset,
3722 unsigned num_gpu_pages,
3723 struct radeon_fence **fence)
3724 {
3725 struct radeon_semaphore *sem = NULL;
3726 int ring_index = rdev->asic->copy.blit_ring_index;
3727 struct radeon_ring *ring = &rdev->ring[ring_index];
3728 u32 size_in_bytes, cur_size_in_bytes, control;
3729 int i, num_loops;
3730 int r = 0;
3731
3732 r = radeon_semaphore_create(rdev, &sem);
3733 if (r) {
3734 DRM_ERROR("radeon: moving bo (%d).\n", r);
3735 return r;
3736 }
3737
3738 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3739 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3740 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3741 if (r) {
3742 DRM_ERROR("radeon: moving bo (%d).\n", r);
3743 radeon_semaphore_free(rdev, &sem, NULL);
3744 return r;
3745 }
3746
3747 radeon_semaphore_sync_to(sem, *fence);
3748 radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3749
3750 for (i = 0; i < num_loops; i++) {
3751 cur_size_in_bytes = size_in_bytes;
3752 if (cur_size_in_bytes > 0x1fffff)
3753 cur_size_in_bytes = 0x1fffff;
3754 size_in_bytes -= cur_size_in_bytes;
3755 control = 0;
3756 if (size_in_bytes == 0)
3757 control |= PACKET3_DMA_DATA_CP_SYNC;
3758 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3759 radeon_ring_write(ring, control);
3760 radeon_ring_write(ring, lower_32_bits(src_offset));
3761 radeon_ring_write(ring, upper_32_bits(src_offset));
3762 radeon_ring_write(ring, lower_32_bits(dst_offset));
3763 radeon_ring_write(ring, upper_32_bits(dst_offset));
3764 radeon_ring_write(ring, cur_size_in_bytes);
3765 src_offset += cur_size_in_bytes;
3766 dst_offset += cur_size_in_bytes;
3767 }
3768
3769 r = radeon_fence_emit(rdev, fence, ring->idx);
3770 if (r) {
3771 radeon_ring_unlock_undo(rdev, ring);
3772 radeon_semaphore_free(rdev, &sem, NULL);
3773 return r;
3774 }
3775
3776 radeon_ring_unlock_commit(rdev, ring);
3777 radeon_semaphore_free(rdev, &sem, *fence);
3778
3779 return r;
3780 }
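/* worked example of the chunking above (illustrative arithmetic only):
 * one PACKET3_DMA_DATA moves at most 0x1fffff bytes (~2 MiB - 1), so
 * copying 1024 pages (4 MiB at 4 KiB/page) needs
 * DIV_ROUND_UP(4194304, 0x1fffff) = 3 loops of 7 dwords each
 */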
3781
3782 /*
3783 * IB stuff
3784 */
3785 /**
3786 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3787 *
3788 * @rdev: radeon_device pointer
3789 * @ib: radeon indirect buffer object
3790 *
3791 * Emits a DE (drawing engine) or CE (constant engine) IB
3792 * on the gfx ring. IBs are usually generated by userspace
3793 * acceleration drivers and submitted to the kernel for
3794 * scheduling on the ring. This function schedules the IB
3795 * on the gfx ring for execution by the GPU.
3796 */
3797 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3798 {
3799 struct radeon_ring *ring = &rdev->ring[ib->ring];
3800 u32 header, control = INDIRECT_BUFFER_VALID;
3801
3802 if (ib->is_const_ib) {
3803 /* set switch buffer packet before const IB */
3804 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3805 radeon_ring_write(ring, 0);
3806
3807 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3808 } else {
3809 u32 next_rptr;
3810 if (ring->rptr_save_reg) {
3811 next_rptr = ring->wptr + 3 + 4;
3812 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3813 radeon_ring_write(ring, ((ring->rptr_save_reg -
3814 PACKET3_SET_UCONFIG_REG_START) >> 2));
3815 radeon_ring_write(ring, next_rptr);
3816 } else if (rdev->wb.enabled) {
3817 next_rptr = ring->wptr + 5 + 4;
3818 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3819 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3820 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3821 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3822 radeon_ring_write(ring, next_rptr);
3823 }
3824
3825 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3826 }
3827
3828 control |= ib->length_dw |
3829 (ib->vm ? (ib->vm->id << 24) : 0);
3830
3831 radeon_ring_write(ring, header);
3832 radeon_ring_write(ring,
3833 #ifdef __BIG_ENDIAN
3834 (2 << 0) |
3835 #endif
3836 (ib->gpu_addr & 0xFFFFFFFC));
3837 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3838 radeon_ring_write(ring, control);
3839 }
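/* note on the next_rptr arithmetic above: the rptr_save path emits 3 ring
 * dwords (SET_UCONFIG_REG header, offset, value) and the write-back path
 * emits 5 (WRITE_DATA header plus 4 payload dwords); both are followed by
 * the 4-dword INDIRECT_BUFFER packet, hence wptr + 3 + 4 and wptr + 5 + 4
 */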
3840
3841 /**
3842 * cik_ib_test - basic gfx ring IB test
3843 *
3844 * @rdev: radeon_device pointer
3845 * @ring: radeon_ring structure holding ring information
3846 *
3847 * Allocate an IB and execute it on the gfx ring (CIK).
3848 * Provides a basic gfx ring test to verify that IBs are working.
3849 * Returns 0 on success, error on failure.
3850 */
3851 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3852 {
3853 struct radeon_ib ib;
3854 uint32_t scratch;
3855 uint32_t tmp = 0;
3856 unsigned i;
3857 int r;
3858
3859 r = radeon_scratch_get(rdev, &scratch);
3860 if (r) {
3861 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3862 return r;
3863 }
3864 WREG32(scratch, 0xCAFEDEAD);
3865 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3866 if (r) {
3867 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3868 radeon_scratch_free(rdev, scratch);
3869 return r;
3870 }
3871 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3872 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3873 ib.ptr[2] = 0xDEADBEEF;
3874 ib.length_dw = 3;
3875 r = radeon_ib_schedule(rdev, &ib, NULL);
3876 if (r) {
3877 radeon_scratch_free(rdev, scratch);
3878 radeon_ib_free(rdev, &ib);
3879 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3880 return r;
3881 }
3882 r = radeon_fence_wait(ib.fence, false);
3883 if (r) {
3884 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3885 radeon_scratch_free(rdev, scratch);
3886 radeon_ib_free(rdev, &ib);
3887 return r;
3888 }
3889 for (i = 0; i < rdev->usec_timeout; i++) {
3890 tmp = RREG32(scratch);
3891 if (tmp == 0xDEADBEEF)
3892 break;
3893 DRM_UDELAY(1);
3894 }
3895 if (i < rdev->usec_timeout) {
3896 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3897 } else {
3898 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3899 scratch, tmp);
3900 r = -EINVAL;
3901 }
3902 radeon_scratch_free(rdev, scratch);
3903 radeon_ib_free(rdev, &ib);
3904 return r;
3905 }
3906
3907 /*
3908 * CP.
3909 * On CIK, gfx and compute now have independent command processors.
3910 *
3911 * GFX
3912 * Gfx consists of a single ring and can process both gfx jobs and
3913 * compute jobs. The gfx CP consists of three microengines (ME):
3914 * PFP - Pre-Fetch Parser
3915 * ME - Micro Engine
3916 * CE - Constant Engine
3917 * The PFP and ME make up what is considered the Drawing Engine (DE).
3918 * The CE is an asynchronous engine used for updating buffer descriptors
3919 * used by the DE so that they can be loaded into cache in parallel
3920 * while the DE is processing state update packets.
3921 *
3922 * Compute
3923 * The compute CP consists of two microengines (ME):
3924 * MEC1 - Compute MicroEngine 1
3925 * MEC2 - Compute MicroEngine 2
3926 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3927 * The queues are exposed to userspace and are programmed directly
3928 * by the compute runtime.
3929 */
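/*
 * A minimal sketch of the queue addressing described above, assuming this
 * hypothetical helper (it is not a driver API): compute queues are named by
 * the same (me, pipe, queue) triple that cik_srbm_select() takes.
 */
static inline u32 cik_flat_queue_index(u32 mec, u32 pipe, u32 queue)
{
	/* 2 MECs x 4 pipes x 8 queues = 64 flat slots on KV (32 on CI/KB) */
	return (mec * 4 + pipe) * 8 + queue;
}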
3930 /**
3931 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3932 *
3933 * @rdev: radeon_device pointer
3934 * @enable: enable or disable the MEs
3935 *
3936 * Halts or unhalts the gfx MEs.
3937 */
3938 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3939 {
3940 if (enable)
3941 WREG32(CP_ME_CNTL, 0);
3942 else {
3943 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3944 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3945 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3946 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3947 }
3948 udelay(50);
3949 }
3950
3951 /**
3952 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3953 *
3954 * @rdev: radeon_device pointer
3955 *
3956 * Loads the gfx PFP, ME, and CE ucode.
3957 * Returns 0 for success, -EINVAL if the ucode is not available.
3958 */
3959 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3960 {
3961 const __be32 *fw_data;
3962 int i;
3963
3964 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3965 return -EINVAL;
3966
3967 cik_cp_gfx_enable(rdev, false);
3968
3969 /* PFP */
3970 fw_data = (const __be32 *)rdev->pfp_fw->data;
3971 WREG32(CP_PFP_UCODE_ADDR, 0);
3972 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3973 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3974 WREG32(CP_PFP_UCODE_ADDR, 0);
3975
3976 /* CE */
3977 fw_data = (const __be32 *)rdev->ce_fw->data;
3978 WREG32(CP_CE_UCODE_ADDR, 0);
3979 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3980 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3981 WREG32(CP_CE_UCODE_ADDR, 0);
3982
3983 /* ME */
3984 fw_data = (const __be32 *)rdev->me_fw->data;
3985 WREG32(CP_ME_RAM_WADDR, 0);
3986 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3987 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3988 WREG32(CP_ME_RAM_WADDR, 0);
3989
3990 WREG32(CP_PFP_UCODE_ADDR, 0);
3991 WREG32(CP_CE_UCODE_ADDR, 0);
3992 WREG32(CP_ME_RAM_WADDR, 0);
3993 WREG32(CP_ME_RAM_RADDR, 0);
3994 return 0;
3995 }
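/*
 * The three uploads above share one pattern; a condensed sketch, assuming
 * this hypothetical helper (not an existing driver function): reset the
 * ucode write address, stream the big-endian words, reset the address again.
 */
static void cik_cp_write_ucode(struct radeon_device *rdev,
			       u32 addr_reg, u32 data_reg,
			       const __be32 *fw_data, unsigned size)
{
	unsigned i;

	WREG32(addr_reg, 0);
	for (i = 0; i < size; i++)
		WREG32(data_reg, be32_to_cpup(fw_data++));
	WREG32(addr_reg, 0);
}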
3996
3997 /**
3998 * cik_cp_gfx_start - start the gfx ring
3999 *
4000 * @rdev: radeon_device pointer
4001 *
4002 * Enables the ring and loads the clear state context and other
4003 * packets required to init the ring.
4004 * Returns 0 for success, error for failure.
4005 */
4006 static int cik_cp_gfx_start(struct radeon_device *rdev)
4007 {
4008 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4009 int r, i;
4010
4011 /* init the CP */
4012 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4013 WREG32(CP_ENDIAN_SWAP, 0);
4014 WREG32(CP_DEVICE_ID, 1);
4015
4016 cik_cp_gfx_enable(rdev, true);
4017
4018 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4019 if (r) {
4020 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4021 return r;
4022 }
4023
4024 /* init the CE partitions. CE only used for gfx on CIK */
4025 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4026 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4027 radeon_ring_write(ring, 0xc000);
4028 radeon_ring_write(ring, 0xc000);
4029
4030 /* setup clear context state */
4031 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4032 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4033
4034 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4035 radeon_ring_write(ring, 0x80000000);
4036 radeon_ring_write(ring, 0x80000000);
4037
4038 for (i = 0; i < cik_default_size; i++)
4039 radeon_ring_write(ring, cik_default_state[i]);
4040
4041 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4042 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4043
4044 /* set clear context state */
4045 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4046 radeon_ring_write(ring, 0);
4047
4048 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4049 radeon_ring_write(ring, 0x00000316);
4050 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4051 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4052
4053 radeon_ring_unlock_commit(rdev, ring);
4054
4055 return 0;
4056 }
4057
4058 /**
4059 * cik_cp_gfx_fini - stop the gfx ring
4060 *
4061 * @rdev: radeon_device pointer
4062 *
4063 * Stop the gfx ring and tear down the driver ring
4064 * info.
4065 */
4066 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4067 {
4068 cik_cp_gfx_enable(rdev, false);
4069 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4070 }
4071
4072 /**
4073 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4074 *
4075 * @rdev: radeon_device pointer
4076 *
4077 * Program the location and size of the gfx ring buffer
4078 * and test it to make sure it's working.
4079 * Returns 0 for success, error for failure.
4080 */
4081 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4082 {
4083 struct radeon_ring *ring;
4084 u32 tmp;
4085 u32 rb_bufsz;
4086 u64 rb_addr;
4087 int r;
4088
4089 WREG32(CP_SEM_WAIT_TIMER, 0x0);
4090 if (rdev->family != CHIP_HAWAII)
4091 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4092
4093 /* Set the write pointer delay */
4094 WREG32(CP_RB_WPTR_DELAY, 0);
4095
4096 /* set the RB to use vmid 0 */
4097 WREG32(CP_RB_VMID, 0);
4098
4099 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4100
4101 /* ring 0 - compute and gfx */
4102 /* Set ring buffer size */
4103 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4104 rb_bufsz = order_base_2(ring->ring_size / 8);
4105 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4106 #ifdef __BIG_ENDIAN
4107 tmp |= BUF_SWAP_32BIT;
4108 #endif
4109 WREG32(CP_RB0_CNTL, tmp);
4110
4111 /* Initialize the ring buffer's read and write pointers */
4112 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4113 ring->wptr = 0;
4114 WREG32(CP_RB0_WPTR, ring->wptr);
4115
4116 /* set the wb address whether it's enabled or not */
4117 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4118 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4119
4120 /* scratch register shadowing is no longer supported */
4121 WREG32(SCRATCH_UMSK, 0);
4122
4123 if (!rdev->wb.enabled)
4124 tmp |= RB_NO_UPDATE;
4125
4126 mdelay(1);
4127 WREG32(CP_RB0_CNTL, tmp);
4128
4129 rb_addr = ring->gpu_addr >> 8;
4130 WREG32(CP_RB0_BASE, rb_addr);
4131 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4132
4133 /* start the ring */
4134 cik_cp_gfx_start(rdev);
4135 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4136 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4137 if (r) {
4138 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4139 return r;
4140 }
4141
4142 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4143 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4144
4145 return 0;
4146 }
4147
4148 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4149 struct radeon_ring *ring)
4150 {
4151 u32 rptr;
4152
4153 if (rdev->wb.enabled)
4154 rptr = rdev->wb.wb[ring->rptr_offs/4];
4155 else
4156 rptr = RREG32(CP_RB0_RPTR);
4157
4158 return rptr;
4159 }
4160
4161 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4162 struct radeon_ring *ring)
4163 {
4164 u32 wptr;
4165
4166 wptr = RREG32(CP_RB0_WPTR);
4167
4168 return wptr;
4169 }
4170
4171 void cik_gfx_set_wptr(struct radeon_device *rdev,
4172 struct radeon_ring *ring)
4173 {
4174 WREG32(CP_RB0_WPTR, ring->wptr);
4175 (void)RREG32(CP_RB0_WPTR);
4176 }
4177
4178 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4179 struct radeon_ring *ring)
4180 {
4181 u32 rptr;
4182
4183 if (rdev->wb.enabled) {
4184 rptr = rdev->wb.wb[ring->rptr_offs/4];
4185 } else {
4186 mutex_lock(&rdev->srbm_mutex);
4187 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4188 rptr = RREG32(CP_HQD_PQ_RPTR);
4189 cik_srbm_select(rdev, 0, 0, 0, 0);
4190 mutex_unlock(&rdev->srbm_mutex);
4191 }
4192
4193 return rptr;
4194 }
4195
4196 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4197 struct radeon_ring *ring)
4198 {
4199 u32 wptr;
4200
4201 if (rdev->wb.enabled) {
4202 /* XXX check if swapping is necessary on BE */
4203 wptr = rdev->wb.wb[ring->wptr_offs/4];
4204 } else {
4205 mutex_lock(&rdev->srbm_mutex);
4206 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4207 wptr = RREG32(CP_HQD_PQ_WPTR);
4208 cik_srbm_select(rdev, 0, 0, 0, 0);
4209 mutex_unlock(&rdev->srbm_mutex);
4210 }
4211
4212 return wptr;
4213 }
4214
4215 void cik_compute_set_wptr(struct radeon_device *rdev,
4216 struct radeon_ring *ring)
4217 {
4218 /* XXX check if swapping is necessary on BE */
4219 rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4220 WDOORBELL32(ring->doorbell_index, ring->wptr);
4221 }
4222
4223 /**
4224 * cik_cp_compute_enable - enable/disable the compute CP MEs
4225 *
4226 * @rdev: radeon_device pointer
4227 * @enable: enable or disable the MEs
4228 *
4229 * Halts or unhalts the compute MEs.
4230 */
4231 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4232 {
4233 if (enable)
4234 WREG32(CP_MEC_CNTL, 0);
4235 else {
4236 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4237 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4238 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4239 }
4240 udelay(50);
4241 }
4242
4243 /**
4244 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4245 *
4246 * @rdev: radeon_device pointer
4247 *
4248 * Loads the compute MEC1 ucode (and MEC2 on KV).
4249 * Returns 0 for success, -EINVAL if the ucode is not available.
4250 */
4251 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4252 {
4253 const __be32 *fw_data;
4254 int i;
4255
4256 if (!rdev->mec_fw)
4257 return -EINVAL;
4258
4259 cik_cp_compute_enable(rdev, false);
4260
4261 /* MEC1 */
4262 fw_data = (const __be32 *)rdev->mec_fw->data;
4263 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4264 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4265 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4266 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4267
4268 if (rdev->family == CHIP_KAVERI) {
4269 /* MEC2 */
4270 fw_data = (const __be32 *)rdev->mec_fw->data;
4271 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4272 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4273 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4274 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4275 }
4276
4277 return 0;
4278 }
4279
4280 /**
4281 * cik_cp_compute_start - start the compute queues
4282 *
4283 * @rdev: radeon_device pointer
4284 *
4285 * Enable the compute queues.
4286 * Returns 0 for success, error for failure.
4287 */
4288 static int cik_cp_compute_start(struct radeon_device *rdev)
4289 {
4290 cik_cp_compute_enable(rdev, true);
4291
4292 return 0;
4293 }
4294
4295 /**
4296 * cik_cp_compute_fini - stop the compute queues
4297 *
4298 * @rdev: radeon_device pointer
4299 *
4300 * Stop the compute queues and tear down the driver queue
4301 * info.
4302 */
4303 static void cik_cp_compute_fini(struct radeon_device *rdev)
4304 {
4305 int i, idx, r;
4306
4307 cik_cp_compute_enable(rdev, false);
4308
4309 for (i = 0; i < 2; i++) {
4310 if (i == 0)
4311 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4312 else
4313 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4314
4315 if (rdev->ring[idx].mqd_obj) {
4316 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4317 if (unlikely(r != 0))
4318 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4319
4320 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4321 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4322
4323 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4324 rdev->ring[idx].mqd_obj = NULL;
4325 }
4326 }
4327 }
4328
4329 static void cik_mec_fini(struct radeon_device *rdev)
4330 {
4331 int r;
4332
4333 if (rdev->mec.hpd_eop_obj) {
4334 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4335 if (unlikely(r != 0))
4336 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4337 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4338 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4339
4340 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4341 rdev->mec.hpd_eop_obj = NULL;
4342 }
4343 }
4344
4345 #define MEC_HPD_SIZE 2048
4346
4347 static int cik_mec_init(struct radeon_device *rdev)
4348 {
4349 int r;
4350 u32 *hpd;
4351
4352 /*
4353 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4354 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4355 */
4356 if (rdev->family == CHIP_KAVERI)
4357 rdev->mec.num_mec = 2;
4358 else
4359 rdev->mec.num_mec = 1;
4360 rdev->mec.num_pipe = 4;
4361 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4362
4363 if (rdev->mec.hpd_eop_obj == NULL) {
4364 r = radeon_bo_create(rdev,
4365 rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4366 PAGE_SIZE, true,
4367 RADEON_GEM_DOMAIN_GTT, NULL,
4368 &rdev->mec.hpd_eop_obj);
4369 if (r) {
4370 dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4371 return r;
4372 }
4373 }
4374
4375 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4376 if (unlikely(r != 0)) {
4377 cik_mec_fini(rdev);
4378 return r;
4379 }
4380 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4381 &rdev->mec.hpd_eop_gpu_addr);
4382 if (r) {
4383 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4384 cik_mec_fini(rdev);
4385 return r;
4386 }
4387 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4388 if (r) {
4389 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4390 cik_mec_fini(rdev);
4391 return r;
4392 }
4393
4394 /* clear memory. Not sure if this is required or not */
4395 memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4396
4397 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4398 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4399
4400 return 0;
4401 }
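/* worked sizing for the allocation above (illustrative arithmetic only):
 * on KV, num_mec * num_pipe * MEC_HPD_SIZE * 2 = 2 * 4 * 2048 * 2 bytes,
 * i.e. 32 KiB of GTT for the HPD EOP areas (16 KiB on CI/KB)
 */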
4402
4403 struct hqd_registers
4404 {
4405 u32 cp_mqd_base_addr;
4406 u32 cp_mqd_base_addr_hi;
4407 u32 cp_hqd_active;
4408 u32 cp_hqd_vmid;
4409 u32 cp_hqd_persistent_state;
4410 u32 cp_hqd_pipe_priority;
4411 u32 cp_hqd_queue_priority;
4412 u32 cp_hqd_quantum;
4413 u32 cp_hqd_pq_base;
4414 u32 cp_hqd_pq_base_hi;
4415 u32 cp_hqd_pq_rptr;
4416 u32 cp_hqd_pq_rptr_report_addr;
4417 u32 cp_hqd_pq_rptr_report_addr_hi;
4418 u32 cp_hqd_pq_wptr_poll_addr;
4419 u32 cp_hqd_pq_wptr_poll_addr_hi;
4420 u32 cp_hqd_pq_doorbell_control;
4421 u32 cp_hqd_pq_wptr;
4422 u32 cp_hqd_pq_control;
4423 u32 cp_hqd_ib_base_addr;
4424 u32 cp_hqd_ib_base_addr_hi;
4425 u32 cp_hqd_ib_rptr;
4426 u32 cp_hqd_ib_control;
4427 u32 cp_hqd_iq_timer;
4428 u32 cp_hqd_iq_rptr;
4429 u32 cp_hqd_dequeue_request;
4430 u32 cp_hqd_dma_offload;
4431 u32 cp_hqd_sema_cmd;
4432 u32 cp_hqd_msg_type;
4433 u32 cp_hqd_atomic0_preop_lo;
4434 u32 cp_hqd_atomic0_preop_hi;
4435 u32 cp_hqd_atomic1_preop_lo;
4436 u32 cp_hqd_atomic1_preop_hi;
4437 u32 cp_hqd_hq_scheduler0;
4438 u32 cp_hqd_hq_scheduler1;
4439 u32 cp_mqd_control;
4440 };
4441
4442 struct bonaire_mqd
4443 {
4444 u32 header;
4445 u32 dispatch_initiator;
4446 u32 dimensions[3];
4447 u32 start_idx[3];
4448 u32 num_threads[3];
4449 u32 pipeline_stat_enable;
4450 u32 perf_counter_enable;
4451 u32 pgm[2];
4452 u32 tba[2];
4453 u32 tma[2];
4454 u32 pgm_rsrc[2];
4455 u32 vmid;
4456 u32 resource_limits;
4457 u32 static_thread_mgmt01[2];
4458 u32 tmp_ring_size;
4459 u32 static_thread_mgmt23[2];
4460 u32 restart[3];
4461 u32 thread_trace_enable;
4462 u32 reserved1;
4463 u32 user_data[16];
4464 u32 vgtcs_invoke_count[2];
4465 struct hqd_registers queue_state;
4466 u32 dequeue_cntr;
4467 u32 interrupt_queue[64];
4468 };
4469
4470 /**
4471 * cik_cp_compute_resume - setup the compute queue registers
4472 *
4473 * @rdev: radeon_device pointer
4474 *
4475 * Program the compute queues and test them to make sure they
4476 * are working.
4477 * Returns 0 for success, error for failure.
4478 */
4479 static int cik_cp_compute_resume(struct radeon_device *rdev)
4480 {
4481 int r, i, j, idx;
4482 u32 tmp;
4483 bool use_doorbell = true;
4484 u64 hqd_gpu_addr;
4485 u64 mqd_gpu_addr;
4486 u64 eop_gpu_addr;
4487 u64 wb_gpu_addr;
4488 u32 *buf;
4489 struct bonaire_mqd *mqd;
4490
4491 r = cik_cp_compute_start(rdev);
4492 if (r)
4493 return r;
4494
4495 /* fix up chicken bits */
4496 tmp = RREG32(CP_CPF_DEBUG);
4497 tmp |= (1 << 23);
4498 WREG32(CP_CPF_DEBUG, tmp);
4499
4500 /* init the pipes */
4501 mutex_lock(&rdev->srbm_mutex);
4502 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4503 int me = (i < 4) ? 1 : 2;
4504 int pipe = (i < 4) ? i : (i - 4);
4505
4506 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4507
4508 cik_srbm_select(rdev, me, pipe, 0, 0);
4509
4510 /* write the EOP addr */
4511 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4512 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4513
4514 /* set the VMID assigned */
4515 WREG32(CP_HPD_EOP_VMID, 0);
4516
4517 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4518 tmp = RREG32(CP_HPD_EOP_CONTROL);
4519 tmp &= ~EOP_SIZE_MASK;
4520 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4521 WREG32(CP_HPD_EOP_CONTROL, tmp);
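/* e.g. MEC_HPD_SIZE / 8 = 256, so EOP_SIZE = order_base_2(256) = 8 and
 * the hw decodes 2^(8+1) = 512 dwords = 2048 bytes = MEC_HPD_SIZE
 * (illustrative arithmetic only)
 */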
4522 }
4523 cik_srbm_select(rdev, 0, 0, 0, 0);
4524 mutex_unlock(&rdev->srbm_mutex);
4525
4526 /* init the queues. Just two for now. */
4527 for (i = 0; i < 2; i++) {
4528 if (i == 0)
4529 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4530 else
4531 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4532
4533 if (rdev->ring[idx].mqd_obj == NULL) {
4534 r = radeon_bo_create(rdev,
4535 sizeof(struct bonaire_mqd),
4536 PAGE_SIZE, true,
4537 RADEON_GEM_DOMAIN_GTT, NULL,
4538 &rdev->ring[idx].mqd_obj);
4539 if (r) {
4540 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4541 return r;
4542 }
4543 }
4544
4545 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4546 if (unlikely(r != 0)) {
4547 cik_cp_compute_fini(rdev);
4548 return r;
4549 }
4550 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4551 &mqd_gpu_addr);
4552 if (r) {
4553 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4554 cik_cp_compute_fini(rdev);
4555 return r;
4556 }
4557 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4558 if (r) {
4559 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4560 cik_cp_compute_fini(rdev);
4561 return r;
4562 }
4563
4564 /* init the mqd struct */
4565 memset(buf, 0, sizeof(struct bonaire_mqd));
4566
4567 mqd = (struct bonaire_mqd *)buf;
4568 mqd->header = 0xC0310800;
4569 mqd->static_thread_mgmt01[0] = 0xffffffff;
4570 mqd->static_thread_mgmt01[1] = 0xffffffff;
4571 mqd->static_thread_mgmt23[0] = 0xffffffff;
4572 mqd->static_thread_mgmt23[1] = 0xffffffff;
4573
4574 mutex_lock(&rdev->srbm_mutex);
4575 cik_srbm_select(rdev, rdev->ring[idx].me,
4576 rdev->ring[idx].pipe,
4577 rdev->ring[idx].queue, 0);
4578
4579 /* disable wptr polling */
4580 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4581 tmp &= ~WPTR_POLL_EN;
4582 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4583
4584 /* enable doorbell? */
4585 mqd->queue_state.cp_hqd_pq_doorbell_control =
4586 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4587 if (use_doorbell)
4588 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4589 else
4590 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4591 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4592 mqd->queue_state.cp_hqd_pq_doorbell_control);
4593
4594 /* disable the queue if it's active */
4595 mqd->queue_state.cp_hqd_dequeue_request = 0;
4596 mqd->queue_state.cp_hqd_pq_rptr = 0;
4597 mqd->queue_state.cp_hqd_pq_wptr = 0;
4598 if (RREG32(CP_HQD_ACTIVE) & 1) {
4599 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4600 for (j = 0; j < rdev->usec_timeout; j++) {
4601 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4602 break;
4603 udelay(1);
4604 }
4605 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4606 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4607 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4608 }
4609
4610 /* set the pointer to the MQD */
4611 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4612 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4613 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4614 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4615 /* set MQD vmid to 0 */
4616 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4617 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4618 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4619
4620 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4621 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4622 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4623 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4624 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4625 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4626
4627 /* set up the HQD, this is similar to CP_RB0_CNTL */
4628 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4629 mqd->queue_state.cp_hqd_pq_control &=
4630 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4631
4632 mqd->queue_state.cp_hqd_pq_control |=
4633 order_base_2(rdev->ring[idx].ring_size / 8);
4634 mqd->queue_state.cp_hqd_pq_control |=
4635 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4636 #ifdef __BIG_ENDIAN
4637 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4638 #endif
4639 mqd->queue_state.cp_hqd_pq_control &=
4640 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4641 mqd->queue_state.cp_hqd_pq_control |=
4642 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4643 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4644
4645 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4646 if (i == 0)
4647 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4648 else
4649 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4650 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4651 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4652 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4653 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4654 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4655
4656 /* set the wb address whether it's enabled or not */
4657 if (i == 0)
4658 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4659 else
4660 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4661 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4662 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4663 upper_32_bits(wb_gpu_addr) & 0xffff;
4664 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4665 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4666 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4667 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4668
4669 /* enable the doorbell if requested */
4670 if (use_doorbell) {
4671 mqd->queue_state.cp_hqd_pq_doorbell_control =
4672 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4673 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4674 mqd->queue_state.cp_hqd_pq_doorbell_control |=
4675 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4676 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4677 mqd->queue_state.cp_hqd_pq_doorbell_control &=
4678 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4679
4680 } else {
4681 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4682 }
4683 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4684 mqd->queue_state.cp_hqd_pq_doorbell_control);
4685
4686 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4687 rdev->ring[idx].wptr = 0;
4688 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4689 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4690 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4691
4692 /* set the vmid for the queue */
4693 mqd->queue_state.cp_hqd_vmid = 0;
4694 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4695
4696 /* activate the queue */
4697 mqd->queue_state.cp_hqd_active = 1;
4698 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4699
4700 cik_srbm_select(rdev, 0, 0, 0, 0);
4701 mutex_unlock(&rdev->srbm_mutex);
4702
4703 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4704 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4705
4706 rdev->ring[idx].ready = true;
4707 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4708 if (r)
4709 rdev->ring[idx].ready = false;
4710 }
4711
4712 return 0;
4713 }
4714
4715 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4716 {
4717 cik_cp_gfx_enable(rdev, enable);
4718 cik_cp_compute_enable(rdev, enable);
4719 }
4720
4721 static int cik_cp_load_microcode(struct radeon_device *rdev)
4722 {
4723 int r;
4724
4725 r = cik_cp_gfx_load_microcode(rdev);
4726 if (r)
4727 return r;
4728 r = cik_cp_compute_load_microcode(rdev);
4729 if (r)
4730 return r;
4731
4732 return 0;
4733 }
4734
4735 static void cik_cp_fini(struct radeon_device *rdev)
4736 {
4737 cik_cp_gfx_fini(rdev);
4738 cik_cp_compute_fini(rdev);
4739 }
4740
4741 static int cik_cp_resume(struct radeon_device *rdev)
4742 {
4743 int r;
4744
4745 cik_enable_gui_idle_interrupt(rdev, false);
4746
4747 r = cik_cp_load_microcode(rdev);
4748 if (r)
4749 return r;
4750
4751 r = cik_cp_gfx_resume(rdev);
4752 if (r)
4753 return r;
4754 r = cik_cp_compute_resume(rdev);
4755 if (r)
4756 return r;
4757
4758 cik_enable_gui_idle_interrupt(rdev, true);
4759
4760 return 0;
4761 }
4762
4763 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4764 {
4765 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4766 RREG32(GRBM_STATUS));
4767 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4768 RREG32(GRBM_STATUS2));
4769 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4770 RREG32(GRBM_STATUS_SE0));
4771 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4772 RREG32(GRBM_STATUS_SE1));
4773 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4774 RREG32(GRBM_STATUS_SE2));
4775 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4776 RREG32(GRBM_STATUS_SE3));
4777 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4778 RREG32(SRBM_STATUS));
4779 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4780 RREG32(SRBM_STATUS2));
4781 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4782 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4783 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4784 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4785 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4786 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4787 RREG32(CP_STALLED_STAT1));
4788 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4789 RREG32(CP_STALLED_STAT2));
4790 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4791 RREG32(CP_STALLED_STAT3));
4792 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4793 RREG32(CP_CPF_BUSY_STAT));
4794 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4795 RREG32(CP_CPF_STALLED_STAT1));
4796 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4797 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4798 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4799 RREG32(CP_CPC_STALLED_STAT1));
4800 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4801 }
4802
4803 /**
4804 * cik_gpu_check_soft_reset - check which blocks are busy
4805 *
4806 * @rdev: radeon_device pointer
4807 *
4808 * Check which blocks are busy and return the relevant reset
4809 * mask to be used by cik_gpu_soft_reset().
4810 * Returns a mask of the blocks to be reset.
4811 */
4812 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4813 {
4814 u32 reset_mask = 0;
4815 u32 tmp;
4816
4817 /* GRBM_STATUS */
4818 tmp = RREG32(GRBM_STATUS);
4819 if (tmp & (PA_BUSY | SC_BUSY |
4820 BCI_BUSY | SX_BUSY |
4821 TA_BUSY | VGT_BUSY |
4822 DB_BUSY | CB_BUSY |
4823 GDS_BUSY | SPI_BUSY |
4824 IA_BUSY | IA_BUSY_NO_DMA))
4825 reset_mask |= RADEON_RESET_GFX;
4826
4827 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4828 reset_mask |= RADEON_RESET_CP;
4829
4830 /* GRBM_STATUS2 */
4831 tmp = RREG32(GRBM_STATUS2);
4832 if (tmp & RLC_BUSY)
4833 reset_mask |= RADEON_RESET_RLC;
4834
4835 /* SDMA0_STATUS_REG */
4836 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4837 if (!(tmp & SDMA_IDLE))
4838 reset_mask |= RADEON_RESET_DMA;
4839
4840 /* SDMA1_STATUS_REG */
4841 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4842 if (!(tmp & SDMA_IDLE))
4843 reset_mask |= RADEON_RESET_DMA1;
4844
4845 /* SRBM_STATUS2 */
4846 tmp = RREG32(SRBM_STATUS2);
4847 if (tmp & SDMA_BUSY)
4848 reset_mask |= RADEON_RESET_DMA;
4849
4850 if (tmp & SDMA1_BUSY)
4851 reset_mask |= RADEON_RESET_DMA1;
4852
4853 /* SRBM_STATUS */
4854 tmp = RREG32(SRBM_STATUS);
4855
4856 if (tmp & IH_BUSY)
4857 reset_mask |= RADEON_RESET_IH;
4858
4859 if (tmp & SEM_BUSY)
4860 reset_mask |= RADEON_RESET_SEM;
4861
4862 if (tmp & GRBM_RQ_PENDING)
4863 reset_mask |= RADEON_RESET_GRBM;
4864
4865 if (tmp & VMC_BUSY)
4866 reset_mask |= RADEON_RESET_VMC;
4867
4868 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4869 MCC_BUSY | MCD_BUSY))
4870 reset_mask |= RADEON_RESET_MC;
4871
4872 if (evergreen_is_display_hung(rdev))
4873 reset_mask |= RADEON_RESET_DISPLAY;
4874
4875 /* Skip MC reset as it's most likely not hung, just busy */
4876 if (reset_mask & RADEON_RESET_MC) {
4877 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4878 reset_mask &= ~RADEON_RESET_MC;
4879 }
4880
4881 return reset_mask;
4882 }
4883
4884 /**
4885 * cik_gpu_soft_reset - soft reset GPU
4886 *
4887 * @rdev: radeon_device pointer
4888 * @reset_mask: mask of which blocks to reset
4889 *
4890 * Soft reset the blocks specified in @reset_mask.
4891 */
4892 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4893 {
4894 struct evergreen_mc_save save;
4895 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4896 u32 tmp;
4897
4898 if (reset_mask == 0)
4899 return;
4900
4901 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4902
4903 cik_print_gpu_status_regs(rdev);
4904 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4905 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4906 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4907 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4908
4909 /* disable CG/PG */
4910 cik_fini_pg(rdev);
4911 cik_fini_cg(rdev);
4912
4913 /* stop the rlc */
4914 cik_rlc_stop(rdev);
4915
4916 /* Disable GFX parsing/prefetching */
4917 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4918
4919 /* Disable MEC parsing/prefetching */
4920 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4921
4922 if (reset_mask & RADEON_RESET_DMA) {
4923 /* sdma0 */
4924 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4925 tmp |= SDMA_HALT;
4926 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4927 }
4928 if (reset_mask & RADEON_RESET_DMA1) {
4929 /* sdma1 */
4930 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4931 tmp |= SDMA_HALT;
4932 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4933 }
4934
4935 evergreen_mc_stop(rdev, &save);
4936 if (evergreen_mc_wait_for_idle(rdev)) {
4937 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4938 }
4939
4940 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4941 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4942
4943 if (reset_mask & RADEON_RESET_CP) {
4944 grbm_soft_reset |= SOFT_RESET_CP;
4945
4946 srbm_soft_reset |= SOFT_RESET_GRBM;
4947 }
4948
4949 if (reset_mask & RADEON_RESET_DMA)
4950 srbm_soft_reset |= SOFT_RESET_SDMA;
4951
4952 if (reset_mask & RADEON_RESET_DMA1)
4953 srbm_soft_reset |= SOFT_RESET_SDMA1;
4954
4955 if (reset_mask & RADEON_RESET_DISPLAY)
4956 srbm_soft_reset |= SOFT_RESET_DC;
4957
4958 if (reset_mask & RADEON_RESET_RLC)
4959 grbm_soft_reset |= SOFT_RESET_RLC;
4960
4961 if (reset_mask & RADEON_RESET_SEM)
4962 srbm_soft_reset |= SOFT_RESET_SEM;
4963
4964 if (reset_mask & RADEON_RESET_IH)
4965 srbm_soft_reset |= SOFT_RESET_IH;
4966
4967 if (reset_mask & RADEON_RESET_GRBM)
4968 srbm_soft_reset |= SOFT_RESET_GRBM;
4969
4970 if (reset_mask & RADEON_RESET_VMC)
4971 srbm_soft_reset |= SOFT_RESET_VMC;
4972
4973 if (!(rdev->flags & RADEON_IS_IGP)) {
4974 if (reset_mask & RADEON_RESET_MC)
4975 srbm_soft_reset |= SOFT_RESET_MC;
4976 }
4977
4978 if (grbm_soft_reset) {
4979 tmp = RREG32(GRBM_SOFT_RESET);
4980 tmp |= grbm_soft_reset;
4981 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4982 WREG32(GRBM_SOFT_RESET, tmp);
4983 tmp = RREG32(GRBM_SOFT_RESET);
4984
4985 udelay(50);
4986
4987 tmp &= ~grbm_soft_reset;
4988 WREG32(GRBM_SOFT_RESET, tmp);
4989 tmp = RREG32(GRBM_SOFT_RESET);
4990 }
4991
4992 if (srbm_soft_reset) {
4993 tmp = RREG32(SRBM_SOFT_RESET);
4994 tmp |= srbm_soft_reset;
4995 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4996 WREG32(SRBM_SOFT_RESET, tmp);
4997 tmp = RREG32(SRBM_SOFT_RESET);
4998
4999 udelay(50);
5000
5001 tmp &= ~srbm_soft_reset;
5002 WREG32(SRBM_SOFT_RESET, tmp);
5003 tmp = RREG32(SRBM_SOFT_RESET);
5004 }
5005
5006 /* Wait a little for things to settle down */
5007 udelay(50);
5008
5009 evergreen_mc_resume(rdev, &save);
5010 udelay(50);
5011
5012 cik_print_gpu_status_regs(rdev);
5013 }
5014
5015 struct kv_reset_save_regs {
5016 u32 gmcon_reng_execute;
5017 u32 gmcon_misc;
5018 u32 gmcon_misc3;
5019 };
5020
5021 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5022 struct kv_reset_save_regs *save)
5023 {
5024 save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5025 save->gmcon_misc = RREG32(GMCON_MISC);
5026 save->gmcon_misc3 = RREG32(GMCON_MISC3);
5027
5028 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5029 WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5030 STCTRL_STUTTER_EN));
5031 }
5032
5033 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5034 struct kv_reset_save_regs *save)
5035 {
5036 int i;
5037
5038 WREG32(GMCON_PGFSM_WRITE, 0);
5039 WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5040
5041 for (i = 0; i < 5; i++)
5042 WREG32(GMCON_PGFSM_WRITE, 0);
5043
5044 WREG32(GMCON_PGFSM_WRITE, 0);
5045 WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5046
5047 for (i = 0; i < 5; i++)
5048 WREG32(GMCON_PGFSM_WRITE, 0);
5049
5050 WREG32(GMCON_PGFSM_WRITE, 0x210000);
5051 WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5052
5053 for (i = 0; i < 5; i++)
5054 WREG32(GMCON_PGFSM_WRITE, 0);
5055
5056 WREG32(GMCON_PGFSM_WRITE, 0x21003);
5057 WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5058
5059 for (i = 0; i < 5; i++)
5060 WREG32(GMCON_PGFSM_WRITE, 0);
5061
5062 WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5063 WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5064
5065 for (i = 0; i < 5; i++)
5066 WREG32(GMCON_PGFSM_WRITE, 0);
5067
5068 WREG32(GMCON_PGFSM_WRITE, 0);
5069 WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5070
5071 for (i = 0; i < 5; i++)
5072 WREG32(GMCON_PGFSM_WRITE, 0);
5073
5074 WREG32(GMCON_PGFSM_WRITE, 0x420000);
5075 WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5076
5077 for (i = 0; i < 5; i++)
5078 WREG32(GMCON_PGFSM_WRITE, 0);
5079
5080 WREG32(GMCON_PGFSM_WRITE, 0x120202);
5081 WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5082
5083 for (i = 0; i < 5; i++)
5084 WREG32(GMCON_PGFSM_WRITE, 0);
5085
5086 WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5087 WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5088
5089 for (i = 0; i < 5; i++)
5090 WREG32(GMCON_PGFSM_WRITE, 0);
5091
5092 WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5093 WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5094
5095 for (i = 0; i < 5; i++)
5096 WREG32(GMCON_PGFSM_WRITE, 0);
5097
5098 WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5099 WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5100
5101 WREG32(GMCON_MISC3, save->gmcon_misc3);
5102 WREG32(GMCON_MISC, save->gmcon_misc);
5103 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5104 }
5105
5106 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5107 {
5108 struct evergreen_mc_save save;
5109 struct kv_reset_save_regs kv_save = { 0 };
5110 u32 tmp, i;
5111
5112 dev_info(rdev->dev, "GPU pci config reset\n");
5113
5114 /* disable dpm? */
5115
5116 /* disable cg/pg */
5117 cik_fini_pg(rdev);
5118 cik_fini_cg(rdev);
5119
5120 /* Disable GFX parsing/prefetching */
5121 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5122
5123 /* Disable MEC parsing/prefetching */
5124 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5125
5126 /* sdma0 */
5127 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5128 tmp |= SDMA_HALT;
5129 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5130 /* sdma1 */
5131 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5132 tmp |= SDMA_HALT;
5133 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5134 /* XXX other engines? */
5135
5136 /* halt the rlc, disable cp internal ints */
5137 cik_rlc_stop(rdev);
5138
5139 udelay(50);
5140
5141 /* disable mem access */
5142 evergreen_mc_stop(rdev, &save);
5143 if (evergreen_mc_wait_for_idle(rdev)) {
5144 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5145 }
5146
5147 if (rdev->flags & RADEON_IS_IGP)
5148 kv_save_regs_for_reset(rdev, &kv_save);
5149
5150 /* disable BM */
5151 pci_clear_master(rdev->pdev);
5152 /* reset */
5153 radeon_pci_config_reset(rdev);
5154
5155 udelay(100);
5156
5157 /* wait for asic to come out of reset */
5158 for (i = 0; i < rdev->usec_timeout; i++) {
5159 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5160 break;
5161 udelay(1);
5162 }
5163
5164 /* does asic init need to be run first??? */
5165 if (rdev->flags & RADEON_IS_IGP)
5166 kv_restore_regs_for_reset(rdev, &kv_save);
5167 }
5168
5169 /**
5170 * cik_asic_reset - soft reset GPU
5171 *
5172 * @rdev: radeon_device pointer
5173 *
5174 * Look up which blocks are hung and attempt
5175 * to reset them.
5176 * Returns 0 for success.
5177 */
5178 int cik_asic_reset(struct radeon_device *rdev)
5179 {
5180 u32 reset_mask;
5181
5182 reset_mask = cik_gpu_check_soft_reset(rdev);
5183
5184 if (reset_mask)
5185 r600_set_bios_scratch_engine_hung(rdev, true);
5186
5187 /* try soft reset */
5188 cik_gpu_soft_reset(rdev, reset_mask);
5189
5190 reset_mask = cik_gpu_check_soft_reset(rdev);
5191
5192 /* try pci config reset */
5193 if (reset_mask && radeon_hard_reset)
5194 cik_gpu_pci_config_reset(rdev);
5195
5196 reset_mask = cik_gpu_check_soft_reset(rdev);
5197
5198 if (!reset_mask)
5199 r600_set_bios_scratch_engine_hung(rdev, false);
5200
5201 return 0;
5202 }
5203
5204 /**
5205 * cik_gfx_is_lockup - check if the 3D engine is locked up
5206 *
5207 * @rdev: radeon_device pointer
5208 * @ring: radeon_ring structure holding ring information
5209 *
5210 * Check if the 3D engine is locked up (CIK).
5211 * Returns true if the engine is locked, false if not.
5212 */
5213 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5214 {
5215 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5216
5217 if (!(reset_mask & (RADEON_RESET_GFX |
5218 RADEON_RESET_COMPUTE |
5219 RADEON_RESET_CP))) {
5220 radeon_ring_lockup_update(rdev, ring);
5221 return false;
5222 }
5223 return radeon_ring_test_lockup(rdev, ring);
5224 }
5225
5226 /* MC */
5227 /**
5228 * cik_mc_program - program the GPU memory controller
5229 *
5230 * @rdev: radeon_device pointer
5231 *
5232 * Set the location of vram, gart, and AGP in the GPU's
5233 * physical address space (CIK).
5234 */
5235 static void cik_mc_program(struct radeon_device *rdev)
5236 {
5237 struct evergreen_mc_save save;
5238 u32 tmp;
5239 int i, j;
5240
5241 /* Initialize HDP */
5242 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5243 WREG32((0x2c14 + j), 0x00000000);
5244 WREG32((0x2c18 + j), 0x00000000);
5245 WREG32((0x2c1c + j), 0x00000000);
5246 WREG32((0x2c20 + j), 0x00000000);
5247 WREG32((0x2c24 + j), 0x00000000);
5248 }
5249 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5250
5251 evergreen_mc_stop(rdev, &save);
5252 if (radeon_mc_wait_for_idle(rdev)) {
5253 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5254 }
5255 /* Lock out access through VGA aperture */
5256 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5257 /* Update configuration */
5258 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5259 rdev->mc.vram_start >> 12);
5260 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5261 rdev->mc.vram_end >> 12);
5262 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5263 rdev->vram_scratch.gpu_addr >> 12);
5264 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5265 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5266 WREG32(MC_VM_FB_LOCATION, tmp);
5267 /* XXX double check these! */
5268 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5269 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5270 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5271 WREG32(MC_VM_AGP_BASE, 0);
5272 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5273 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5274 if (radeon_mc_wait_for_idle(rdev)) {
5275 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5276 }
5277 evergreen_mc_resume(rdev, &save);
5278 /* we need to own VRAM, so turn off the VGA renderer here
5279 * to stop it overwriting our objects */
5280 rv515_vga_render_disable(rdev);
5281 }
5282
5283 /**
5284 * cik_mc_init - initialize the memory controller driver params
5285 *
5286 * @rdev: radeon_device pointer
5287 *
5288 * Look up the amount of vram, vram width, and decide how to place
5289 * vram and gart within the GPU's physical address space (CIK).
5290 * Returns 0 for success.
5291 */
5292 static int cik_mc_init(struct radeon_device *rdev)
5293 {
5294 u32 tmp;
5295 int chansize, numchan;
5296
5297 /* Get VRAM information */
5298 rdev->mc.vram_is_ddr = true;
5299 tmp = RREG32(MC_ARB_RAMCFG);
5300 if (tmp & CHANSIZE_MASK) {
5301 chansize = 64;
5302 } else {
5303 chansize = 32;
5304 }
5305 tmp = RREG32(MC_SHARED_CHMAP);
5306 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5307 case 0:
5308 default:
5309 numchan = 1;
5310 break;
5311 case 1:
5312 numchan = 2;
5313 break;
5314 case 2:
5315 numchan = 4;
5316 break;
5317 case 3:
5318 numchan = 8;
5319 break;
5320 case 4:
5321 numchan = 3;
5322 break;
5323 case 5:
5324 numchan = 6;
5325 break;
5326 case 6:
5327 numchan = 10;
5328 break;
5329 case 7:
5330 numchan = 12;
5331 break;
5332 case 8:
5333 numchan = 16;
5334 break;
5335 }
5336 rdev->mc.vram_width = numchan * chansize;
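/* worked example (illustrative arithmetic only): a 64-bit channel
 * (CHANSIZE set) with NOOFCHAN = 3, i.e. 8 channels, reports an
 * effective vram width of 8 * 64 = 512 bits
 */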
5337 /* Could the aperture size ever report 0? */
5338 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5339 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5340 /* CONFIG_MEMSIZE reports the VRAM size in MB */
5341 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5342 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5343 rdev->mc.visible_vram_size = rdev->mc.aper_size;
5344 si_vram_gtt_location(rdev, &rdev->mc);
5345 radeon_update_bandwidth_info(rdev);
5346
5347 return 0;
5348 }
5349
5350 /*
5351 * GART
5352 * VMID 0 is the physical GPU address space as used by the kernel.
5353 * VMIDs 1-15 are used for userspace clients and are handled
5354 * by the radeon vm/hsa code.
5355 */
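/*
 * Illustration (a minimal sketch, not driver code): with the flat
 * VMID 0 page table programmed by cik_pcie_gart_enable() below, a
 * GTT address resolves to its page table entry roughly as
 *
 *   pte_index = (gpu_addr - rdev->mc.gtt_start) >> 12;   4 KiB pages
 *   pte_addr  = rdev->gart.table_addr + pte_index * 8;   64-bit PTEs
 *
 * cik_pcie_gart_tlb_flush() must then invalidate the TLB so an
 * updated PTE is picked up.
 */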
5356 /**
5357 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5358 *
5359 * @rdev: radeon_device pointer
5360 *
5361 * Flush the TLB for the VMID 0 page table (CIK).
5362 */
5363 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5364 {
5365 /* flush hdp cache */
5366 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5367
5368 /* bits 0-15 are the VM contexts 0-15 */
5369 WREG32(VM_INVALIDATE_REQUEST, 0x1);
5370 }
5371
5372 /**
5373 * cik_pcie_gart_enable - gart enable
5374 *
5375 * @rdev: radeon_device pointer
5376 *
5377 * This sets up the TLBs, programs the page tables for VMID0,
5378 * sets up the hw for VMIDs 1-15 which are allocated on
5379 * demand, and sets up the global locations for the LDS, GDS,
5380 * and GPUVM for FSA64 clients (CIK).
5381 * Returns 0 for success, errors for failure.
5382 */
5383 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5384 {
5385 int r, i;
5386
5387 if (rdev->gart.robj == NULL) {
5388 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5389 return -EINVAL;
5390 }
5391 r = radeon_gart_table_vram_pin(rdev);
5392 if (r)
5393 return r;
5394 radeon_gart_restore(rdev);
5395 /* Setup TLB control */
5396 WREG32(MC_VM_MX_L1_TLB_CNTL,
5397 (0xA << 7) |
5398 ENABLE_L1_TLB |
5399 ENABLE_L1_FRAGMENT_PROCESSING |
5400 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5401 ENABLE_ADVANCED_DRIVER_MODEL |
5402 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5403 /* Setup L2 cache */
5404 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5405 ENABLE_L2_FRAGMENT_PROCESSING |
5406 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5407 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5408 EFFECTIVE_L2_QUEUE_SIZE(7) |
5409 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5410 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5411 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5412 BANK_SELECT(4) |
5413 L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5414 /* setup context0 */
5415 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5416 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5417 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5418 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5419 (u32)(rdev->dummy_page.addr >> 12));
5420 WREG32(VM_CONTEXT0_CNTL2, 0);
5421 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5422 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5423
5424 WREG32(0x15D4, 0);
5425 WREG32(0x15D8, 0);
5426 WREG32(0x15DC, 0);
5427
5428 /* empty context1-15 */
5429 /* FIXME start with 4G, once using 2 level pt switch to full
5430 * vm size space
5431 */
5432 /* set vm size, must be a multiple of 4 */
5433 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5434 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
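/* contexts 0-7 and 8-15 have separate PAGE_TABLE_BASE_ADDR register
 * banks, hence the split below */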
5435 for (i = 1; i < 16; i++) {
5436 if (i < 8)
5437 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5438 rdev->gart.table_addr >> 12);
5439 else
5440 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5441 rdev->gart.table_addr >> 12);
5442 }
5443
5444 /* enable context1-15 */
5445 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5446 (u32)(rdev->dummy_page.addr >> 12));
5447 WREG32(VM_CONTEXT1_CNTL2, 4);
5448 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5449 PAGE_TABLE_BLOCK_SIZE(RADEON_VM_BLOCK_SIZE - 9) |
5450 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5451 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5452 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5453 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5454 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5455 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5456 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5457 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5458 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5459 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5460 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5461 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5462
5463 if (rdev->family == CHIP_KAVERI) {
5464 u32 tmp = RREG32(CHUB_CONTROL);
5465 tmp &= ~BYPASS_VM;
5466 WREG32(CHUB_CONTROL, tmp);
5467 }
5468
5469 /* XXX SH_MEM regs */
5470 /* where to put LDS, scratch, GPUVM in FSA64 space */
5471 mutex_lock(&rdev->srbm_mutex);
5472 for (i = 0; i < 16; i++) {
5473 cik_srbm_select(rdev, 0, 0, 0, i);
5474 /* CP and shaders */
5475 WREG32(SH_MEM_CONFIG, 0);
5476 WREG32(SH_MEM_APE1_BASE, 1);
5477 WREG32(SH_MEM_APE1_LIMIT, 0);
5478 WREG32(SH_MEM_BASES, 0);
5479 /* SDMA GFX */
5480 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5481 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5482 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5483 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5484 /* XXX SDMA RLC - todo */
5485 }
5486 cik_srbm_select(rdev, 0, 0, 0, 0);
5487 mutex_unlock(&rdev->srbm_mutex);
5488
5489 cik_pcie_gart_tlb_flush(rdev);
5490 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5491 (unsigned)(rdev->mc.gtt_size >> 20),
5492 (unsigned long long)rdev->gart.table_addr);
5493 rdev->gart.ready = true;
5494 return 0;
5495 }
5496
5497 /**
5498 * cik_pcie_gart_disable - gart disable
5499 *
5500 * @rdev: radeon_device pointer
5501 *
5502 * This disables all VM page tables (CIK).
5503 */
5504 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5505 {
5506 /* Disable all tables */
5507 WREG32(VM_CONTEXT0_CNTL, 0);
5508 WREG32(VM_CONTEXT1_CNTL, 0);
5509 /* Setup TLB control */
5510 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5511 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5512 /* Setup L2 cache */
5513 WREG32(VM_L2_CNTL,
5514 ENABLE_L2_FRAGMENT_PROCESSING |
5515 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5516 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5517 EFFECTIVE_L2_QUEUE_SIZE(7) |
5518 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5519 WREG32(VM_L2_CNTL2, 0);
5520 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5521 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5522 radeon_gart_table_vram_unpin(rdev);
5523 }
5524
5525 /**
5526 * cik_pcie_gart_fini - vm fini callback
5527 *
5528 * @rdev: radeon_device pointer
5529 *
5530 * Tears down the driver GART/VM setup (CIK).
5531 */
5532 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5533 {
5534 cik_pcie_gart_disable(rdev);
5535 radeon_gart_table_vram_free(rdev);
5536 radeon_gart_fini(rdev);
5537 }
5538
5539 /* vm parser */
5540 /**
5541 * cik_ib_parse - vm ib_parse callback
5542 *
5543 * @rdev: radeon_device pointer
5544 * @ib: indirect buffer pointer
5545 *
5546 * CIK uses hw IB checking so this is a nop (CIK).
5547 */
5548 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5549 {
5550 return 0;
5551 }
5552
5553 /*
5554 * vm
5555 * VMID 0 is the physical GPU address space as used by the kernel.
5556 * VMIDs 1-15 are used for userspace clients and are handled
5557 * by the radeon vm/hsa code.
5558 */
5559 /**
5560 * cik_vm_init - cik vm init callback
5561 *
5562 * @rdev: radeon_device pointer
5563 *
5564 * Inits cik specific vm parameters (number of VMs, base of vram for
5565 * VMIDs 1-15) (CIK).
5566 * Returns 0 for success.
5567 */
5568 int cik_vm_init(struct radeon_device *rdev)
5569 {
5570 /* number of VMs */
5571 rdev->vm_manager.nvm = 16;
5572 /* base offset of vram pages */
5573 if (rdev->flags & RADEON_IS_IGP) {
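/* MC_VM_FB_OFFSET is in 4 MiB units; << 22 converts it to bytes */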
5574 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5575 tmp <<= 22;
5576 rdev->vm_manager.vram_base_offset = tmp;
5577 } else
5578 rdev->vm_manager.vram_base_offset = 0;
5579
5580 return 0;
5581 }
5582
5583 /**
5584 * cik_vm_fini - cik vm fini callback
5585 *
5586 * @rdev: radeon_device pointer
5587 *
5588 * Tear down any asic specific VM setup (CIK).
5589 */
5590 void cik_vm_fini(struct radeon_device *rdev)
5591 {
5592 }
5593
5594 /**
5595 * cik_vm_decode_fault - print human readable fault info
5596 *
5597 * @rdev: radeon_device pointer
5598 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5599 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5600 *
5601 * Print human readable fault information (CIK).
5602 */
5603 static void cik_vm_decode_fault(struct radeon_device *rdev,
5604 u32 status, u32 addr, u32 mc_client)
5605 {
5606 u32 mc_id;
5607 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5608 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5609 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5610 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5611
5612 if (rdev->family == CHIP_HAWAII)
5613 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5614 else
5615 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5616
5617 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5618 protections, vmid, addr,
5619 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5620 block, mc_client, mc_id);
5621 }
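/*
 * Example of the message above, with made-up values: a write fault in
 * VMID 3 from the CPC block would look like
 *   VM fault (0x04, vmid 3) at page 1234, write from 'CPC ' (0x43504320) (123)
 */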
5622
5623 /**
5624 * cik_vm_flush - cik vm flush using the CP
5625 *
5626 * @rdev: radeon_device pointer
5627 *
5628 * Update the page table base and flush the VM TLB
5629 * using the CP (CIK).
5630 */
5631 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5632 {
5633 struct radeon_ring *ring = &rdev->ring[ridx];
5634
5635 if (vm == NULL)
5636 return;
5637
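/* Sketch of the PACKET3_WRITE_DATA layout used throughout this
 * function (register writes, hence DST_SEL(0)):
 *   [header][control][reg dword addr (reg >> 2)][addr hi = 0][data...]
 */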
5638 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5639 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5640 WRITE_DATA_DST_SEL(0)));
5641 if (vm->id < 8) {
5642 radeon_ring_write(ring,
5643 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5644 } else {
5645 radeon_ring_write(ring,
5646 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5647 }
5648 radeon_ring_write(ring, 0);
5649 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5650
5651 /* update SH_MEM_* regs */
5652 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5653 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5654 WRITE_DATA_DST_SEL(0)));
5655 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5656 radeon_ring_write(ring, 0);
5657 radeon_ring_write(ring, VMID(vm->id));
5658
5659 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5660 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5661 WRITE_DATA_DST_SEL(0)));
5662 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5663 radeon_ring_write(ring, 0);
5664
5665 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5666 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5667 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5668 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5669
5670 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5671 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5672 WRITE_DATA_DST_SEL(0)));
5673 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5674 radeon_ring_write(ring, 0);
5675 radeon_ring_write(ring, VMID(0));
5676
5677 /* HDP flush */
5678 cik_hdp_flush_cp_ring_emit(rdev, ridx);
5679
5680 /* bits 0-15 are the VM contexts 0-15 */
5681 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5682 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5683 WRITE_DATA_DST_SEL(0)));
5684 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5685 radeon_ring_write(ring, 0);
5686 radeon_ring_write(ring, 1 << vm->id);
5687
5688 /* compute doesn't have PFP */
5689 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5690 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5691 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5692 radeon_ring_write(ring, 0x0);
5693 }
5694 }
5695
5696 /*
5697 * RLC
5698 * The RLC is a multi-purpose microengine that handles a
5699 * variety of functions, the most important of which is
5700 * the interrupt controller.
5701 */
5702 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5703 bool enable)
5704 {
5705 u32 tmp = RREG32(CP_INT_CNTL_RING0);
5706
5707 if (enable)
5708 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5709 else
5710 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5711 WREG32(CP_INT_CNTL_RING0, tmp);
5712 }
5713
5714 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5715 {
5716 u32 tmp;
5717
5718 tmp = RREG32(RLC_LB_CNTL);
5719 if (enable)
5720 tmp |= LOAD_BALANCE_ENABLE;
5721 else
5722 tmp &= ~LOAD_BALANCE_ENABLE;
5723 WREG32(RLC_LB_CNTL, tmp);
5724 }
5725
5726 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5727 {
5728 u32 i, j, k;
5729 u32 mask;
5730
5731 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5732 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5733 cik_select_se_sh(rdev, i, j);
5734 for (k = 0; k < rdev->usec_timeout; k++) {
5735 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5736 break;
5737 udelay(1);
5738 }
5739 }
5740 }
5741 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5742
5743 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5744 for (k = 0; k < rdev->usec_timeout; k++) {
5745 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5746 break;
5747 udelay(1);
5748 }
5749 }
5750
5751 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5752 {
5753 u32 tmp;
5754
5755 tmp = RREG32(RLC_CNTL);
5756 if (tmp != rlc)
5757 WREG32(RLC_CNTL, rlc);
5758 }
5759
5760 static u32 cik_halt_rlc(struct radeon_device *rdev)
5761 {
5762 u32 data, orig;
5763
5764 orig = data = RREG32(RLC_CNTL);
5765
5766 if (data & RLC_ENABLE) {
5767 u32 i;
5768
5769 data &= ~RLC_ENABLE;
5770 WREG32(RLC_CNTL, data);
5771
5772 for (i = 0; i < rdev->usec_timeout; i++) {
5773 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5774 break;
5775 udelay(1);
5776 }
5777
5778 cik_wait_for_rlc_serdes(rdev);
5779 }
5780
5781 return orig;
5782 }
5783
5784 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5785 {
5786 u32 tmp, i, mask;
5787
5788 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5789 WREG32(RLC_GPR_REG2, tmp);
5790
5791 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5792 for (i = 0; i < rdev->usec_timeout; i++) {
5793 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5794 break;
5795 udelay(1);
5796 }
5797
5798 for (i = 0; i < rdev->usec_timeout; i++) {
5799 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5800 break;
5801 udelay(1);
5802 }
5803 }
5804
5805 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5806 {
5807 u32 tmp;
5808
5809 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5810 WREG32(RLC_GPR_REG2, tmp);
5811 }
5812
5813 /**
5814 * cik_rlc_stop - stop the RLC ME
5815 *
5816 * @rdev: radeon_device pointer
5817 *
5818 * Halt the RLC ME (MicroEngine) (CIK).
5819 */
5820 static void cik_rlc_stop(struct radeon_device *rdev)
5821 {
5822 WREG32(RLC_CNTL, 0);
5823
5824 cik_enable_gui_idle_interrupt(rdev, false);
5825
5826 cik_wait_for_rlc_serdes(rdev);
5827 }
5828
5829 /**
5830 * cik_rlc_start - start the RLC ME
5831 *
5832 * @rdev: radeon_device pointer
5833 *
5834 * Unhalt the RLC ME (MicroEngine) (CIK).
5835 */
5836 static void cik_rlc_start(struct radeon_device *rdev)
5837 {
5838 WREG32(RLC_CNTL, RLC_ENABLE);
5839
5840 cik_enable_gui_idle_interrupt(rdev, true);
5841
5842 udelay(50);
5843 }
5844
5845 /**
5846 * cik_rlc_resume - setup the RLC hw
5847 *
5848 * @rdev: radeon_device pointer
5849 *
5850 * Initialize the RLC registers, load the ucode,
5851 * and start the RLC (CIK).
5852 * Returns 0 for success, -EINVAL if the ucode is not available.
5853 */
5854 static int cik_rlc_resume(struct radeon_device *rdev)
5855 {
5856 u32 i, size, tmp;
5857 const __be32 *fw_data;
5858
5859 if (!rdev->rlc_fw)
5860 return -EINVAL;
5861
5862 switch (rdev->family) {
5863 case CHIP_BONAIRE:
5864 case CHIP_HAWAII:
5865 default:
5866 size = BONAIRE_RLC_UCODE_SIZE;
5867 break;
5868 case CHIP_KAVERI:
5869 size = KV_RLC_UCODE_SIZE;
5870 break;
5871 case CHIP_KABINI:
5872 size = KB_RLC_UCODE_SIZE;
5873 break;
5874 case CHIP_MULLINS:
5875 size = ML_RLC_UCODE_SIZE;
5876 break;
5877 }
5878
5879 cik_rlc_stop(rdev);
5880
5881 /* disable CG */
5882 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5883 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5884
5885 si_rlc_reset(rdev);
5886
5887 cik_init_pg(rdev);
5888
5889 cik_init_cg(rdev);
5890
5891 WREG32(RLC_LB_CNTR_INIT, 0);
5892 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5893
5894 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5895 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5896 WREG32(RLC_LB_PARAMS, 0x00600408);
5897 WREG32(RLC_LB_CNTL, 0x80000004);
5898
5899 WREG32(RLC_MC_CNTL, 0);
5900 WREG32(RLC_UCODE_CNTL, 0);
5901
5902 fw_data = (const __be32 *)rdev->rlc_fw->data;
5903 WREG32(RLC_GPM_UCODE_ADDR, 0);
5904 for (i = 0; i < size; i++)
5905 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5906 WREG32(RLC_GPM_UCODE_ADDR, 0);
5907
5908 /* XXX - find out what chips support lbpw */
5909 cik_enable_lbpw(rdev, false);
5910
5911 if (rdev->family == CHIP_BONAIRE)
5912 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5913
5914 cik_rlc_start(rdev);
5915
5916 return 0;
5917 }
5918
5919 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5920 {
5921 u32 data, orig, tmp, tmp2;
5922
5923 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5924
5925 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5926 cik_enable_gui_idle_interrupt(rdev, true);
5927
5928 tmp = cik_halt_rlc(rdev);
5929
5930 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5931 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5932 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5933 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5934 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5935
5936 cik_update_rlc(rdev, tmp);
5937
5938 data |= CGCG_EN | CGLS_EN;
5939 } else {
5940 cik_enable_gui_idle_interrupt(rdev, false);
5941
5942 RREG32(CB_CGTT_SCLK_CTRL);
5943 RREG32(CB_CGTT_SCLK_CTRL);
5944 RREG32(CB_CGTT_SCLK_CTRL);
5945 RREG32(CB_CGTT_SCLK_CTRL);
5946
5947 data &= ~(CGCG_EN | CGLS_EN);
5948 }
5949
5950 if (orig != data)
5951 WREG32(RLC_CGCG_CGLS_CTRL, data);
5952
5953 }
5954
5955 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5956 {
5957 u32 data, orig, tmp = 0;
5958
5959 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5960 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5961 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5962 orig = data = RREG32(CP_MEM_SLP_CNTL);
5963 data |= CP_MEM_LS_EN;
5964 if (orig != data)
5965 WREG32(CP_MEM_SLP_CNTL, data);
5966 }
5967 }
5968
5969 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5970 data &= 0xfffffffd;
5971 if (orig != data)
5972 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5973
5974 tmp = cik_halt_rlc(rdev);
5975
5976 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5977 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5978 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5979 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5980 WREG32(RLC_SERDES_WR_CTRL, data);
5981
5982 cik_update_rlc(rdev, tmp);
5983
5984 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5985 orig = data = RREG32(CGTS_SM_CTRL_REG);
5986 data &= ~SM_MODE_MASK;
5987 data |= SM_MODE(0x2);
5988 data |= SM_MODE_ENABLE;
5989 data &= ~CGTS_OVERRIDE;
5990 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5991 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5992 data &= ~CGTS_LS_OVERRIDE;
5993 data &= ~ON_MONITOR_ADD_MASK;
5994 data |= ON_MONITOR_ADD_EN;
5995 data |= ON_MONITOR_ADD(0x96);
5996 if (orig != data)
5997 WREG32(CGTS_SM_CTRL_REG, data);
5998 }
5999 } else {
6000 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6001 data |= 0x00000002;
6002 if (orig != data)
6003 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6004
6005 data = RREG32(RLC_MEM_SLP_CNTL);
6006 if (data & RLC_MEM_LS_EN) {
6007 data &= ~RLC_MEM_LS_EN;
6008 WREG32(RLC_MEM_SLP_CNTL, data);
6009 }
6010
6011 data = RREG32(CP_MEM_SLP_CNTL);
6012 if (data & CP_MEM_LS_EN) {
6013 data &= ~CP_MEM_LS_EN;
6014 WREG32(CP_MEM_SLP_CNTL, data);
6015 }
6016
6017 orig = data = RREG32(CGTS_SM_CTRL_REG);
6018 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6019 if (orig != data)
6020 WREG32(CGTS_SM_CTRL_REG, data);
6021
6022 tmp = cik_halt_rlc(rdev);
6023
6024 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6025 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6026 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6027 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6028 WREG32(RLC_SERDES_WR_CTRL, data);
6029
6030 cik_update_rlc(rdev, tmp);
6031 }
6032 }
6033
6034 static const u32 mc_cg_registers[] =
6035 {
6036 MC_HUB_MISC_HUB_CG,
6037 MC_HUB_MISC_SIP_CG,
6038 MC_HUB_MISC_VM_CG,
6039 MC_XPB_CLK_GAT,
6040 ATC_MISC_CG,
6041 MC_CITF_MISC_WR_CG,
6042 MC_CITF_MISC_RD_CG,
6043 MC_CITF_MISC_VM_CG,
6044 VM_L2_CG,
6045 };
6046
6047 static void cik_enable_mc_ls(struct radeon_device *rdev,
6048 bool enable)
6049 {
6050 int i;
6051 u32 orig, data;
6052
6053 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6054 orig = data = RREG32(mc_cg_registers[i]);
6055 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6056 data |= MC_LS_ENABLE;
6057 else
6058 data &= ~MC_LS_ENABLE;
6059 if (data != orig)
6060 WREG32(mc_cg_registers[i], data);
6061 }
6062 }
6063
6064 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6065 bool enable)
6066 {
6067 int i;
6068 u32 orig, data;
6069
6070 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6071 orig = data = RREG32(mc_cg_registers[i]);
6072 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6073 data |= MC_CG_ENABLE;
6074 else
6075 data &= ~MC_CG_ENABLE;
6076 if (data != orig)
6077 WREG32(mc_cg_registers[i], data);
6078 }
6079 }
6080
6081 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6082 bool enable)
6083 {
6084 u32 orig, data;
6085
6086 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6087 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6088 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6089 } else {
6090 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6091 data |= 0xff000000;
6092 if (data != orig)
6093 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6094
6095 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6096 data |= 0xff000000;
6097 if (data != orig)
6098 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6099 }
6100 }
6101
6102 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6103 bool enable)
6104 {
6105 u32 orig, data;
6106
6107 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6108 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6109 data |= 0x100;
6110 if (orig != data)
6111 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6112
6113 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6114 data |= 0x100;
6115 if (orig != data)
6116 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6117 } else {
6118 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6119 data &= ~0x100;
6120 if (orig != data)
6121 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6122
6123 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6124 data &= ~0x100;
6125 if (orig != data)
6126 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6127 }
6128 }
6129
6130 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6131 bool enable)
6132 {
6133 u32 orig, data;
6134
6135 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6136 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6137 data = 0xfff;
6138 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6139
6140 orig = data = RREG32(UVD_CGC_CTRL);
6141 data |= DCM;
6142 if (orig != data)
6143 WREG32(UVD_CGC_CTRL, data);
6144 } else {
6145 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6146 data &= ~0xfff;
6147 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6148
6149 orig = data = RREG32(UVD_CGC_CTRL);
6150 data &= ~DCM;
6151 if (orig != data)
6152 WREG32(UVD_CGC_CTRL, data);
6153 }
6154 }
6155
6156 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6157 bool enable)
6158 {
6159 u32 orig, data;
6160
6161 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6162
6163 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6164 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6165 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6166 else
6167 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6168 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6169
6170 if (orig != data)
6171 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6172 }
6173
6174 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6175 bool enable)
6176 {
6177 u32 orig, data;
6178
6179 orig = data = RREG32(HDP_HOST_PATH_CNTL);
6180
6181 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6182 data &= ~CLOCK_GATING_DIS;
6183 else
6184 data |= CLOCK_GATING_DIS;
6185
6186 if (orig != data)
6187 WREG32(HDP_HOST_PATH_CNTL, data);
6188 }
6189
6190 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6191 bool enable)
6192 {
6193 u32 orig, data;
6194
6195 orig = data = RREG32(HDP_MEM_POWER_LS);
6196
6197 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6198 data |= HDP_LS_ENABLE;
6199 else
6200 data &= ~HDP_LS_ENABLE;
6201
6202 if (orig != data)
6203 WREG32(HDP_MEM_POWER_LS, data);
6204 }
6205
6206 void cik_update_cg(struct radeon_device *rdev,
6207 u32 block, bool enable)
6208 {
6209
6210 if (block & RADEON_CG_BLOCK_GFX) {
6211 cik_enable_gui_idle_interrupt(rdev, false);
6212 /* order matters! */
6213 if (enable) {
6214 cik_enable_mgcg(rdev, true);
6215 cik_enable_cgcg(rdev, true);
6216 } else {
6217 cik_enable_cgcg(rdev, false);
6218 cik_enable_mgcg(rdev, false);
6219 }
6220 cik_enable_gui_idle_interrupt(rdev, true);
6221 }
6222
6223 if (block & RADEON_CG_BLOCK_MC) {
6224 if (!(rdev->flags & RADEON_IS_IGP)) {
6225 cik_enable_mc_mgcg(rdev, enable);
6226 cik_enable_mc_ls(rdev, enable);
6227 }
6228 }
6229
6230 if (block & RADEON_CG_BLOCK_SDMA) {
6231 cik_enable_sdma_mgcg(rdev, enable);
6232 cik_enable_sdma_mgls(rdev, enable);
6233 }
6234
6235 if (block & RADEON_CG_BLOCK_BIF) {
6236 cik_enable_bif_mgls(rdev, enable);
6237 }
6238
6239 if (block & RADEON_CG_BLOCK_UVD) {
6240 if (rdev->has_uvd)
6241 cik_enable_uvd_mgcg(rdev, enable);
6242 }
6243
6244 if (block & RADEON_CG_BLOCK_HDP) {
6245 cik_enable_hdp_mgcg(rdev, enable);
6246 cik_enable_hdp_ls(rdev, enable);
6247 }
6248
6249 if (block & RADEON_CG_BLOCK_VCE) {
6250 vce_v2_0_enable_mgcg(rdev, enable);
6251 }
6252 }
6253
6254 static void cik_init_cg(struct radeon_device *rdev)
6255 {
6256
6257 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6258
6259 if (rdev->has_uvd)
6260 si_init_uvd_internal_cg(rdev);
6261
6262 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6263 RADEON_CG_BLOCK_SDMA |
6264 RADEON_CG_BLOCK_BIF |
6265 RADEON_CG_BLOCK_UVD |
6266 RADEON_CG_BLOCK_HDP), true);
6267 }
6268
6269 static void cik_fini_cg(struct radeon_device *rdev)
6270 {
6271 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6272 RADEON_CG_BLOCK_SDMA |
6273 RADEON_CG_BLOCK_BIF |
6274 RADEON_CG_BLOCK_UVD |
6275 RADEON_CG_BLOCK_HDP), false);
6276
6277 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6278 }
6279
6280 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6281 bool enable)
6282 {
6283 u32 data, orig;
6284
6285 orig = data = RREG32(RLC_PG_CNTL);
6286 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6287 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6288 else
6289 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6290 if (orig != data)
6291 WREG32(RLC_PG_CNTL, data);
6292 }
6293
6294 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6295 bool enable)
6296 {
6297 u32 data, orig;
6298
6299 orig = data = RREG32(RLC_PG_CNTL);
6300 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6301 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6302 else
6303 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6304 if (orig != data)
6305 WREG32(RLC_PG_CNTL, data);
6306 }
6307
6308 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6309 {
6310 u32 data, orig;
6311
6312 orig = data = RREG32(RLC_PG_CNTL);
6313 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6314 data &= ~DISABLE_CP_PG;
6315 else
6316 data |= DISABLE_CP_PG;
6317 if (orig != data)
6318 WREG32(RLC_PG_CNTL, data);
6319 }
6320
6321 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6322 {
6323 u32 data, orig;
6324
6325 orig = data = RREG32(RLC_PG_CNTL);
6326 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6327 data &= ~DISABLE_GDS_PG;
6328 else
6329 data |= DISABLE_GDS_PG;
6330 if (orig != data)
6331 WREG32(RLC_PG_CNTL, data);
6332 }
6333
6334 #define CP_ME_TABLE_SIZE 96
6335 #define CP_ME_TABLE_OFFSET 2048
6336 #define CP_MEC_TABLE_OFFSET 4096
6337
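/*
 * Resulting cp_table layout (sketch): one CP_ME_TABLE_SIZE-dword block
 * per microengine, packed back to back in the order CE, PFP, ME, MEC1
 * (plus MEC2 on KAVERI), each copied from table_offset dwords into the
 * corresponding firmware image.
 */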
6338 void cik_init_cp_pg_table(struct radeon_device *rdev)
6339 {
6340 const __be32 *fw_data;
6341 volatile u32 *dst_ptr;
6342 int me, i, max_me = 4;
6343 u32 bo_offset = 0;
6344 u32 table_offset;
6345
6346 if (rdev->family == CHIP_KAVERI)
6347 max_me = 5;
6348
6349 if (rdev->rlc.cp_table_ptr == NULL)
6350 return;
6351
6352 /* write the cp table buffer */
6353 dst_ptr = rdev->rlc.cp_table_ptr;
6354 for (me = 0; me < max_me; me++) {
6355 if (me == 0) {
6356 fw_data = (const __be32 *)rdev->ce_fw->data;
6357 table_offset = CP_ME_TABLE_OFFSET;
6358 } else if (me == 1) {
6359 fw_data = (const __be32 *)rdev->pfp_fw->data;
6360 table_offset = CP_ME_TABLE_OFFSET;
6361 } else if (me == 2) {
6362 fw_data = (const __be32 *)rdev->me_fw->data;
6363 table_offset = CP_ME_TABLE_OFFSET;
6364 } else {
6365 fw_data = (const __be32 *)rdev->mec_fw->data;
6366 table_offset = CP_MEC_TABLE_OFFSET;
6367 }
6368
6369 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
6370 dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6371 }
6372 bo_offset += CP_ME_TABLE_SIZE;
6373 }
6374 }
6375
6376 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6377 bool enable)
6378 {
6379 u32 data, orig;
6380
6381 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6382 orig = data = RREG32(RLC_PG_CNTL);
6383 data |= GFX_PG_ENABLE;
6384 if (orig != data)
6385 WREG32(RLC_PG_CNTL, data);
6386
6387 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6388 data |= AUTO_PG_EN;
6389 if (orig != data)
6390 WREG32(RLC_AUTO_PG_CTRL, data);
6391 } else {
6392 orig = data = RREG32(RLC_PG_CNTL);
6393 data &= ~GFX_PG_ENABLE;
6394 if (orig != data)
6395 WREG32(RLC_PG_CNTL, data);
6396
6397 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6398 data &= ~AUTO_PG_EN;
6399 if (orig != data)
6400 WREG32(RLC_AUTO_PG_CTRL, data);
6401
6402 data = RREG32(DB_RENDER_CONTROL);
6403 }
6404 }
6405
6406 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6407 {
6408 u32 mask = 0, tmp, tmp1;
6409 int i;
6410
6411 cik_select_se_sh(rdev, se, sh);
6412 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6413 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6414 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6415
6416 tmp &= 0xffff0000;
6417
6418 tmp |= tmp1;
6419 tmp >>= 16;
6420
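/* build a mask with one bit per possible CU in this SH, e.g.
 * max_cu_per_sh = 8 gives mask = 0xff; (~tmp) & mask below then
 * reports only the CUs that exist and are not disabled */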
6421 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6422 mask <<= 1;
6423 mask |= 1;
6424 }
6425
6426 return (~tmp) & mask;
6427 }
6428
6429 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6430 {
6431 u32 i, j, k, active_cu_number = 0;
6432 u32 mask, counter, cu_bitmap;
6433 u32 tmp = 0;
6434
6435 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6436 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6437 mask = 1;
6438 cu_bitmap = 0;
6439 counter = 0;
6440 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6441 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6442 if (counter < 2)
6443 cu_bitmap |= mask;
6444 counter ++;
6445 }
6446 mask <<= 1;
6447 }
6448
6449 active_cu_number += counter;
6450 tmp |= (cu_bitmap << (i * 16 + j * 8));
6451 }
6452 }
6453
6454 WREG32(RLC_PG_AO_CU_MASK, tmp);
6455
6456 tmp = RREG32(RLC_MAX_PG_CU);
6457 tmp &= ~MAX_PU_CU_MASK;
6458 tmp |= MAX_PU_CU(active_cu_number);
6459 WREG32(RLC_MAX_PG_CU, tmp);
6460 }
6461
6462 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6463 bool enable)
6464 {
6465 u32 data, orig;
6466
6467 orig = data = RREG32(RLC_PG_CNTL);
6468 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6469 data |= STATIC_PER_CU_PG_ENABLE;
6470 else
6471 data &= ~STATIC_PER_CU_PG_ENABLE;
6472 if (orig != data)
6473 WREG32(RLC_PG_CNTL, data);
6474 }
6475
6476 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6477 bool enable)
6478 {
6479 u32 data, orig;
6480
6481 orig = data = RREG32(RLC_PG_CNTL);
6482 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6483 data |= DYN_PER_CU_PG_ENABLE;
6484 else
6485 data &= ~DYN_PER_CU_PG_ENABLE;
6486 if (orig != data)
6487 WREG32(RLC_PG_CNTL, data);
6488 }
6489
6490 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6491 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
6492
6493 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6494 {
6495 u32 data, orig;
6496 u32 i;
6497
6498 if (rdev->rlc.cs_data) {
6499 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6500 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6501 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6502 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6503 } else {
6504 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6505 for (i = 0; i < 3; i++)
6506 WREG32(RLC_GPM_SCRATCH_DATA, 0);
6507 }
6508 if (rdev->rlc.reg_list) {
6509 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6510 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6511 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6512 }
6513
6514 orig = data = RREG32(RLC_PG_CNTL);
6515 data |= GFX_PG_SRC;
6516 if (orig != data)
6517 WREG32(RLC_PG_CNTL, data);
6518
6519 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6520 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6521
6522 data = RREG32(CP_RB_WPTR_POLL_CNTL);
6523 data &= ~IDLE_POLL_COUNT_MASK;
6524 data |= IDLE_POLL_COUNT(0x60);
6525 WREG32(CP_RB_WPTR_POLL_CNTL, data);
6526
6527 data = 0x10101010;
6528 WREG32(RLC_PG_DELAY, data);
6529
6530 data = RREG32(RLC_PG_DELAY_2);
6531 data &= ~0xff;
6532 data |= 0x3;
6533 WREG32(RLC_PG_DELAY_2, data);
6534
6535 data = RREG32(RLC_AUTO_PG_CTRL);
6536 data &= ~GRBM_REG_SGIT_MASK;
6537 data |= GRBM_REG_SGIT(0x700);
6538 WREG32(RLC_AUTO_PG_CTRL, data);
6539
6540 }
6541
6542 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6543 {
6544 cik_enable_gfx_cgpg(rdev, enable);
6545 cik_enable_gfx_static_mgpg(rdev, enable);
6546 cik_enable_gfx_dynamic_mgpg(rdev, enable);
6547 }
6548
6549 u32 cik_get_csb_size(struct radeon_device *rdev)
6550 {
6551 u32 count = 0;
6552 const struct cs_section_def *sect = NULL;
6553 const struct cs_extent_def *ext = NULL;
6554
6555 if (rdev->rlc.cs_data == NULL)
6556 return 0;
6557
6558 /* begin clear state */
6559 count += 2;
6560 /* context control state */
6561 count += 3;
6562
6563 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6564 for (ext = sect->section; ext->extent != NULL; ++ext) {
6565 if (sect->id == SECT_CONTEXT)
6566 count += 2 + ext->reg_count;
6567 else
6568 return 0;
6569 }
6570 }
6571 /* pa_sc_raster_config/pa_sc_raster_config1 */
6572 count += 4;
6573 /* end clear state */
6574 count += 2;
6575 /* clear state */
6576 count += 2;
6577
6578 return count;
6579 }
6580
6581 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6582 {
6583 u32 count = 0, i;
6584 const struct cs_section_def *sect = NULL;
6585 const struct cs_extent_def *ext = NULL;
6586
6587 if (rdev->rlc.cs_data == NULL)
6588 return;
6589 if (buffer == NULL)
6590 return;
6591
6592 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6593 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6594
6595 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6596 buffer[count++] = cpu_to_le32(0x80000000);
6597 buffer[count++] = cpu_to_le32(0x80000000);
6598
6599 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6600 for (ext = sect->section; ext->extent != NULL; ++ext) {
6601 if (sect->id == SECT_CONTEXT) {
6602 buffer[count++] =
6603 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6604 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6605 for (i = 0; i < ext->reg_count; i++)
6606 buffer[count++] = cpu_to_le32(ext->extent[i]);
6607 } else {
6608 return;
6609 }
6610 }
6611 }
6612
6613 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6614 buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6615 switch (rdev->family) {
6616 case CHIP_BONAIRE:
6617 buffer[count++] = cpu_to_le32(0x16000012);
6618 buffer[count++] = cpu_to_le32(0x00000000);
6619 break;
6620 case CHIP_KAVERI:
6621 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6622 buffer[count++] = cpu_to_le32(0x00000000);
6623 break;
6624 case CHIP_KABINI:
6625 case CHIP_MULLINS:
6626 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6627 buffer[count++] = cpu_to_le32(0x00000000);
6628 break;
6629 case CHIP_HAWAII:
6630 buffer[count++] = cpu_to_le32(0x3a00161a);
6631 buffer[count++] = cpu_to_le32(0x0000002e);
6632 break;
6633 default:
6634 buffer[count++] = cpu_to_le32(0x00000000);
6635 buffer[count++] = cpu_to_le32(0x00000000);
6636 break;
6637 }
6638
6639 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6640 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6641
6642 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6643 buffer[count++] = cpu_to_le32(0);
6644 }
6645
6646 static void cik_init_pg(struct radeon_device *rdev)
6647 {
6648 if (rdev->pg_flags) {
6649 cik_enable_sck_slowdown_on_pu(rdev, true);
6650 cik_enable_sck_slowdown_on_pd(rdev, true);
6651 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6652 cik_init_gfx_cgpg(rdev);
6653 cik_enable_cp_pg(rdev, true);
6654 cik_enable_gds_pg(rdev, true);
6655 }
6656 cik_init_ao_cu_mask(rdev);
6657 cik_update_gfx_pg(rdev, true);
6658 }
6659 }
6660
6661 static void cik_fini_pg(struct radeon_device *rdev)
6662 {
6663 if (rdev->pg_flags) {
6664 cik_update_gfx_pg(rdev, false);
6665 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6666 cik_enable_cp_pg(rdev, false);
6667 cik_enable_gds_pg(rdev, false);
6668 }
6669 }
6670 }
6671
6672 /*
6673 * Interrupts
6674 * Starting with r6xx, interrupts are handled via a ring buffer.
6675 * Ring buffers are areas of GPU accessible memory that the GPU
6676 * writes interrupt vectors into and the host reads vectors out of.
6677 * There is a rptr (read pointer) that determines where the
6678 * host is currently reading, and a wptr (write pointer)
6679 * which determines where the GPU has written. When the
6680 * pointers are equal, the ring is idle. When the GPU
6681 * writes vectors to the ring buffer, it increments the
6682 * wptr. When there is an interrupt, the host then starts
6683 * fetching vectors and processing them until the pointers are
6684 * equal again at which point it updates the rptr.
6685 */
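
/*
 * Minimal sketch of the consumer side described above; illustration
 * only (cik_ih_drain_example is hypothetical and never called; the
 * real handler, cik_irq_process(), also uses the writeback wptr and
 * handles ring overflow).
 */
static void __maybe_unused cik_ih_drain_example(struct radeon_device *rdev)
{
	u32 wptr = RREG32(IH_RB_WPTR);	/* last entry the GPU wrote */
	u32 rptr = rdev->ih.rptr;	/* last entry the host consumed */

	while (rptr != wptr) {
		/* each IH vector is 16 bytes (4 dwords) */
		u32 ring_index = rptr / 4;
		u32 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;

		/* ... dispatch on src_id ... */
		rptr = (rptr + 16) & rdev->ih.ptr_mask;
	}
	/* publish how far we got */
	WREG32(IH_RB_RPTR, rptr);
	rdev->ih.rptr = rptr;
}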
6686
6687 /**
6688 * cik_enable_interrupts - Enable the interrupt ring buffer
6689 *
6690 * @rdev: radeon_device pointer
6691 *
6692 * Enable the interrupt ring buffer (CIK).
6693 */
6694 static void cik_enable_interrupts(struct radeon_device *rdev)
6695 {
6696 u32 ih_cntl = RREG32(IH_CNTL);
6697 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6698
6699 ih_cntl |= ENABLE_INTR;
6700 ih_rb_cntl |= IH_RB_ENABLE;
6701 WREG32(IH_CNTL, ih_cntl);
6702 WREG32(IH_RB_CNTL, ih_rb_cntl);
6703 rdev->ih.enabled = true;
6704 }
6705
6706 /**
6707 * cik_disable_interrupts - Disable the interrupt ring buffer
6708 *
6709 * @rdev: radeon_device pointer
6710 *
6711 * Disable the interrupt ring buffer (CIK).
6712 */
6713 static void cik_disable_interrupts(struct radeon_device *rdev)
6714 {
6715 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6716 u32 ih_cntl = RREG32(IH_CNTL);
6717
6718 ih_rb_cntl &= ~IH_RB_ENABLE;
6719 ih_cntl &= ~ENABLE_INTR;
6720 WREG32(IH_RB_CNTL, ih_rb_cntl);
6721 WREG32(IH_CNTL, ih_cntl);
6722 /* set rptr, wptr to 0 */
6723 WREG32(IH_RB_RPTR, 0);
6724 WREG32(IH_RB_WPTR, 0);
6725 rdev->ih.enabled = false;
6726 rdev->ih.rptr = 0;
6727 }
6728
6729 /**
6730 * cik_disable_interrupt_state - Disable all interrupt sources
6731 *
6732 * @rdev: radeon_device pointer
6733 *
6734 * Clear all interrupt enable bits used by the driver (CIK).
6735 */
6736 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6737 {
6738 u32 tmp;
6739
6740 /* gfx ring */
6741 tmp = RREG32(CP_INT_CNTL_RING0) &
6742 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6743 WREG32(CP_INT_CNTL_RING0, tmp);
6744 /* sdma */
6745 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6746 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6747 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6748 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6749 /* compute queues */
6750 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6751 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6752 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6753 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6754 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6755 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6756 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6757 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6758 /* grbm */
6759 WREG32(GRBM_INT_CNTL, 0);
6760 /* vline/vblank, etc. */
6761 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6762 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6763 if (rdev->num_crtc >= 4) {
6764 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6765 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6766 }
6767 if (rdev->num_crtc >= 6) {
6768 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6769 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6770 }
6771 /* pflip */
6772 if (rdev->num_crtc >= 2) {
6773 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6774 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6775 }
6776 if (rdev->num_crtc >= 4) {
6777 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6778 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6779 }
6780 if (rdev->num_crtc >= 6) {
6781 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6782 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6783 }
6784
6785 /* dac hotplug */
6786 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6787
6788 /* digital hotplug */
6789 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6790 WREG32(DC_HPD1_INT_CONTROL, tmp);
6791 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6792 WREG32(DC_HPD2_INT_CONTROL, tmp);
6793 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6794 WREG32(DC_HPD3_INT_CONTROL, tmp);
6795 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6796 WREG32(DC_HPD4_INT_CONTROL, tmp);
6797 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6798 WREG32(DC_HPD5_INT_CONTROL, tmp);
6799 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6800 WREG32(DC_HPD6_INT_CONTROL, tmp);
6801
6802 }
6803
6804 /**
6805 * cik_irq_init - init and enable the interrupt ring
6806 *
6807 * @rdev: radeon_device pointer
6808 *
6809 * Allocate a ring buffer for the interrupt controller,
6810 * enable the RLC, disable interrupts, set up the IH
6811 * ring buffer and enable it (CIK).
6812 * Called at device load and resume.
6813 * Returns 0 for success, errors for failure.
6814 */
6815 static int cik_irq_init(struct radeon_device *rdev)
6816 {
6817 int ret = 0;
6818 int rb_bufsz;
6819 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6820
6821 /* allocate ring */
6822 ret = r600_ih_ring_alloc(rdev);
6823 if (ret)
6824 return ret;
6825
6826 /* disable irqs */
6827 cik_disable_interrupts(rdev);
6828
6829 /* init rlc */
6830 ret = cik_rlc_resume(rdev);
6831 if (ret) {
6832 r600_ih_ring_fini(rdev);
6833 return ret;
6834 }
6835
6836 /* setup interrupt control */
6837 /* XXX this should actually be a bus address, not an MC address. same on older asics */
6838 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6839 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6840 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6841 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6842 */
6843 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6844 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6845 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6846 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6847
6848 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
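/* the RB size field wants log2 of the ring size in dwords; e.g. a
 * 64 KiB ring gives order_base_2(65536 / 4) = 14 */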
6849 rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6850
6851 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6852 IH_WPTR_OVERFLOW_CLEAR |
6853 (rb_bufsz << 1));
6854
6855 if (rdev->wb.enabled)
6856 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6857
6858 /* set the writeback address whether it's enabled or not */
6859 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6860 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6861
6862 WREG32(IH_RB_CNTL, ih_rb_cntl);
6863
6864 /* set rptr, wptr to 0 */
6865 WREG32(IH_RB_RPTR, 0);
6866 WREG32(IH_RB_WPTR, 0);
6867
6868 /* Default settings for IH_CNTL (disabled at first) */
6869 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6870 /* RPTR_REARM only works if msi's are enabled */
6871 if (rdev->msi_enabled)
6872 ih_cntl |= RPTR_REARM;
6873 WREG32(IH_CNTL, ih_cntl);
6874
6875 /* force the active interrupt state to all disabled */
6876 cik_disable_interrupt_state(rdev);
6877
6878 pci_set_master(rdev->pdev);
6879
6880 /* enable irqs */
6881 cik_enable_interrupts(rdev);
6882
6883 return ret;
6884 }
6885
6886 /**
6887 * cik_irq_set - enable/disable interrupt sources
6888 *
6889 * @rdev: radeon_device pointer
6890 *
6891 * Enable interrupt sources on the GPU (vblanks, hpd,
6892 * etc.) (CIK).
6893 * Returns 0 for success, errors for failure.
6894 */
6895 int cik_irq_set(struct radeon_device *rdev)
6896 {
6897 u32 cp_int_cntl;
6898 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6899 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6900 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6901 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6902 u32 grbm_int_cntl = 0;
6903 u32 dma_cntl, dma_cntl1;
6904 u32 thermal_int;
6905
6906 if (!rdev->irq.installed) {
6907 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6908 return -EINVAL;
6909 }
6910 /* don't enable anything if the ih is disabled */
6911 if (!rdev->ih.enabled) {
6912 cik_disable_interrupts(rdev);
6913 /* force the active interrupt state to all disabled */
6914 cik_disable_interrupt_state(rdev);
6915 return 0;
6916 }
6917
6918 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6919 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6920 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6921
6922 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6923 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6924 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6925 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6926 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6927 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6928
6929 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6930 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6931
6932 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6933 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6934 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6935 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6936 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6937 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6938 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6939 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6940
6941 if (rdev->flags & RADEON_IS_IGP)
6942 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6943 ~(THERM_INTH_MASK | THERM_INTL_MASK);
6944 else
6945 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6946 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6947
6948 /* enable CP interrupts on all rings */
6949 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6950 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6951 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6952 }
6953 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6954 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6955 DRM_DEBUG("cik_irq_set: sw int cp1\n");
6956 if (ring->me == 1) {
6957 switch (ring->pipe) {
6958 case 0:
6959 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6960 break;
6961 case 1:
6962 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6963 break;
6964 case 2:
6965 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6966 break;
6967 case 3:
6968 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6969 break;
6970 default:
6971 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6972 break;
6973 }
6974 } else if (ring->me == 2) {
6975 switch (ring->pipe) {
6976 case 0:
6977 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6978 break;
6979 case 1:
6980 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6981 break;
6982 case 2:
6983 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6984 break;
6985 case 3:
6986 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6987 break;
6988 default:
6989 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6990 break;
6991 }
6992 } else {
6993 DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
6994 }
6995 }
6996 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6997 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6998 DRM_DEBUG("cik_irq_set: sw int cp2\n");
6999 if (ring->me == 1) {
7000 switch (ring->pipe) {
7001 case 0:
7002 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7003 break;
7004 case 1:
7005 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7006 break;
7007 case 2:
7008 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7009 break;
7010 case 3:
7011 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
7012 break;
7013 default:
7014 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7015 break;
7016 }
7017 } else if (ring->me == 2) {
7018 switch (ring->pipe) {
7019 case 0:
7020 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7021 break;
7022 case 1:
7023 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7024 break;
7025 case 2:
7026 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7027 break;
7028 case 3:
7029 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
7030 break;
7031 default:
7032 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7033 break;
7034 }
7035 } else {
7036 DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7037 }
7038 }
7039
7040 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7041 DRM_DEBUG("cik_irq_set: sw int dma\n");
7042 dma_cntl |= TRAP_ENABLE;
7043 }
7044
7045 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7046 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7047 dma_cntl1 |= TRAP_ENABLE;
7048 }
7049
7050 if (rdev->irq.crtc_vblank_int[0] ||
7051 atomic_read(&rdev->irq.pflip[0])) {
7052 DRM_DEBUG("cik_irq_set: vblank 0\n");
7053 crtc1 |= VBLANK_INTERRUPT_MASK;
7054 }
7055 if (rdev->irq.crtc_vblank_int[1] ||
7056 atomic_read(&rdev->irq.pflip[1])) {
7057 DRM_DEBUG("cik_irq_set: vblank 1\n");
7058 crtc2 |= VBLANK_INTERRUPT_MASK;
7059 }
7060 if (rdev->irq.crtc_vblank_int[2] ||
7061 atomic_read(&rdev->irq.pflip[2])) {
7062 DRM_DEBUG("cik_irq_set: vblank 2\n");
7063 crtc3 |= VBLANK_INTERRUPT_MASK;
7064 }
7065 if (rdev->irq.crtc_vblank_int[3] ||
7066 atomic_read(&rdev->irq.pflip[3])) {
7067 DRM_DEBUG("cik_irq_set: vblank 3\n");
7068 crtc4 |= VBLANK_INTERRUPT_MASK;
7069 }
7070 if (rdev->irq.crtc_vblank_int[4] ||
7071 atomic_read(&rdev->irq.pflip[4])) {
7072 DRM_DEBUG("cik_irq_set: vblank 4\n");
7073 crtc5 |= VBLANK_INTERRUPT_MASK;
7074 }
7075 if (rdev->irq.crtc_vblank_int[5] ||
7076 atomic_read(&rdev->irq.pflip[5])) {
7077 DRM_DEBUG("cik_irq_set: vblank 5\n");
7078 crtc6 |= VBLANK_INTERRUPT_MASK;
7079 }
7080 if (rdev->irq.hpd[0]) {
7081 DRM_DEBUG("cik_irq_set: hpd 1\n");
7082 hpd1 |= DC_HPDx_INT_EN;
7083 }
7084 if (rdev->irq.hpd[1]) {
7085 DRM_DEBUG("cik_irq_set: hpd 2\n");
7086 hpd2 |= DC_HPDx_INT_EN;
7087 }
7088 if (rdev->irq.hpd[2]) {
7089 DRM_DEBUG("cik_irq_set: hpd 3\n");
7090 hpd3 |= DC_HPDx_INT_EN;
7091 }
7092 if (rdev->irq.hpd[3]) {
7093 DRM_DEBUG("cik_irq_set: hpd 4\n");
7094 hpd4 |= DC_HPDx_INT_EN;
7095 }
7096 if (rdev->irq.hpd[4]) {
7097 DRM_DEBUG("cik_irq_set: hpd 5\n");
7098 hpd5 |= DC_HPDx_INT_EN;
7099 }
7100 if (rdev->irq.hpd[5]) {
7101 DRM_DEBUG("cik_irq_set: hpd 6\n");
7102 hpd6 |= DC_HPDx_INT_EN;
7103 }
7104
7105 if (rdev->irq.dpm_thermal) {
7106 DRM_DEBUG("dpm thermal\n");
7107 if (rdev->flags & RADEON_IS_IGP)
7108 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7109 else
7110 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7111 }
7112
7113 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7114
7115 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7116 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7117
7118 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7119 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7120 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7121 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7122 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7123 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7124 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7125 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7126
7127 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7128
7129 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7130 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7131 if (rdev->num_crtc >= 4) {
7132 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7133 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7134 }
7135 if (rdev->num_crtc >= 6) {
7136 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7137 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7138 }
7139
7140 if (rdev->num_crtc >= 2) {
7141 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7142 GRPH_PFLIP_INT_MASK);
7143 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7144 GRPH_PFLIP_INT_MASK);
7145 }
7146 if (rdev->num_crtc >= 4) {
7147 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7148 GRPH_PFLIP_INT_MASK);
7149 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7150 GRPH_PFLIP_INT_MASK);
7151 }
7152 if (rdev->num_crtc >= 6) {
7153 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7154 GRPH_PFLIP_INT_MASK);
7155 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7156 GRPH_PFLIP_INT_MASK);
7157 }
7158
7159 WREG32(DC_HPD1_INT_CONTROL, hpd1);
7160 WREG32(DC_HPD2_INT_CONTROL, hpd2);
7161 WREG32(DC_HPD3_INT_CONTROL, hpd3);
7162 WREG32(DC_HPD4_INT_CONTROL, hpd4);
7163 WREG32(DC_HPD5_INT_CONTROL, hpd5);
7164 WREG32(DC_HPD6_INT_CONTROL, hpd6);
7165
7166 if (rdev->flags & RADEON_IS_IGP)
7167 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7168 else
7169 WREG32_SMC(CG_THERMAL_INT, thermal_int);
7170
7171 return 0;
7172 }
7173
7174 /**
7175 * cik_irq_ack - ack interrupt sources
7176 *
7177 * @rdev: radeon_device pointer
7178 *
7179 * Ack interrupt sources on the GPU (vblanks, hpd,
7180 * etc.) (CIK). Certain interrupt sources are sw
7181 * generated and do not require an explicit ack.
7182 */
7183 static inline void cik_irq_ack(struct radeon_device *rdev)
7184 {
7185 u32 tmp;
7186
7187 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7188 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7189 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7190 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7191 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7192 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7193 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7194
7195 rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7196 EVERGREEN_CRTC0_REGISTER_OFFSET);
7197 rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7198 EVERGREEN_CRTC1_REGISTER_OFFSET);
7199 if (rdev->num_crtc >= 4) {
7200 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7201 EVERGREEN_CRTC2_REGISTER_OFFSET);
7202 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7203 EVERGREEN_CRTC3_REGISTER_OFFSET);
7204 }
7205 if (rdev->num_crtc >= 6) {
7206 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7207 EVERGREEN_CRTC4_REGISTER_OFFSET);
7208 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7209 EVERGREEN_CRTC5_REGISTER_OFFSET);
7210 }
7211
7212 if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7213 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7214 GRPH_PFLIP_INT_CLEAR);
7215 if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7216 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7217 GRPH_PFLIP_INT_CLEAR);
7218 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7219 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7220 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7221 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7222 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7223 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7224 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7225 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7226
7227 if (rdev->num_crtc >= 4) {
7228 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7229 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7230 GRPH_PFLIP_INT_CLEAR);
7231 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7232 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7233 GRPH_PFLIP_INT_CLEAR);
7234 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7235 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7236 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7237 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7238 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7239 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7240 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7241 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7242 }
7243
7244 if (rdev->num_crtc >= 6) {
7245 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7246 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7247 GRPH_PFLIP_INT_CLEAR);
7248 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7249 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7250 GRPH_PFLIP_INT_CLEAR);
7251 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7252 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7253 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7254 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7255 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7256 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7257 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7258 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7259 }
7260
7261 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7262 tmp = RREG32(DC_HPD1_INT_CONTROL);
7263 tmp |= DC_HPDx_INT_ACK;
7264 WREG32(DC_HPD1_INT_CONTROL, tmp);
7265 }
7266 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7267 tmp = RREG32(DC_HPD2_INT_CONTROL);
7268 tmp |= DC_HPDx_INT_ACK;
7269 WREG32(DC_HPD2_INT_CONTROL, tmp);
7270 }
7271 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7272 tmp = RREG32(DC_HPD3_INT_CONTROL);
7273 tmp |= DC_HPDx_INT_ACK;
7274 WREG32(DC_HPD3_INT_CONTROL, tmp);
7275 }
7276 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7277 tmp = RREG32(DC_HPD4_INT_CONTROL);
7278 tmp |= DC_HPDx_INT_ACK;
7279 WREG32(DC_HPD4_INT_CONTROL, tmp);
7280 }
7281 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7282 tmp = RREG32(DC_HPD5_INT_CONTROL);
7283 tmp |= DC_HPDx_INT_ACK;
7284 WREG32(DC_HPD5_INT_CONTROL, tmp);
7285 }
7286 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7287 tmp = RREG32(DC_HPD6_INT_CONTROL);
7288 tmp |= DC_HPDx_INT_ACK;
7289 WREG32(DC_HPD6_INT_CONTROL, tmp);
7290 }
7291 }
7292
7293 /**
7294 * cik_irq_disable - disable interrupts
7295 *
7296 * @rdev: radeon_device pointer
7297 *
7298 * Disable interrupts on the hw (CIK).
7299 */
7300 static void cik_irq_disable(struct radeon_device *rdev)
7301 {
7302 cik_disable_interrupts(rdev);
7303 /* Wait and acknowledge irq */
7304 mdelay(1);
7305 cik_irq_ack(rdev);
7306 cik_disable_interrupt_state(rdev);
7307 }
7308
7309 /**
7310 * cik_irq_suspend - disable interrupts for suspend
7311 *
7312 * @rdev: radeon_device pointer
7313 *
7314 * Disable interrupts and stop the RLC (CIK).
7315 * Used for suspend.
7316 */
7317 static void cik_irq_suspend(struct radeon_device *rdev)
7318 {
7319 cik_irq_disable(rdev);
7320 cik_rlc_stop(rdev);
7321 }
7322
7323 /**
7324 * cik_irq_fini - tear down interrupt support
7325 *
7326 * @rdev: radeon_device pointer
7327 *
7328 * Disable interrupts on the hw and free the IH ring
7329 * buffer (CIK).
7330 * Used for driver unload.
7331 */
7332 static void cik_irq_fini(struct radeon_device *rdev)
7333 {
7334 cik_irq_suspend(rdev);
7335 r600_ih_ring_fini(rdev);
7336 }
7337
7338 /**
7339 * cik_get_ih_wptr - get the IH ring buffer wptr
7340 *
7341 * @rdev: radeon_device pointer
7342 *
7343 * Get the IH ring buffer wptr from either the register
7344 * or the writeback memory buffer (CIK). Also check for
7345 * ring buffer overflow and deal with it.
7346 * Used by cik_irq_process().
7347 * Returns the value of the wptr.
7348 */
7349 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7350 {
7351 u32 wptr, tmp;
7352
7353 if (rdev->wb.enabled)
7354 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7355 else
7356 wptr = RREG32(IH_RB_WPTR);
7357
7358 if (wptr & RB_OVERFLOW) {
7359 /* When a ring buffer overflow happens, start parsing interrupts
7360 * from the last vector that was not overwritten (wptr + 16).
7361 * Hopefully this allows us to catch up.
7362 */
7363 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7364 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7365 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7366 tmp = RREG32(IH_RB_CNTL);
7367 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7368 WREG32(IH_RB_CNTL, tmp);
7369 }
7370 return (wptr & rdev->ih.ptr_mask);
7371 }
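/*
 * Editor's sketch (assumption, not upstream code): a worked example of the
 * overflow path above. With a hypothetical 64KB IH ring, ptr_mask would be
 * 0xffff. If the hardware reports wptr = 0x1000 with RB_OVERFLOW set,
 * anything older than one entry past wptr may already be overwritten, so
 * processing resumes at the oldest safe 16-byte entry:
 *
 *	rptr = (0x1000 + 16) & 0xffff;	 yields 0x1010
 *
 * and the IH_WPTR_OVERFLOW_CLEAR bit is set in IH_RB_CNTL to re-arm the
 * overflow flag for the next occurrence.
 */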
7372
7373 /* CIK IV Ring
7374 * Each IV ring entry is 128 bits:
7375 * [7:0] - interrupt source id
7376 * [31:8] - reserved
7377 * [59:32] - interrupt source data
7378 * [63:60] - reserved
7379 * [71:64] - RINGID
7380 * CP:
7381 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7382 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7383 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7384 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7385 * PIPE_ID - ME0 0=3D
7386 * - ME1&2 compute dispatcher (4 pipes each)
7387 * SDMA:
7388 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
7389 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
7390 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7391 * [79:72] - VMID
7392 * [95:80] - PASID
7393 * [127:96] - reserved
7394 */
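/*
 * Editor's sketch (assumption, not part of the upstream driver): helpers
 * decoding the CP RINGID byte per the layout above. cik_irq_process()
 * below open-codes the same masks and shifts.
 */
static inline u8 cik_iv_ring_me_id(u32 ring_id)
{
	return (ring_id & 0x60) >> 5;	/* ME_ID [1:0] */
}

static inline u8 cik_iv_ring_pipe_id(u32 ring_id)
{
	return (ring_id & 0x18) >> 3;	/* PIPE_ID [1:0] */
}

static inline u8 cik_iv_ring_queue_id(u32 ring_id)
{
	return ring_id & 0x7;		/* QUEUE_ID [2:0] */
}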
7395 /**
7396 * cik_irq_process - interrupt handler
7397 *
7398 * @rdev: radeon_device pointer
7399 *
7400 * Interrupt handler (CIK). Walk the IH ring,
7401 * ack interrupts and schedule work to handle
7402 * interrupt events.
7403 * Returns irq process return code.
7404 */
7405 int cik_irq_process(struct radeon_device *rdev)
7406 {
7407 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7408 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7409 u32 wptr;
7410 u32 rptr;
7411 u32 src_id, src_data, ring_id;
7412 u8 me_id, pipe_id, queue_id;
7413 u32 ring_index;
7414 bool queue_hotplug = false;
7415 bool queue_reset = false;
7416 u32 addr, status, mc_client;
7417 bool queue_thermal = false;
7418
7419 if (!rdev->ih.enabled || rdev->shutdown)
7420 return IRQ_NONE;
7421
7422 wptr = cik_get_ih_wptr(rdev);
7423
7424 restart_ih:
7425 /* is somebody else already processing irqs? */
7426 if (atomic_xchg(&rdev->ih.lock, 1))
7427 return IRQ_NONE;
7428
7429 rptr = rdev->ih.rptr;
7430 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7431
7432 /* Order reading of wptr vs. reading of IH ring data */
7433 rmb();
7434
7435 /* display interrupts */
7436 cik_irq_ack(rdev);
7437
7438 while (rptr != wptr) {
7439 /* wptr/rptr are in bytes! */
7440 ring_index = rptr / 4;
7441 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7442 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7443 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7444
7445 switch (src_id) {
7446 case 1: /* D1 vblank/vline */
7447 switch (src_data) {
7448 case 0: /* D1 vblank */
7449 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7450 if (rdev->irq.crtc_vblank_int[0]) {
7451 drm_handle_vblank(rdev->ddev, 0);
7452 rdev->pm.vblank_sync = true;
7453 wake_up(&rdev->irq.vblank_queue);
7454 }
7455 if (atomic_read(&rdev->irq.pflip[0]))
7456 radeon_crtc_handle_vblank(rdev, 0);
7457 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7458 DRM_DEBUG("IH: D1 vblank\n");
7459 }
7460 break;
7461 case 1: /* D1 vline */
7462 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7463 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7464 DRM_DEBUG("IH: D1 vline\n");
7465 }
7466 break;
7467 default:
7468 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7469 break;
7470 }
7471 break;
7472 case 2: /* D2 vblank/vline */
7473 switch (src_data) {
7474 case 0: /* D2 vblank */
7475 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7476 if (rdev->irq.crtc_vblank_int[1]) {
7477 drm_handle_vblank(rdev->ddev, 1);
7478 rdev->pm.vblank_sync = true;
7479 wake_up(&rdev->irq.vblank_queue);
7480 }
7481 if (atomic_read(&rdev->irq.pflip[1]))
7482 radeon_crtc_handle_vblank(rdev, 1);
7483 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7484 DRM_DEBUG("IH: D2 vblank\n");
7485 }
7486 break;
7487 case 1: /* D2 vline */
7488 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7489 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7490 DRM_DEBUG("IH: D2 vline\n");
7491 }
7492 break;
7493 default:
7494 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7495 break;
7496 }
7497 break;
7498 case 3: /* D3 vblank/vline */
7499 switch (src_data) {
7500 case 0: /* D3 vblank */
7501 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7502 if (rdev->irq.crtc_vblank_int[2]) {
7503 drm_handle_vblank(rdev->ddev, 2);
7504 rdev->pm.vblank_sync = true;
7505 wake_up(&rdev->irq.vblank_queue);
7506 }
7507 if (atomic_read(&rdev->irq.pflip[2]))
7508 radeon_crtc_handle_vblank(rdev, 2);
7509 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7510 DRM_DEBUG("IH: D3 vblank\n");
7511 }
7512 break;
7513 case 1: /* D3 vline */
7514 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7515 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7516 DRM_DEBUG("IH: D3 vline\n");
7517 }
7518 break;
7519 default:
7520 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7521 break;
7522 }
7523 break;
7524 case 4: /* D4 vblank/vline */
7525 switch (src_data) {
7526 case 0: /* D4 vblank */
7527 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7528 if (rdev->irq.crtc_vblank_int[3]) {
7529 drm_handle_vblank(rdev->ddev, 3);
7530 rdev->pm.vblank_sync = true;
7531 wake_up(&rdev->irq.vblank_queue);
7532 }
7533 if (atomic_read(&rdev->irq.pflip[3]))
7534 radeon_crtc_handle_vblank(rdev, 3);
7535 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7536 DRM_DEBUG("IH: D4 vblank\n");
7537 }
7538 break;
7539 case 1: /* D4 vline */
7540 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7541 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7542 DRM_DEBUG("IH: D4 vline\n");
7543 }
7544 break;
7545 default:
7546 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7547 break;
7548 }
7549 break;
7550 case 5: /* D5 vblank/vline */
7551 switch (src_data) {
7552 case 0: /* D5 vblank */
7553 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7554 if (rdev->irq.crtc_vblank_int[4]) {
7555 drm_handle_vblank(rdev->ddev, 4);
7556 rdev->pm.vblank_sync = true;
7557 wake_up(&rdev->irq.vblank_queue);
7558 }
7559 if (atomic_read(&rdev->irq.pflip[4]))
7560 radeon_crtc_handle_vblank(rdev, 4);
7561 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7562 DRM_DEBUG("IH: D5 vblank\n");
7563 }
7564 break;
7565 case 1: /* D5 vline */
7566 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7567 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7568 DRM_DEBUG("IH: D5 vline\n");
7569 }
7570 break;
7571 default:
7572 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7573 break;
7574 }
7575 break;
7576 case 6: /* D6 vblank/vline */
7577 switch (src_data) {
7578 case 0: /* D6 vblank */
7579 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7580 if (rdev->irq.crtc_vblank_int[5]) {
7581 drm_handle_vblank(rdev->ddev, 5);
7582 rdev->pm.vblank_sync = true;
7583 wake_up(&rdev->irq.vblank_queue);
7584 }
7585 if (atomic_read(&rdev->irq.pflip[5]))
7586 radeon_crtc_handle_vblank(rdev, 5);
7587 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7588 DRM_DEBUG("IH: D6 vblank\n");
7589 }
7590 break;
7591 case 1: /* D6 vline */
7592 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7593 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7594 DRM_DEBUG("IH: D6 vline\n");
7595 }
7596 break;
7597 default:
7598 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7599 break;
7600 }
7601 break;
7602 case 8: /* D1 page flip */
7603 case 10: /* D2 page flip */
7604 case 12: /* D3 page flip */
7605 case 14: /* D4 page flip */
7606 case 16: /* D5 page flip */
7607 case 18: /* D6 page flip */
7608 DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7609 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7610 break;
7611 case 42: /* HPD hotplug */
7612 switch (src_data) {
7613 case 0:
7614 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7615 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7616 queue_hotplug = true;
7617 DRM_DEBUG("IH: HPD1\n");
7618 }
7619 break;
7620 case 1:
7621 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7622 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7623 queue_hotplug = true;
7624 DRM_DEBUG("IH: HPD2\n");
7625 }
7626 break;
7627 case 2:
7628 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7629 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7630 queue_hotplug = true;
7631 DRM_DEBUG("IH: HPD3\n");
7632 }
7633 break;
7634 case 3:
7635 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7636 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7637 queue_hotplug = true;
7638 DRM_DEBUG("IH: HPD4\n");
7639 }
7640 break;
7641 case 4:
7642 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7643 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7644 queue_hotplug = true;
7645 DRM_DEBUG("IH: HPD5\n");
7646 }
7647 break;
7648 case 5:
7649 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7650 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7651 queue_hotplug = true;
7652 DRM_DEBUG("IH: HPD6\n");
7653 }
7654 break;
7655 default:
7656 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7657 break;
7658 }
7659 break;
7660 case 124: /* UVD */
7661 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7662 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7663 break;
7664 case 146:
7665 case 147:
7666 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7667 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7668 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7669 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7670 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
7671 addr);
7672 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7673 status);
7674 cik_vm_decode_fault(rdev, status, addr, mc_client);
7675 /* reset addr and status */
7676 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7677 break;
7678 case 167: /* VCE */
7679 DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7680 switch (src_data) {
7681 case 0:
7682 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7683 break;
7684 case 1:
7685 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7686 break;
7687 default:
7688 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7689 break;
7690 }
7691 break;
7692 case 176: /* GFX RB CP_INT */
7693 case 177: /* GFX IB CP_INT */
7694 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7695 break;
7696 case 181: /* CP EOP event */
7697 DRM_DEBUG("IH: CP EOP\n");
7698 /* XXX check the bitfield order! */
7699 me_id = (ring_id & 0x60) >> 5;
7700 pipe_id = (ring_id & 0x18) >> 3;
7701 queue_id = (ring_id & 0x7) >> 0;
7702 switch (me_id) {
7703 case 0:
7704 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7705 break;
7706 case 1:
7707 case 2:
7708 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7709 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7710 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7711 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7712 break;
7713 }
7714 break;
7715 case 184: /* CP Privileged reg access */
7716 DRM_ERROR("Illegal register access in command stream\n");
7717 /* XXX check the bitfield order! */
7718 me_id = (ring_id & 0x60) >> 5;
7719 pipe_id = (ring_id & 0x18) >> 3;
7720 queue_id = (ring_id & 0x7) >> 0;
7721 switch (me_id) {
7722 case 0:
7723 /* This results in a full GPU reset, but all we need to do is soft
7724 * reset the CP for gfx
7725 */
7726 queue_reset = true;
7727 break;
7728 case 1:
7729 /* XXX compute */
7730 queue_reset = true;
7731 break;
7732 case 2:
7733 /* XXX compute */
7734 queue_reset = true;
7735 break;
7736 }
7737 break;
7738 case 185: /* CP Privileged inst */
7739 DRM_ERROR("Illegal instruction in command stream\n");
7740 /* XXX check the bitfield order! */
7741 me_id = (ring_id & 0x60) >> 5;
7742 pipe_id = (ring_id & 0x18) >> 3;
7743 queue_id = (ring_id & 0x7) >> 0;
7744 switch (me_id) {
7745 case 0:
7746 /* This results in a full GPU reset, but all we need to do is soft
7747 * reset the CP for gfx
7748 */
7749 queue_reset = true;
7750 break;
7751 case 1:
7752 /* XXX compute */
7753 queue_reset = true;
7754 break;
7755 case 2:
7756 /* XXX compute */
7757 queue_reset = true;
7758 break;
7759 }
7760 break;
7761 case 224: /* SDMA trap event */
7762 /* XXX check the bitfield order! */
7763 me_id = (ring_id & 0x3) >> 0;
7764 queue_id = (ring_id & 0xc) >> 2;
7765 DRM_DEBUG("IH: SDMA trap\n");
7766 switch (me_id) {
7767 case 0:
7768 switch (queue_id) {
7769 case 0:
7770 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7771 break;
7772 case 1:
7773 /* XXX compute */
7774 break;
7775 case 2:
7776 /* XXX compute */
7777 break;
7778 }
7779 break;
7780 case 1:
7781 switch (queue_id) {
7782 case 0:
7783 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7784 break;
7785 case 1:
7786 /* XXX compute */
7787 break;
7788 case 2:
7789 /* XXX compute */
7790 break;
7791 }
7792 break;
7793 }
7794 break;
7795 case 230: /* thermal low to high */
7796 DRM_DEBUG("IH: thermal low to high\n");
7797 rdev->pm.dpm.thermal.high_to_low = false;
7798 queue_thermal = true;
7799 break;
7800 case 231: /* thermal high to low */
7801 DRM_DEBUG("IH: thermal high to low\n");
7802 rdev->pm.dpm.thermal.high_to_low = true;
7803 queue_thermal = true;
7804 break;
7805 case 233: /* GUI IDLE */
7806 DRM_DEBUG("IH: GUI idle\n");
7807 break;
7808 case 241: /* SDMA Privileged inst */
7809 case 247: /* SDMA Privileged inst */
7810 DRM_ERROR("Illegal instruction in SDMA command stream\n");
7811 /* XXX check the bitfield order! */
7812 me_id = (ring_id & 0x3) >> 0;
7813 queue_id = (ring_id & 0xc) >> 2;
7814 switch (me_id) {
7815 case 0:
7816 switch (queue_id) {
7817 case 0:
7818 queue_reset = true;
7819 break;
7820 case 1:
7821 /* XXX compute */
7822 queue_reset = true;
7823 break;
7824 case 2:
7825 /* XXX compute */
7826 queue_reset = true;
7827 break;
7828 }
7829 break;
7830 case 1:
7831 switch (queue_id) {
7832 case 0:
7833 queue_reset = true;
7834 break;
7835 case 1:
7836 /* XXX compute */
7837 queue_reset = true;
7838 break;
7839 case 2:
7840 /* XXX compute */
7841 queue_reset = true;
7842 break;
7843 }
7844 break;
7845 }
7846 break;
7847 default:
7848 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7849 break;
7850 }
7851
7852 /* wptr/rptr are in bytes! */
7853 rptr += 16;
7854 rptr &= rdev->ih.ptr_mask;
7855 }
7856 if (queue_hotplug)
7857 schedule_work(&rdev->hotplug_work);
7858 if (queue_reset)
7859 schedule_work(&rdev->reset_work);
7860 if (queue_thermal)
7861 schedule_work(&rdev->pm.dpm.thermal.work);
7862 rdev->ih.rptr = rptr;
7863 WREG32(IH_RB_RPTR, rdev->ih.rptr);
7864 atomic_set(&rdev->ih.lock, 0);
7865
7866 /* make sure wptr hasn't changed while processing */
7867 wptr = cik_get_ih_wptr(rdev);
7868 if (wptr != rptr)
7869 goto restart_ih;
7870
7871 return IRQ_HANDLED;
7872 }
7873
7874 /*
7875 * startup/shutdown callbacks
7876 */
7877 /**
7878 * cik_startup - program the asic to a functional state
7879 *
7880 * @rdev: radeon_device pointer
7881 *
7882 * Programs the asic to a functional state (CIK).
7883 * Called by cik_init() and cik_resume().
7884 * Returns 0 for success, error for failure.
7885 */
7886 static int cik_startup(struct radeon_device *rdev)
7887 {
7888 struct radeon_ring *ring;
7889 int r;
7890
7891 /* enable pcie gen2/3 link */
7892 cik_pcie_gen3_enable(rdev);
7893 /* enable aspm */
7894 cik_program_aspm(rdev);
7895
7896 /* scratch needs to be initialized before MC */
7897 r = r600_vram_scratch_init(rdev);
7898 if (r)
7899 return r;
7900
7901 cik_mc_program(rdev);
7902
7903 if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
7904 r = ci_mc_load_microcode(rdev);
7905 if (r) {
7906 DRM_ERROR("Failed to load MC firmware!\n");
7907 return r;
7908 }
7909 }
7910
7911 r = cik_pcie_gart_enable(rdev);
7912 if (r)
7913 return r;
7914 cik_gpu_init(rdev);
7915
7916 /* allocate rlc buffers */
7917 if (rdev->flags & RADEON_IS_IGP) {
7918 if (rdev->family == CHIP_KAVERI) {
7919 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7920 rdev->rlc.reg_list_size =
7921 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7922 } else {
7923 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7924 rdev->rlc.reg_list_size =
7925 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7926 }
7927 }
7928 rdev->rlc.cs_data = ci_cs_data;
7929 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7930 r = sumo_rlc_init(rdev);
7931 if (r) {
7932 DRM_ERROR("Failed to init rlc BOs!\n");
7933 return r;
7934 }
7935
7936 /* allocate wb buffer */
7937 r = radeon_wb_init(rdev);
7938 if (r)
7939 return r;
7940
7941 /* allocate mec buffers */
7942 r = cik_mec_init(rdev);
7943 if (r) {
7944 DRM_ERROR("Failed to init MEC BOs!\n");
7945 return r;
7946 }
7947
7948 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7949 if (r) {
7950 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7951 return r;
7952 }
7953
7954 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7955 if (r) {
7956 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7957 return r;
7958 }
7959
7960 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7961 if (r) {
7962 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7963 return r;
7964 }
7965
7966 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7967 if (r) {
7968 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7969 return r;
7970 }
7971
7972 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7973 if (r) {
7974 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7975 return r;
7976 }
7977
7978 r = radeon_uvd_resume(rdev);
7979 if (!r) {
7980 r = uvd_v4_2_resume(rdev);
7981 if (!r) {
7982 r = radeon_fence_driver_start_ring(rdev,
7983 R600_RING_TYPE_UVD_INDEX);
7984 if (r)
7985 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7986 }
7987 }
7988 if (r)
7989 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7990
7991 r = radeon_vce_resume(rdev);
7992 if (!r) {
7993 r = vce_v2_0_resume(rdev);
7994 if (!r)
7995 r = radeon_fence_driver_start_ring(rdev,
7996 TN_RING_TYPE_VCE1_INDEX);
7997 if (!r)
7998 r = radeon_fence_driver_start_ring(rdev,
7999 TN_RING_TYPE_VCE2_INDEX);
8000 }
8001 if (r) {
8002 dev_err(rdev->dev, "VCE init error (%d).\n", r);
8003 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8004 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8005 }
8006
8007 /* Enable IRQ */
8008 if (!rdev->irq.installed) {
8009 r = radeon_irq_kms_init(rdev);
8010 if (r)
8011 return r;
8012 }
8013
8014 r = cik_irq_init(rdev);
8015 if (r) {
8016 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8017 radeon_irq_kms_fini(rdev);
8018 return r;
8019 }
8020 cik_irq_set(rdev);
8021
8022 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8023 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8024 PACKET3(PACKET3_NOP, 0x3FFF));
8025 if (r)
8026 return r;
8027
8028 /* set up the compute queues */
8029 /* type-2 packets are deprecated on MEC, use type-3 instead */
8030 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8031 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8032 PACKET3(PACKET3_NOP, 0x3FFF));
8033 if (r)
8034 return r;
8035 ring->me = 1; /* first MEC */
8036 ring->pipe = 0; /* first pipe */
8037 ring->queue = 0; /* first queue */
8038 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8039
8040 /* type-2 packets are deprecated on MEC, use type-3 instead */
8041 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8042 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8043 PACKET3(PACKET3_NOP, 0x3FFF));
8044 if (r)
8045 return r;
8046 /* dGPUs only have 1 MEC */
8047 ring->me = 1; /* first MEC */
8048 ring->pipe = 0; /* first pipe */
8049 ring->queue = 1; /* second queue */
8050 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8051
8052 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8053 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8054 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8055 if (r)
8056 return r;
8057
8058 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8059 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8060 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8061 if (r)
8062 return r;
8063
8064 r = cik_cp_resume(rdev);
8065 if (r)
8066 return r;
8067
8068 r = cik_sdma_resume(rdev);
8069 if (r)
8070 return r;
8071
8072 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8073 if (ring->ring_size) {
8074 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8075 RADEON_CP_PACKET2);
8076 if (!r)
8077 r = uvd_v1_0_init(rdev);
8078 if (r)
8079 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8080 }
8081
8082 r = -ENOENT;
8083
8084 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8085 if (ring->ring_size)
8086 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8087 VCE_CMD_NO_OP);
8088
8089 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8090 if (ring->ring_size)
8091 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8092 VCE_CMD_NO_OP);
8093
8094 if (!r)
8095 r = vce_v1_0_init(rdev);
8096 else if (r != -ENOENT)
8097 DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8098
8099 r = radeon_ib_pool_init(rdev);
8100 if (r) {
8101 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8102 return r;
8103 }
8104
8105 r = radeon_vm_manager_init(rdev);
8106 if (r) {
8107 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8108 return r;
8109 }
8110
8111 r = dce6_audio_init(rdev);
8112 if (r)
8113 return r;
8114
8115 return 0;
8116 }
8117
8118 /**
8119 * cik_resume - resume the asic to a functional state
8120 *
8121 * @rdev: radeon_device pointer
8122 *
8123 * Programs the asic to a functional state (CIK).
8124 * Called at resume.
8125 * Returns 0 for success, error for failure.
8126 */
8127 int cik_resume(struct radeon_device *rdev)
8128 {
8129 int r;
8130
8131 /* post card */
8132 atom_asic_init(rdev->mode_info.atom_context);
8133
8134 /* init golden registers */
8135 cik_init_golden_registers(rdev);
8136
8137 if (rdev->pm.pm_method == PM_METHOD_DPM)
8138 radeon_pm_resume(rdev);
8139
8140 rdev->accel_working = true;
8141 r = cik_startup(rdev);
8142 if (r) {
8143 DRM_ERROR("cik startup failed on resume\n");
8144 rdev->accel_working = false;
8145 return r;
8146 }
8147
8148 return r;
8150 }
8151
8152 /**
8153 * cik_suspend - suspend the asic
8154 *
8155 * @rdev: radeon_device pointer
8156 *
8157 * Bring the chip into a state suitable for suspend (CIK).
8158 * Called at suspend.
8159 * Returns 0 for success.
8160 */
8161 int cik_suspend(struct radeon_device *rdev)
8162 {
8163 radeon_pm_suspend(rdev);
8164 dce6_audio_fini(rdev);
8165 radeon_vm_manager_fini(rdev);
8166 cik_cp_enable(rdev, false);
8167 cik_sdma_enable(rdev, false);
8168 uvd_v1_0_fini(rdev);
8169 radeon_uvd_suspend(rdev);
8170 radeon_vce_suspend(rdev);
8171 cik_fini_pg(rdev);
8172 cik_fini_cg(rdev);
8173 cik_irq_suspend(rdev);
8174 radeon_wb_disable(rdev);
8175 cik_pcie_gart_disable(rdev);
8176 return 0;
8177 }
8178
8179 /* The plan is to move initialization into this function and use
8180 * helper functions so that radeon_device_init does little more
8181 * than call asic specific functions. This should also allow us to
8182 * remove a bunch of callback functions like vram_info.
8184 */
8185 /**
8186 * cik_init - asic specific driver and hw init
8187 *
8188 * @rdev: radeon_device pointer
8189 *
8190 * Setup asic specific driver variables and program the hw
8191 * to a functional state (CIK).
8192 * Called at driver startup.
8193 * Returns 0 for success, errors for failure.
8194 */
8195 int cik_init(struct radeon_device *rdev)
8196 {
8197 struct radeon_ring *ring;
8198 int r;
8199
8200 /* Read BIOS */
8201 if (!radeon_get_bios(rdev)) {
8202 if (ASIC_IS_AVIVO(rdev))
8203 return -EINVAL;
8204 }
8205 /* Must be an ATOMBIOS */
8206 if (!rdev->is_atom_bios) {
8207 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8208 return -EINVAL;
8209 }
8210 r = radeon_atombios_init(rdev);
8211 if (r)
8212 return r;
8213
8214 /* Post card if necessary */
8215 if (!radeon_card_posted(rdev)) {
8216 if (!rdev->bios) {
8217 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8218 return -EINVAL;
8219 }
8220 DRM_INFO("GPU not posted. posting now...\n");
8221 atom_asic_init(rdev->mode_info.atom_context);
8222 }
8223 /* init golden registers */
8224 cik_init_golden_registers(rdev);
8225 /* Initialize scratch registers */
8226 cik_scratch_init(rdev);
8227 /* Initialize surface registers */
8228 radeon_surface_init(rdev);
8229 /* Initialize clocks */
8230 radeon_get_clock_info(rdev->ddev);
8231
8232 /* Fence driver */
8233 r = radeon_fence_driver_init(rdev);
8234 if (r)
8235 return r;
8236
8237 /* initialize memory controller */
8238 r = cik_mc_init(rdev);
8239 if (r)
8240 return r;
8241 /* Memory manager */
8242 r = radeon_bo_init(rdev);
8243 if (r)
8244 return r;
8245
8246 if (rdev->flags & RADEON_IS_IGP) {
8247 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8248 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8249 r = cik_init_microcode(rdev);
8250 if (r) {
8251 DRM_ERROR("Failed to load firmware!\n");
8252 return r;
8253 }
8254 }
8255 } else {
8256 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8257 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8258 !rdev->mc_fw) {
8259 r = cik_init_microcode(rdev);
8260 if (r) {
8261 DRM_ERROR("Failed to load firmware!\n");
8262 return r;
8263 }
8264 }
8265 }
8266
8267 /* Initialize power management */
8268 radeon_pm_init(rdev);
8269
8270 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8271 ring->ring_obj = NULL;
8272 r600_ring_init(rdev, ring, 1024 * 1024);
8273
8274 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8275 ring->ring_obj = NULL;
8276 r600_ring_init(rdev, ring, 1024 * 1024);
8277 r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8278 if (r)
8279 return r;
8280
8281 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8282 ring->ring_obj = NULL;
8283 r600_ring_init(rdev, ring, 1024 * 1024);
8284 r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8285 if (r)
8286 return r;
8287
8288 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8289 ring->ring_obj = NULL;
8290 r600_ring_init(rdev, ring, 256 * 1024);
8291
8292 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8293 ring->ring_obj = NULL;
8294 r600_ring_init(rdev, ring, 256 * 1024);
8295
8296 r = radeon_uvd_init(rdev);
8297 if (!r) {
8298 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8299 ring->ring_obj = NULL;
8300 r600_ring_init(rdev, ring, 4096);
8301 }
8302
8303 r = radeon_vce_init(rdev);
8304 if (!r) {
8305 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8306 ring->ring_obj = NULL;
8307 r600_ring_init(rdev, ring, 4096);
8308
8309 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8310 ring->ring_obj = NULL;
8311 r600_ring_init(rdev, ring, 4096);
8312 }
8313
8314 rdev->ih.ring_obj = NULL;
8315 r600_ih_ring_init(rdev, 64 * 1024);
8316
8317 r = r600_pcie_gart_init(rdev);
8318 if (r)
8319 return r;
8320
8321 rdev->accel_working = true;
8322 r = cik_startup(rdev);
8323 if (r) {
8324 dev_err(rdev->dev, "disabling GPU acceleration\n");
8325 cik_cp_fini(rdev);
8326 cik_sdma_fini(rdev);
8327 cik_irq_fini(rdev);
8328 sumo_rlc_fini(rdev);
8329 cik_mec_fini(rdev);
8330 radeon_wb_fini(rdev);
8331 radeon_ib_pool_fini(rdev);
8332 radeon_vm_manager_fini(rdev);
8333 radeon_irq_kms_fini(rdev);
8334 cik_pcie_gart_fini(rdev);
8335 rdev->accel_working = false;
8336 }
8337
8338 /* Don't start up if the MC ucode is missing.
8339 * The default clocks and voltages before the MC ucode
8340 * is loaded are not sufficient for advanced operations.
8341 */
8342 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8343 DRM_ERROR("radeon: MC ucode required for CIK.\n");
8344 return -EINVAL;
8345 }
8346
8347 return 0;
8348 }
8349
8350 /**
8351 * cik_fini - asic specific driver and hw fini
8352 *
8353 * @rdev: radeon_device pointer
8354 *
8355 * Tear down the asic specific driver variables and program the hw
8356 * to an idle state (CIK).
8357 * Called at driver unload.
8358 */
8359 void cik_fini(struct radeon_device *rdev)
8360 {
8361 radeon_pm_fini(rdev);
8362 cik_cp_fini(rdev);
8363 cik_sdma_fini(rdev);
8364 cik_fini_pg(rdev);
8365 cik_fini_cg(rdev);
8366 cik_irq_fini(rdev);
8367 sumo_rlc_fini(rdev);
8368 cik_mec_fini(rdev);
8369 radeon_wb_fini(rdev);
8370 radeon_vm_manager_fini(rdev);
8371 radeon_ib_pool_fini(rdev);
8372 radeon_irq_kms_fini(rdev);
8373 uvd_v1_0_fini(rdev);
8374 radeon_uvd_fini(rdev);
8375 radeon_vce_fini(rdev);
8376 cik_pcie_gart_fini(rdev);
8377 r600_vram_scratch_fini(rdev);
8378 radeon_gem_fini(rdev);
8379 radeon_fence_driver_fini(rdev);
8380 radeon_bo_fini(rdev);
8381 radeon_atombios_fini(rdev);
8382 kfree(rdev->bios);
8383 rdev->bios = NULL;
8384 }
8385
8386 void dce8_program_fmt(struct drm_encoder *encoder)
8387 {
8388 struct drm_device *dev = encoder->dev;
8389 struct radeon_device *rdev = dev->dev_private;
8390 struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8391 struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8392 struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8393 int bpc = 0;
8394 u32 tmp = 0;
8395 enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8396
8397 if (connector) {
8398 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8399 bpc = radeon_get_monitor_bpc(connector);
8400 dither = radeon_connector->dither;
8401 }
8402
8403 /* LVDS/eDP FMT is set up by atom */
8404 if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8405 return;
8406
8407 /* not needed for analog */
8408 if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8409 (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8410 return;
8411
8412 if (bpc == 0)
8413 return;
8414
8415 switch (bpc) {
8416 case 6:
8417 if (dither == RADEON_FMT_DITHER_ENABLE)
8418 /* XXX sort out optimal dither settings */
8419 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8420 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8421 else
8422 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8423 break;
8424 case 8:
8425 if (dither == RADEON_FMT_DITHER_ENABLE)
8426 /* XXX sort out optimal dither settings */
8427 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8428 FMT_RGB_RANDOM_ENABLE |
8429 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8430 else
8431 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8432 break;
8433 case 10:
8434 if (dither == RADEON_FMT_DITHER_ENABLE)
8435 /* XXX sort out optimal dither settings */
8436 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8437 FMT_RGB_RANDOM_ENABLE |
8438 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8439 else
8440 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8441 break;
8442 default:
8443 /* not needed */
8444 break;
8445 }
8446
8447 WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8448 }
8449
8450 /* display watermark setup */
8451 /**
8452 * dce8_line_buffer_adjust - Set up the line buffer
8453 *
8454 * @rdev: radeon_device pointer
8455 * @radeon_crtc: the selected display controller
8456 * @mode: the current display mode on the selected display
8457 * controller
8458 *
8459 * Set up the line buffer allocation for
8460 * the selected display controller (CIK).
8461 * Returns the line buffer size in pixels.
8462 */
8463 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8464 struct radeon_crtc *radeon_crtc,
8465 struct drm_display_mode *mode)
8466 {
8467 u32 tmp, buffer_alloc, i;
8468 u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8469 /*
8470 * Line Buffer Setup
8471 * There are 6 line buffers, one for each display controller.
8472 * There are 3 partitions per LB. Select the number of partitions
8473 * to enable based on the display width. For display widths larger
8474 * than 4096, you need to use 2 display controllers and combine
8475 * them using the stereo blender.
8476 */
8477 if (radeon_crtc->base.enabled && mode) {
8478 if (mode->crtc_hdisplay < 1920) {
8479 tmp = 1;
8480 buffer_alloc = 2;
8481 } else if (mode->crtc_hdisplay < 2560) {
8482 tmp = 2;
8483 buffer_alloc = 2;
8484 } else if (mode->crtc_hdisplay < 4096) {
8485 tmp = 0;
8486 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8487 } else {
8488 DRM_DEBUG_KMS("Mode too big for LB!\n");
8489 tmp = 0;
8490 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8491 }
8492 } else {
8493 tmp = 1;
8494 buffer_alloc = 0;
8495 }
8496
8497 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8498 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8499
8500 WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8501 DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8502 for (i = 0; i < rdev->usec_timeout; i++) {
8503 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8504 DMIF_BUFFERS_ALLOCATED_COMPLETED)
8505 break;
8506 udelay(1);
8507 }
8508
8509 if (radeon_crtc->base.enabled && mode) {
8510 switch (tmp) {
8511 case 0:
8512 default:
8513 return 4096 * 2;
8514 case 1:
8515 return 1920 * 2;
8516 case 2:
8517 return 2560 * 2;
8518 }
8519 }
8520
8521 /* controller not enabled, so no lb used */
8522 return 0;
8523 }
8524
8525 /**
8526 * cik_get_number_of_dram_channels - get the number of dram channels
8527 *
8528 * @rdev: radeon_device pointer
8529 *
8530 * Look up the number of video ram channels (CIK).
8531 * Used for display watermark bandwidth calculations
8532 * Returns the number of dram channels
8533 */
8534 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8535 {
8536 u32 tmp = RREG32(MC_SHARED_CHMAP);
8537
8538 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8539 case 0:
8540 default:
8541 return 1;
8542 case 1:
8543 return 2;
8544 case 2:
8545 return 4;
8546 case 3:
8547 return 8;
8548 case 4:
8549 return 3;
8550 case 5:
8551 return 6;
8552 case 6:
8553 return 10;
8554 case 7:
8555 return 12;
8556 case 8:
8557 return 16;
8558 }
8559 }
8560
8561 struct dce8_wm_params {
8562 u32 dram_channels; /* number of dram channels */
8563 u32 yclk; /* bandwidth per dram data pin in kHz */
8564 u32 sclk; /* engine clock in kHz */
8565 u32 disp_clk; /* display clock in kHz */
8566 u32 src_width; /* viewport width */
8567 u32 active_time; /* active display time in ns */
8568 u32 blank_time; /* blank time in ns */
8569 bool interlaced; /* mode is interlaced */
8570 fixed20_12 vsc; /* vertical scale ratio */
8571 u32 num_heads; /* number of active crtcs */
8572 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8573 u32 lb_size; /* line buffer allocated to pipe */
8574 u32 vtaps; /* vertical scaler taps */
8575 };
8576
8577 /**
8578 * dce8_dram_bandwidth - get the dram bandwidth
8579 *
8580 * @wm: watermark calculation data
8581 *
8582 * Calculate the raw dram bandwidth (CIK).
8583 * Used for display watermark bandwidth calculations
8584 * Returns the dram bandwidth in MBytes/s
8585 */
8586 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8587 {
8588 /* Calculate raw DRAM Bandwidth */
8589 fixed20_12 dram_efficiency; /* 0.7 */
8590 fixed20_12 yclk, dram_channels, bandwidth;
8591 fixed20_12 a;
8592
8593 a.full = dfixed_const(1000);
8594 yclk.full = dfixed_const(wm->yclk);
8595 yclk.full = dfixed_div(yclk, a);
8596 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8597 a.full = dfixed_const(10);
8598 dram_efficiency.full = dfixed_const(7);
8599 dram_efficiency.full = dfixed_div(dram_efficiency, a);
8600 bandwidth.full = dfixed_mul(dram_channels, yclk);
8601 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8602
8603 return dfixed_trunc(bandwidth);
8604 }
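/*
 * Worked example (editor's note): with wm->dram_channels = 4 and
 * wm->yclk = 500000 (i.e. a hypothetical 500 MHz effective per pin), the
 * math above yields 500 * (4 channels * 4 bytes) * 0.7 = 5600 MBytes/s,
 * where the 0.7 factor models DRAM efficiency.
 */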
8605
8606 /**
8607 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8608 *
8609 * @wm: watermark calculation data
8610 *
8611 * Calculate the dram bandwidth used for display (CIK).
8612 * Used for display watermark bandwidth calculations
8613 * Returns the dram bandwidth for display in MBytes/s
8614 */
8615 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8616 {
8617 /* Calculate DRAM Bandwidth and the part allocated to display. */
8618 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8619 fixed20_12 yclk, dram_channels, bandwidth;
8620 fixed20_12 a;
8621
8622 a.full = dfixed_const(1000);
8623 yclk.full = dfixed_const(wm->yclk);
8624 yclk.full = dfixed_div(yclk, a);
8625 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8626 a.full = dfixed_const(10);
8627 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8628 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8629 bandwidth.full = dfixed_mul(dram_channels, yclk);
8630 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8631
8632 return dfixed_trunc(bandwidth);
8633 }
8634
8635 /**
8636 * dce8_data_return_bandwidth - get the data return bandwidth
8637 *
8638 * @wm: watermark calculation data
8639 *
8640 * Calculate the data return bandwidth used for display (CIK).
8641 * Used for display watermark bandwidth calculations
8642 * Returns the data return bandwidth in MBytes/s
8643 */
8644 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8645 {
8646 /* Calculate the display Data return Bandwidth */
8647 fixed20_12 return_efficiency; /* 0.8 */
8648 fixed20_12 sclk, bandwidth;
8649 fixed20_12 a;
8650
8651 a.full = dfixed_const(1000);
8652 sclk.full = dfixed_const(wm->sclk);
8653 sclk.full = dfixed_div(sclk, a);
8654 a.full = dfixed_const(10);
8655 return_efficiency.full = dfixed_const(8);
8656 return_efficiency.full = dfixed_div(return_efficiency, a);
8657 a.full = dfixed_const(32);
8658 bandwidth.full = dfixed_mul(a, sclk);
8659 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8660
8661 return dfixed_trunc(bandwidth);
8662 }
8663
8664 /**
8665 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8666 *
8667 * @wm: watermark calculation data
8668 *
8669 * Calculate the dmif bandwidth used for display (CIK).
8670 * Used for display watermark bandwidth calculations
8671 * Returns the dmif bandwidth in MBytes/s
8672 */
8673 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8674 {
8675 /* Calculate the DMIF Request Bandwidth */
8676 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8677 fixed20_12 disp_clk, bandwidth;
8678 fixed20_12 a, b;
8679
8680 a.full = dfixed_const(1000);
8681 disp_clk.full = dfixed_const(wm->disp_clk);
8682 disp_clk.full = dfixed_div(disp_clk, a);
8683 a.full = dfixed_const(32);
8684 b.full = dfixed_mul(a, disp_clk);
8685
8686 a.full = dfixed_const(10);
8687 disp_clk_request_efficiency.full = dfixed_const(8);
8688 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8689
8690 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8691
8692 return dfixed_trunc(bandwidth);
8693 }
8694
8695 /**
8696 * dce8_available_bandwidth - get the min available bandwidth
8697 *
8698 * @wm: watermark calculation data
8699 *
8700 * Calculate the min available bandwidth used for display (CIK).
8701 * Used for display watermark bandwidth calculations
8702 * Returns the min available bandwidth in MBytes/s
8703 */
8704 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8705 {
8706 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8707 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8708 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8709 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8710
8711 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8712 }
8713
8714 /**
8715 * dce8_average_bandwidth - get the average required bandwidth
8716 *
8717 * @wm: watermark calculation data
8718 *
8719 * Calculate the average bandwidth required by the current display mode (CIK).
8720 * Used for display watermark bandwidth calculations
8721 * Returns the average available bandwidth in MBytes/s
8722 */
8723 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8724 {
8725 /* Calculate the display mode Average Bandwidth
8726 * DisplayMode should contain the source and destination dimensions,
8727 * timing, etc.
8728 */
8729 fixed20_12 bpp;
8730 fixed20_12 line_time;
8731 fixed20_12 src_width;
8732 fixed20_12 bandwidth;
8733 fixed20_12 a;
8734
8735 a.full = dfixed_const(1000);
8736 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8737 line_time.full = dfixed_div(line_time, a);
8738 bpp.full = dfixed_const(wm->bytes_per_pixel);
8739 src_width.full = dfixed_const(wm->src_width);
8740 bandwidth.full = dfixed_mul(src_width, bpp);
8741 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8742 bandwidth.full = dfixed_div(bandwidth, line_time);
8743
8744 return dfixed_trunc(bandwidth);
8745 }
8746
8747 /**
8748 * dce8_latency_watermark - get the latency watermark
8749 *
8750 * @wm: watermark calculation data
8751 *
8752 * Calculate the latency watermark (CIK).
8753 * Used for display watermark bandwidth calculations
8754 * Returns the latency watermark in ns
8755 */
8756 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8757 {
8758 /* First calculate the latency in ns */
8759 u32 mc_latency = 2000; /* 2000 ns. */
8760 u32 available_bandwidth = dce8_available_bandwidth(wm);
8761 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8762 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8763 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8764 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8765 (wm->num_heads * cursor_line_pair_return_time);
8766 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8767 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8768 u32 tmp, dmif_size = 12288;
8769 fixed20_12 a, b, c;
8770
8771 if (wm->num_heads == 0)
8772 return 0;
8773
8774 a.full = dfixed_const(2);
8775 b.full = dfixed_const(1);
8776 if ((wm->vsc.full > a.full) ||
8777 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8778 (wm->vtaps >= 5) ||
8779 ((wm->vsc.full >= a.full) && wm->interlaced))
8780 max_src_lines_per_dst_line = 4;
8781 else
8782 max_src_lines_per_dst_line = 2;
8783
8784 a.full = dfixed_const(available_bandwidth);
8785 b.full = dfixed_const(wm->num_heads);
8786 a.full = dfixed_div(a, b);
8787
8788 b.full = dfixed_const(mc_latency + 512);
8789 c.full = dfixed_const(wm->disp_clk);
8790 b.full = dfixed_div(b, c);
8791
8792 c.full = dfixed_const(dmif_size);
8793 b.full = dfixed_div(c, b);
8794
8795 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8796
8797 b.full = dfixed_const(1000);
8798 c.full = dfixed_const(wm->disp_clk);
8799 b.full = dfixed_div(c, b);
8800 c.full = dfixed_const(wm->bytes_per_pixel);
8801 b.full = dfixed_mul(b, c);
8802
8803 lb_fill_bw = min(tmp, dfixed_trunc(b));
8804
8805 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8806 b.full = dfixed_const(1000);
8807 c.full = dfixed_const(lb_fill_bw);
8808 b.full = dfixed_div(c, b);
8809 a.full = dfixed_div(a, b);
8810 line_fill_time = dfixed_trunc(a);
8811
8812 if (line_fill_time < wm->active_time)
8813 return latency;
8814 else
8815 return latency + (line_fill_time - wm->active_time);
8817 }
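/*
 * Editor's summary of the calculation above: the base watermark is
 *
 *	latency = mc_latency + other_heads_data_return_time + dc_latency
 *
 * and it grows by (line_fill_time - active_time) whenever the line
 * buffer cannot be refilled within one active display period;
 * lb_fill_bw is capped both by this head's share of the available
 * bandwidth and by what the display clock can consume
 * (disp_clk * bytes_per_pixel).
 */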
8818
8819 /**
8820 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8821 * average and available dram bandwidth
8822 *
8823 * @wm: watermark calculation data
8824 *
8825 * Check if the display average bandwidth fits in the display
8826 * dram bandwidth (CIK).
8827 * Used for display watermark bandwidth calculations
8828 * Returns true if the display fits, false if not.
8829 */
8830 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8831 {
8832 if (dce8_average_bandwidth(wm) <=
8833 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8834 return true;
8835 else
8836 return false;
8837 }
8838
8839 /**
8840 * dce8_average_bandwidth_vs_available_bandwidth - check
8841 * average and available bandwidth
8842 *
8843 * @wm: watermark calculation data
8844 *
8845 * Check if the display average bandwidth fits in the display
8846 * available bandwidth (CIK).
8847 * Used for display watermark bandwidth calculations
8848 * Returns true if the display fits, false if not.
8849 */
8850 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8851 {
8852 if (dce8_average_bandwidth(wm) <=
8853 (dce8_available_bandwidth(wm) / wm->num_heads))
8854 return true;
8855 else
8856 return false;
8857 }
8858
8859 /**
8860 * dce8_check_latency_hiding - check latency hiding
8861 *
8862 * @wm: watermark calculation data
8863 *
8864 * Check latency hiding (CIK).
8865 * Used for display watermark bandwidth calculations
8866 * Returns true if the display fits, false if not.
8867 */
8868 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8869 {
8870 u32 lb_partitions = wm->lb_size / wm->src_width;
8871 u32 line_time = wm->active_time + wm->blank_time;
8872 u32 latency_tolerant_lines;
8873 u32 latency_hiding;
8874 fixed20_12 a;
8875
8876 a.full = dfixed_const(1);
8877 if (wm->vsc.full > a.full)
8878 latency_tolerant_lines = 1;
8879 else {
8880 if (lb_partitions <= (wm->vtaps + 1))
8881 latency_tolerant_lines = 1;
8882 else
8883 latency_tolerant_lines = 2;
8884 }
8885
8886 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8887
8888 if (dce8_latency_watermark(wm) <= latency_hiding)
8889 return true;
8890 else
8891 return false;
8892 }
8893
8894 /**
8895 * dce8_program_watermarks - program display watermarks
8896 *
8897 * @rdev: radeon_device pointer
8898 * @radeon_crtc: the selected display controller
8899 * @lb_size: line buffer size
8900 * @num_heads: number of display controllers in use
8901 *
8902 * Calculate and program the display watermarks for the
8903 * selected display controller (CIK).
8904 */
8905 static void dce8_program_watermarks(struct radeon_device *rdev,
8906 struct radeon_crtc *radeon_crtc,
8907 u32 lb_size, u32 num_heads)
8908 {
8909 struct drm_display_mode *mode = &radeon_crtc->base.mode;
8910 struct dce8_wm_params wm_low, wm_high;
8911 u32 pixel_period;
8912 u32 line_time = 0;
8913 u32 latency_watermark_a = 0, latency_watermark_b = 0;
8914 u32 tmp, wm_mask;
8915
8916 if (radeon_crtc->base.enabled && num_heads && mode) {
8917 pixel_period = 1000000 / (u32)mode->clock;
8918 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8919
8920 /* watermark for high clocks */
8921 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8922 rdev->pm.dpm_enabled) {
8923 wm_high.yclk =
8924 radeon_dpm_get_mclk(rdev, false) * 10;
8925 wm_high.sclk =
8926 radeon_dpm_get_sclk(rdev, false) * 10;
8927 } else {
8928 wm_high.yclk = rdev->pm.current_mclk * 10;
8929 wm_high.sclk = rdev->pm.current_sclk * 10;
8930 }
8931
8932 wm_high.disp_clk = mode->clock;
8933 wm_high.src_width = mode->crtc_hdisplay;
8934 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8935 wm_high.blank_time = line_time - wm_high.active_time;
8936 wm_high.interlaced = false;
8937 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8938 wm_high.interlaced = true;
8939 wm_high.vsc = radeon_crtc->vsc;
8940 wm_high.vtaps = 1;
8941 if (radeon_crtc->rmx_type != RMX_OFF)
8942 wm_high.vtaps = 2;
8943 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8944 wm_high.lb_size = lb_size;
8945 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8946 wm_high.num_heads = num_heads;
8947
8948 /* set for high clocks */
8949 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8950
8951 /* possibly force display priority to high */
8952 /* should really do this at mode validation time... */
8953 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8954 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8955 !dce8_check_latency_hiding(&wm_high) ||
8956 (rdev->disp_priority == 2)) {
8957 DRM_DEBUG_KMS("force priority to high\n");
8958 }
8959
8960 /* watermark for low clocks */
8961 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8962 rdev->pm.dpm_enabled) {
8963 wm_low.yclk =
8964 radeon_dpm_get_mclk(rdev, true) * 10;
8965 wm_low.sclk =
8966 radeon_dpm_get_sclk(rdev, true) * 10;
8967 } else {
8968 wm_low.yclk = rdev->pm.current_mclk * 10;
8969 wm_low.sclk = rdev->pm.current_sclk * 10;
8970 }
8971
8972 wm_low.disp_clk = mode->clock;
8973 wm_low.src_width = mode->crtc_hdisplay;
8974 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8975 wm_low.blank_time = line_time - wm_low.active_time;
8976 wm_low.interlaced = false;
8977 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8978 wm_low.interlaced = true;
8979 wm_low.vsc = radeon_crtc->vsc;
8980 wm_low.vtaps = 1;
8981 if (radeon_crtc->rmx_type != RMX_OFF)
8982 wm_low.vtaps = 2;
8983 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8984 wm_low.lb_size = lb_size;
8985 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8986 wm_low.num_heads = num_heads;
8987
8988 /* set for low clocks */
8989 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8990
8991 /* possibly force display priority to high */
8992 /* should really do this at mode validation time... */
8993 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8994 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8995 !dce8_check_latency_hiding(&wm_low) ||
8996 (rdev->disp_priority == 2)) {
8997 DRM_DEBUG_KMS("force priority to high\n");
8998 }
8999 }
9000
9001 /* select wm A */
9002 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9003 tmp = wm_mask;
9004 tmp &= ~LATENCY_WATERMARK_MASK(3);
9005 tmp |= LATENCY_WATERMARK_MASK(1);
9006 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9007 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9008 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9009 LATENCY_HIGH_WATERMARK(line_time)));
9010 /* select wm B */
9011 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9012 tmp &= ~LATENCY_WATERMARK_MASK(3);
9013 tmp |= LATENCY_WATERMARK_MASK(2);
9014 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9015 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9016 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9017 LATENCY_HIGH_WATERMARK(line_time)));
9018 /* restore original selection */
9019 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9020
9021 /* save values for DPM */
9022 radeon_crtc->line_time = line_time;
9023 radeon_crtc->wm_high = latency_watermark_a;
9024 radeon_crtc->wm_low = latency_watermark_b;
9025 }
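/*
 * Illustrative sketch, not part of the driver (hence the #if 0): the
 * wm A/B programming above follows a select/write/restore pattern.  A
 * hypothetical helper, dce8_program_wm_set(), shows that pattern in
 * isolation, using only registers and macros already used above.
 */
#if 0
static void dce8_program_wm_set(struct radeon_crtc *radeon_crtc,
				u32 set, u32 watermark, u32 line_time)
{
	u32 tmp;

	/* route latency watermark writes to set 1 (wm A) or 2 (wm B) */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(set);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	/* low watermark is the computed latency, high is the line time */
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(watermark) |
		LATENCY_HIGH_WATERMARK(line_time)));
}
#endif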
9026
9027 /**
9028 * dce8_bandwidth_update - program display watermarks
9029 *
9030 * @rdev: radeon_device pointer
9031 *
9032 * Calculate and program the display watermarks and line
9033 * buffer allocation (CIK).
9034 */
9035 void dce8_bandwidth_update(struct radeon_device *rdev)
9036 {
9037 struct drm_display_mode *mode = NULL;
9038 u32 num_heads = 0, lb_size;
9039 int i;
9040
9041 radeon_update_display_priority(rdev);
9042
9043 for (i = 0; i < rdev->num_crtc; i++) {
9044 if (rdev->mode_info.crtcs[i]->base.enabled)
9045 num_heads++;
9046 }
9047 for (i = 0; i < rdev->num_crtc; i++) {
9048 mode = &rdev->mode_info.crtcs[i]->base.mode;
9049 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9050 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9051 }
9052 }
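/*
 * Illustrative only: dce8_bandwidth_update() is not called directly but
 * through the asic display hooks (the CIK asic structs in radeon_asic.c
 * point .display.bandwidth_update at it).  A minimal sketch of such a
 * call site, assuming that wiring; example_bandwidth_update() is a
 * hypothetical name.
 */
#if 0
static void example_bandwidth_update(struct radeon_device *rdev)
{
	if (rdev->asic->display.bandwidth_update)
		rdev->asic->display.bandwidth_update(rdev);
}
#endif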
9053
9054 /**
9055 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9056 *
9057 * @rdev: radeon_device pointer
9058 *
9059 * Fetches a GPU clock counter snapshot (CIK).
9060 * Returns the 64-bit clock counter snapshot.
9061 */
9062 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9063 {
9064 uint64_t clock;
9065
9066 mutex_lock(&rdev->gpu_clock_mutex);
9067 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9068 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9069 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9070 mutex_unlock(&rdev->gpu_clock_mutex);
9071 return clock;
9072 }
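/*
 * Illustrative only: the counter is free-running, so elapsed GPU clocks
 * can be measured by sampling it twice.  example_elapsed_gpu_clocks()
 * is a hypothetical helper, not part of the driver.
 */
#if 0
static uint64_t example_elapsed_gpu_clocks(struct radeon_device *rdev)
{
	uint64_t start = cik_get_gpu_clock_counter(rdev);

	/* ... the work being timed ... */

	return cik_get_gpu_clock_counter(rdev) - start;
}
#endif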
9073
9074 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9075 u32 cntl_reg, u32 status_reg)
9076 {
9077 int r, i;
9078 struct atom_clock_dividers dividers;
9079 uint32_t tmp;
9080
9081 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9082 clock, false, &dividers);
9083 if (r)
9084 return r;
9085
9086 tmp = RREG32_SMC(cntl_reg);
9087 tmp &= ~(DCLK_DIR_CNTL_EN | DCLK_DIVIDER_MASK);
9088 tmp |= dividers.post_divider;
9089 WREG32_SMC(cntl_reg, tmp);
9090
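/* wait up to 100 * 10 ms for the new divider to take effect */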
9091 for (i = 0; i < 100; i++) {
9092 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9093 break;
9094 mdelay(10);
9095 }
9096 if (i == 100)
9097 return -ETIMEDOUT;
9098
9099 return 0;
9100 }
9101
9102 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9103 {
9104 int r;
9105
9106 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9107 if (r)
9108 return r;
9109
9110 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9111 return r;
9112 }
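/*
 * Illustrative only: clocks are passed in 10 kHz units, as elsewhere in
 * the driver (radeon_uvd.c, for example, uses 53300/40000 for 533/400
 * MHz).  example_uvd_clock_setup() is a hypothetical caller.
 */
#if 0
static void example_uvd_clock_setup(struct radeon_device *rdev)
{
	/* vclk 533 MHz, dclk 400 MHz, both in 10 kHz units */
	int r = cik_set_uvd_clocks(rdev, 53300, 40000);

	if (r)
		DRM_ERROR("failed to set UVD clocks: %d\n", r);
}
#endif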
9113
9114 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9115 {
9116 int r, i;
9117 struct atom_clock_dividers dividers;
9118 u32 tmp;
9119
9120 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9121 ecclk, false, &dividers);
9122 if (r)
9123 return r;
9124
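/* wait until ECLK_STATUS reports ready before changing the divider */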
9125 for (i = 0; i < 100; i++) {
9126 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9127 break;
9128 mdelay(10);
9129 }
9130 if (i == 100)
9131 return -ETIMEDOUT;
9132
9133 tmp = RREG32_SMC(CG_ECLK_CNTL);
9134 tmp &= ~(ECLK_DIR_CNTL_EN | ECLK_DIVIDER_MASK);
9135 tmp |= dividers.post_divider;
9136 WREG32_SMC(CG_ECLK_CNTL, tmp);
9137
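/* wait for the switch to the new divider to complete */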
9138 for (i = 0; i < 100; i++) {
9139 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9140 break;
9141 mdelay(10);
9142 }
9143 if (i == 100)
9144 return -ETIMEDOUT;
9145
9146 return 0;
9147 }
9148
9149 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9150 {
9151 struct pci_dev *root = rdev->pdev->bus->self;
9152 int bridge_pos, gpu_pos;
9153 u32 speed_cntl, mask, current_data_rate;
9154 int ret, i;
9155 u16 tmp16;
9156
9157 if (radeon_pcie_gen2 == 0)
9158 return;
9159
9160 if (rdev->flags & RADEON_IS_IGP)
9161 return;
9162
9163 if (!(rdev->flags & RADEON_IS_PCIE))
9164 return;
9165
9166 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9167 if (ret != 0)
9168 return;
9169
9170 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9171 return;
9172
9173 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9174 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9175 LC_CURRENT_DATA_RATE_SHIFT;
9176 if (mask & DRM_PCIE_SPEED_80) {
9177 if (current_data_rate == 2) {
9178 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9179 return;
9180 }
9181 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9182 } else if (mask & DRM_PCIE_SPEED_50) {
9183 if (current_data_rate == 1) {
9184 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9185 return;
9186 }
9187 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9188 }
9189
9190 bridge_pos = pci_pcie_cap(root);
9191 if (!bridge_pos)
9192 return;
9193
9194 gpu_pos = pci_pcie_cap(rdev->pdev);
9195 if (!gpu_pos)
9196 return;
9197
9198 if (mask & DRM_PCIE_SPEED_80) {
9199 /* re-try equalization if gen3 is not already enabled */
9200 if (current_data_rate != 2) {
9201 u16 bridge_cfg, gpu_cfg;
9202 u16 bridge_cfg2, gpu_cfg2;
9203 u32 max_lw, current_lw, tmp;
9204
9205 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9206 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9207
9208 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9209 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9210
9211 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9212 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9213
9214 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9215 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9216 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9217
9218 if (current_lw < max_lw) {
9219 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9220 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9221 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9222 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9223 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9224 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9225 }
9226 }
9227
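/*
 * Up to 10 attempts: quiesce the link, redo equalization, then
 * restore the saved LNKCTL and LNKCTL2 fields on both the bridge
 * and the GPU.
 */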
9228 for (i = 0; i < 10; i++) {
9229 /* check status */
9230 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9231 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9232 break;
9233
9234 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9235 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9236
9237 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9238 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9239
9240 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9241 tmp |= LC_SET_QUIESCE;
9242 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9243
9244 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9245 tmp |= LC_REDO_EQ;
9246 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9247
9248 mdelay(100);
9249
9250 /* linkctl: restore the saved HW autonomous width disable bit */
9251 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9252 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9253 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9254 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9255
9256 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9257 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9258 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9259 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9260
9261 /* linkctl2: restore the saved fields in bits 4 and 11:9 */
9262 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9263 tmp16 &= ~((1 << 4) | (7 << 9));
9264 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9265 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9266
9267 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9268 tmp16 &= ~((1 << 4) | (7 << 9));
9269 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9270 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9271
9272 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9273 tmp &= ~LC_SET_QUIESCE;
9274 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9275 }
9276 }
9277 }
9278
9279 /* set the link speed */
9280 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9281 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9282 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9283
9284 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9285 tmp16 &= ~0xf;
9286 if (mask & DRM_PCIE_SPEED_80)
9287 tmp16 |= 3; /* gen3 */
9288 else if (mask & DRM_PCIE_SPEED_50)
9289 tmp16 |= 2; /* gen2 */
9290 else
9291 tmp16 |= 1; /* gen1 */
9292 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9293
9294 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9295 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9296 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9297
9298 for (i = 0; i < rdev->usec_timeout; i++) {
9299 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9300 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9301 break;
9302 udelay(1);
9303 }
9304 }
9305
9306 static void cik_program_aspm(struct radeon_device *rdev)
9307 {
9308 u32 data, orig;
9309 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9310 bool disable_clkreq = false;
9311
9312 if (radeon_aspm == 0)
9313 return;
9314
9315 /* XXX double check IGPs */
9316 if (rdev->flags & RADEON_IS_IGP)
9317 return;
9318
9319 if (!(rdev->flags & RADEON_IS_PCIE))
9320 return;
9321
9322 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9323 data &= ~LC_XMIT_N_FTS_MASK;
9324 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9325 if (orig != data)
9326 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9327
9328 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9329 data |= LC_GO_TO_RECOVERY;
9330 if (orig != data)
9331 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9332
9333 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9334 data |= P_IGNORE_EDB_ERR;
9335 if (orig != data)
9336 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9337
9338 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9339 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9340 data |= LC_PMI_TO_L1_DIS;
9341 if (!disable_l0s)
9342 data |= LC_L0S_INACTIVITY(7);
9343
9344 if (!disable_l1) {
9345 data |= LC_L1_INACTIVITY(7);
9346 data &= ~LC_PMI_TO_L1_DIS;
9347 if (orig != data)
9348 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9349
9350 if (!disable_plloff_in_l1) {
9351 bool clk_req_support;
9352
9353 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9354 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9355 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9356 if (orig != data)
9357 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9358
9359 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9360 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9361 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9362 if (orig != data)
9363 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9364
9365 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9366 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9367 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9368 if (orig != data)
9369 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9370
9371 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9372 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9373 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9374 if (orig != data)
9375 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9376
9377 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9378 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9379 data |= LC_DYN_LANES_PWR_STATE(3);
9380 if (orig != data)
9381 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9382
9383 if (!disable_clkreq) {
9384 struct pci_dev *root = rdev->pdev->bus->self;
9385 u32 lnkcap;
9386
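/* CLKREQ# use requires the root port to advertise clock PM in LNKCAP */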
9387 clk_req_support = false;
9388 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9389 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9390 clk_req_support = true;
9391 } else {
9392 clk_req_support = false;
9393 }
9394
9395 if (clk_req_support) {
9396 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9397 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9398 if (orig != data)
9399 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9400
9401 orig = data = RREG32_SMC(THM_CLK_CNTL);
9402 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9403 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9404 if (orig != data)
9405 WREG32_SMC(THM_CLK_CNTL, data);
9406
9407 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9408 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9409 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9410 if (orig != data)
9411 WREG32_SMC(MISC_CLK_CTRL, data);
9412
9413 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9414 data &= ~BCLK_AS_XCLK;
9415 if (orig != data)
9416 WREG32_SMC(CG_CLKPIN_CNTL, data);
9417
9418 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9419 data &= ~FORCE_BIF_REFCLK_EN;
9420 if (orig != data)
9421 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9422
9423 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9424 data &= ~MPLL_CLKOUT_SEL_MASK;
9425 data |= MPLL_CLKOUT_SEL(4);
9426 if (orig != data)
9427 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9428 }
9429 }
9430 } else {
9431 if (orig != data)
9432 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9433 }
9434
9435 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9436 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9437 if (orig != data)
9438 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9439
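/*
 * If the N_FTS field reads back saturated and the link is reversed
 * in both directions, clear the L0s inactivity timer programmed
 * earlier.
 */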
9440 if (!disable_l0s) {
9441 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9442 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9443 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9444 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9445 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9446 data &= ~LC_L0S_INACTIVITY_MASK;
9447 if (orig != data)
9448 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9449 }
9450 }
9451 }
9452 }