2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Alex Deucher
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
29 #include "radeon_asic.h"
32 #include "cik_blit_shaders.h"
/*
 * Expected microcode image sizes, in 32-bit words.
 * cik_init_microcode() multiplies these by 4 before comparing them
 * with the byte size of the loaded firmware image.
 */

/* GFX command processor images */
#define CIK_PFP_UCODE_SIZE		2144
#define CIK_ME_UCODE_SIZE		2144
#define CIK_CE_UCODE_SIZE		2144

/* MEC image */
#define CIK_MEC_UCODE_SIZE		4192

/* RLC images, one size per chip */
#define BONAIRE_RLC_UCODE_SIZE		2048
#define KB_RLC_UCODE_SIZE		2560
#define KV_RLC_UCODE_SIZE		2560

/* GDDR memory controller image */
#define CIK_MC_UCODE_SIZE		7866

/* SDMA image */
#define CIK_SDMA_UCODE_SIZE		1050
#define CIK_SDMA_UCODE_VERSION		64
50 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
54 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
55 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
56 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
59 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
60 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
61 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
62 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
63 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
64 MODULE_FIRMWARE("radeon/KABINI_me.bin");
65 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
66 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
67 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
68 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
70 extern int r600_ih_ring_alloc(struct radeon_device
*rdev
);
71 extern void r600_ih_ring_fini(struct radeon_device
*rdev
);
72 extern void evergreen_mc_stop(struct radeon_device
*rdev
, struct evergreen_mc_save
*save
);
73 extern void evergreen_mc_resume(struct radeon_device
*rdev
, struct evergreen_mc_save
*save
);
74 extern bool evergreen_is_display_hung(struct radeon_device
*rdev
);
75 extern void si_vram_gtt_location(struct radeon_device
*rdev
, struct radeon_mc
*mc
);
76 extern void si_rlc_fini(struct radeon_device
*rdev
);
77 extern int si_rlc_init(struct radeon_device
*rdev
);
78 static void cik_rlc_stop(struct radeon_device
*rdev
);
/*
 * Indirect registers accessor
 */
83 u32
cik_pciep_rreg(struct radeon_device
*rdev
, u32 reg
)
87 WREG32(PCIE_INDEX
, reg
);
88 (void)RREG32(PCIE_INDEX
);
89 r
= RREG32(PCIE_DATA
);
93 void cik_pciep_wreg(struct radeon_device
*rdev
, u32 reg
, u32 v
)
95 WREG32(PCIE_INDEX
, reg
);
96 (void)RREG32(PCIE_INDEX
);
98 (void)RREG32(PCIE_DATA
);
101 static const u32 bonaire_golden_spm_registers
[] =
103 0x30800, 0xe0ffffff, 0xe0000000
106 static const u32 bonaire_golden_common_registers
[] =
108 0xc770, 0xffffffff, 0x00000800,
109 0xc774, 0xffffffff, 0x00000800,
110 0xc798, 0xffffffff, 0x00007fbf,
111 0xc79c, 0xffffffff, 0x00007faf
114 static const u32 bonaire_golden_registers
[] =
116 0x3354, 0x00000333, 0x00000333,
117 0x3350, 0x000c0fc0, 0x00040200,
118 0x9a10, 0x00010000, 0x00058208,
119 0x3c000, 0xffff1fff, 0x00140000,
120 0x3c200, 0xfdfc0fff, 0x00000100,
121 0x3c234, 0x40000000, 0x40000200,
122 0x9830, 0xffffffff, 0x00000000,
123 0x9834, 0xf00fffff, 0x00000400,
124 0x9838, 0x0002021c, 0x00020200,
125 0xc78, 0x00000080, 0x00000000,
126 0x5bb0, 0x000000f0, 0x00000070,
127 0x5bc0, 0xf0311fff, 0x80300000,
128 0x98f8, 0x73773777, 0x12010001,
129 0x350c, 0x00810000, 0x408af000,
130 0x7030, 0x31000111, 0x00000011,
131 0x2f48, 0x73773777, 0x12010001,
132 0x220c, 0x00007fb6, 0x0021a1b1,
133 0x2210, 0x00007fb6, 0x002021b1,
134 0x2180, 0x00007fb6, 0x00002191,
135 0x2218, 0x00007fb6, 0x002121b1,
136 0x221c, 0x00007fb6, 0x002021b1,
137 0x21dc, 0x00007fb6, 0x00002191,
138 0x21e0, 0x00007fb6, 0x00002191,
139 0x3628, 0x0000003f, 0x0000000a,
140 0x362c, 0x0000003f, 0x0000000a,
141 0x2ae4, 0x00073ffe, 0x000022a2,
142 0x240c, 0x000007ff, 0x00000000,
143 0x8a14, 0xf000003f, 0x00000007,
144 0x8bf0, 0x00002001, 0x00000001,
145 0x8b24, 0xffffffff, 0x00ffffff,
146 0x30a04, 0x0000ff0f, 0x00000000,
147 0x28a4c, 0x07ffffff, 0x06000000,
148 0x4d8, 0x00000fff, 0x00000100,
149 0x3e78, 0x00000001, 0x00000002,
150 0x9100, 0x03000000, 0x0362c688,
151 0x8c00, 0x000000ff, 0x00000001,
152 0xe40, 0x00001fff, 0x00001fff,
153 0x9060, 0x0000007f, 0x00000020,
154 0x9508, 0x00010000, 0x00010000,
155 0xac14, 0x000003ff, 0x000000f3,
156 0xac0c, 0xffffffff, 0x00001032
159 static const u32 bonaire_mgcg_cgcg_init
[] =
161 0xc420, 0xffffffff, 0xfffffffc,
162 0x30800, 0xffffffff, 0xe0000000,
163 0x3c2a0, 0xffffffff, 0x00000100,
164 0x3c208, 0xffffffff, 0x00000100,
165 0x3c2c0, 0xffffffff, 0xc0000100,
166 0x3c2c8, 0xffffffff, 0xc0000100,
167 0x3c2c4, 0xffffffff, 0xc0000100,
168 0x55e4, 0xffffffff, 0x00600100,
169 0x3c280, 0xffffffff, 0x00000100,
170 0x3c214, 0xffffffff, 0x06000100,
171 0x3c220, 0xffffffff, 0x00000100,
172 0x3c218, 0xffffffff, 0x06000100,
173 0x3c204, 0xffffffff, 0x00000100,
174 0x3c2e0, 0xffffffff, 0x00000100,
175 0x3c224, 0xffffffff, 0x00000100,
176 0x3c200, 0xffffffff, 0x00000100,
177 0x3c230, 0xffffffff, 0x00000100,
178 0x3c234, 0xffffffff, 0x00000100,
179 0x3c250, 0xffffffff, 0x00000100,
180 0x3c254, 0xffffffff, 0x00000100,
181 0x3c258, 0xffffffff, 0x00000100,
182 0x3c25c, 0xffffffff, 0x00000100,
183 0x3c260, 0xffffffff, 0x00000100,
184 0x3c27c, 0xffffffff, 0x00000100,
185 0x3c278, 0xffffffff, 0x00000100,
186 0x3c210, 0xffffffff, 0x06000100,
187 0x3c290, 0xffffffff, 0x00000100,
188 0x3c274, 0xffffffff, 0x00000100,
189 0x3c2b4, 0xffffffff, 0x00000100,
190 0x3c2b0, 0xffffffff, 0x00000100,
191 0x3c270, 0xffffffff, 0x00000100,
192 0x30800, 0xffffffff, 0xe0000000,
193 0x3c020, 0xffffffff, 0x00010000,
194 0x3c024, 0xffffffff, 0x00030002,
195 0x3c028, 0xffffffff, 0x00040007,
196 0x3c02c, 0xffffffff, 0x00060005,
197 0x3c030, 0xffffffff, 0x00090008,
198 0x3c034, 0xffffffff, 0x00010000,
199 0x3c038, 0xffffffff, 0x00030002,
200 0x3c03c, 0xffffffff, 0x00040007,
201 0x3c040, 0xffffffff, 0x00060005,
202 0x3c044, 0xffffffff, 0x00090008,
203 0x3c048, 0xffffffff, 0x00010000,
204 0x3c04c, 0xffffffff, 0x00030002,
205 0x3c050, 0xffffffff, 0x00040007,
206 0x3c054, 0xffffffff, 0x00060005,
207 0x3c058, 0xffffffff, 0x00090008,
208 0x3c05c, 0xffffffff, 0x00010000,
209 0x3c060, 0xffffffff, 0x00030002,
210 0x3c064, 0xffffffff, 0x00040007,
211 0x3c068, 0xffffffff, 0x00060005,
212 0x3c06c, 0xffffffff, 0x00090008,
213 0x3c070, 0xffffffff, 0x00010000,
214 0x3c074, 0xffffffff, 0x00030002,
215 0x3c078, 0xffffffff, 0x00040007,
216 0x3c07c, 0xffffffff, 0x00060005,
217 0x3c080, 0xffffffff, 0x00090008,
218 0x3c084, 0xffffffff, 0x00010000,
219 0x3c088, 0xffffffff, 0x00030002,
220 0x3c08c, 0xffffffff, 0x00040007,
221 0x3c090, 0xffffffff, 0x00060005,
222 0x3c094, 0xffffffff, 0x00090008,
223 0x3c098, 0xffffffff, 0x00010000,
224 0x3c09c, 0xffffffff, 0x00030002,
225 0x3c0a0, 0xffffffff, 0x00040007,
226 0x3c0a4, 0xffffffff, 0x00060005,
227 0x3c0a8, 0xffffffff, 0x00090008,
228 0x3c000, 0xffffffff, 0x96e00200,
229 0x8708, 0xffffffff, 0x00900100,
230 0xc424, 0xffffffff, 0x0020003f,
231 0x38, 0xffffffff, 0x0140001c,
232 0x3c, 0x000f0000, 0x000f0000,
233 0x220, 0xffffffff, 0xC060000C,
234 0x224, 0xc0000fff, 0x00000100,
235 0xf90, 0xffffffff, 0x00000100,
236 0xf98, 0x00000101, 0x00000000,
237 0x20a8, 0xffffffff, 0x00000104,
238 0x55e4, 0xff000fff, 0x00000100,
239 0x30cc, 0xc0000fff, 0x00000104,
240 0xc1e4, 0x00000001, 0x00000001,
241 0xd00c, 0xff000ff0, 0x00000100,
242 0xd80c, 0xff000ff0, 0x00000100
245 static const u32 spectre_golden_spm_registers
[] =
247 0x30800, 0xe0ffffff, 0xe0000000
250 static const u32 spectre_golden_common_registers
[] =
252 0xc770, 0xffffffff, 0x00000800,
253 0xc774, 0xffffffff, 0x00000800,
254 0xc798, 0xffffffff, 0x00007fbf,
255 0xc79c, 0xffffffff, 0x00007faf
258 static const u32 spectre_golden_registers
[] =
260 0x3c000, 0xffff1fff, 0x96940200,
261 0x3c00c, 0xffff0001, 0xff000000,
262 0x3c200, 0xfffc0fff, 0x00000100,
263 0x6ed8, 0x00010101, 0x00010000,
264 0x9834, 0xf00fffff, 0x00000400,
265 0x9838, 0xfffffffc, 0x00020200,
266 0x5bb0, 0x000000f0, 0x00000070,
267 0x5bc0, 0xf0311fff, 0x80300000,
268 0x98f8, 0x73773777, 0x12010001,
269 0x9b7c, 0x00ff0000, 0x00fc0000,
270 0x2f48, 0x73773777, 0x12010001,
271 0x8a14, 0xf000003f, 0x00000007,
272 0x8b24, 0xffffffff, 0x00ffffff,
273 0x28350, 0x3f3f3fff, 0x00000082,
274 0x28355, 0x0000003f, 0x00000000,
275 0x3e78, 0x00000001, 0x00000002,
276 0x913c, 0xffff03df, 0x00000004,
277 0xc768, 0x00000008, 0x00000008,
278 0x8c00, 0x000008ff, 0x00000800,
279 0x9508, 0x00010000, 0x00010000,
280 0xac0c, 0xffffffff, 0x54763210,
281 0x214f8, 0x01ff01ff, 0x00000002,
282 0x21498, 0x007ff800, 0x00200000,
283 0x2015c, 0xffffffff, 0x00000f40,
284 0x30934, 0xffffffff, 0x00000001
287 static const u32 spectre_mgcg_cgcg_init
[] =
289 0xc420, 0xffffffff, 0xfffffffc,
290 0x30800, 0xffffffff, 0xe0000000,
291 0x3c2a0, 0xffffffff, 0x00000100,
292 0x3c208, 0xffffffff, 0x00000100,
293 0x3c2c0, 0xffffffff, 0x00000100,
294 0x3c2c8, 0xffffffff, 0x00000100,
295 0x3c2c4, 0xffffffff, 0x00000100,
296 0x55e4, 0xffffffff, 0x00600100,
297 0x3c280, 0xffffffff, 0x00000100,
298 0x3c214, 0xffffffff, 0x06000100,
299 0x3c220, 0xffffffff, 0x00000100,
300 0x3c218, 0xffffffff, 0x06000100,
301 0x3c204, 0xffffffff, 0x00000100,
302 0x3c2e0, 0xffffffff, 0x00000100,
303 0x3c224, 0xffffffff, 0x00000100,
304 0x3c200, 0xffffffff, 0x00000100,
305 0x3c230, 0xffffffff, 0x00000100,
306 0x3c234, 0xffffffff, 0x00000100,
307 0x3c250, 0xffffffff, 0x00000100,
308 0x3c254, 0xffffffff, 0x00000100,
309 0x3c258, 0xffffffff, 0x00000100,
310 0x3c25c, 0xffffffff, 0x00000100,
311 0x3c260, 0xffffffff, 0x00000100,
312 0x3c27c, 0xffffffff, 0x00000100,
313 0x3c278, 0xffffffff, 0x00000100,
314 0x3c210, 0xffffffff, 0x06000100,
315 0x3c290, 0xffffffff, 0x00000100,
316 0x3c274, 0xffffffff, 0x00000100,
317 0x3c2b4, 0xffffffff, 0x00000100,
318 0x3c2b0, 0xffffffff, 0x00000100,
319 0x3c270, 0xffffffff, 0x00000100,
320 0x30800, 0xffffffff, 0xe0000000,
321 0x3c020, 0xffffffff, 0x00010000,
322 0x3c024, 0xffffffff, 0x00030002,
323 0x3c028, 0xffffffff, 0x00040007,
324 0x3c02c, 0xffffffff, 0x00060005,
325 0x3c030, 0xffffffff, 0x00090008,
326 0x3c034, 0xffffffff, 0x00010000,
327 0x3c038, 0xffffffff, 0x00030002,
328 0x3c03c, 0xffffffff, 0x00040007,
329 0x3c040, 0xffffffff, 0x00060005,
330 0x3c044, 0xffffffff, 0x00090008,
331 0x3c048, 0xffffffff, 0x00010000,
332 0x3c04c, 0xffffffff, 0x00030002,
333 0x3c050, 0xffffffff, 0x00040007,
334 0x3c054, 0xffffffff, 0x00060005,
335 0x3c058, 0xffffffff, 0x00090008,
336 0x3c05c, 0xffffffff, 0x00010000,
337 0x3c060, 0xffffffff, 0x00030002,
338 0x3c064, 0xffffffff, 0x00040007,
339 0x3c068, 0xffffffff, 0x00060005,
340 0x3c06c, 0xffffffff, 0x00090008,
341 0x3c070, 0xffffffff, 0x00010000,
342 0x3c074, 0xffffffff, 0x00030002,
343 0x3c078, 0xffffffff, 0x00040007,
344 0x3c07c, 0xffffffff, 0x00060005,
345 0x3c080, 0xffffffff, 0x00090008,
346 0x3c084, 0xffffffff, 0x00010000,
347 0x3c088, 0xffffffff, 0x00030002,
348 0x3c08c, 0xffffffff, 0x00040007,
349 0x3c090, 0xffffffff, 0x00060005,
350 0x3c094, 0xffffffff, 0x00090008,
351 0x3c098, 0xffffffff, 0x00010000,
352 0x3c09c, 0xffffffff, 0x00030002,
353 0x3c0a0, 0xffffffff, 0x00040007,
354 0x3c0a4, 0xffffffff, 0x00060005,
355 0x3c0a8, 0xffffffff, 0x00090008,
356 0x3c0ac, 0xffffffff, 0x00010000,
357 0x3c0b0, 0xffffffff, 0x00030002,
358 0x3c0b4, 0xffffffff, 0x00040007,
359 0x3c0b8, 0xffffffff, 0x00060005,
360 0x3c0bc, 0xffffffff, 0x00090008,
361 0x3c000, 0xffffffff, 0x96e00200,
362 0x8708, 0xffffffff, 0x00900100,
363 0xc424, 0xffffffff, 0x0020003f,
364 0x38, 0xffffffff, 0x0140001c,
365 0x3c, 0x000f0000, 0x000f0000,
366 0x220, 0xffffffff, 0xC060000C,
367 0x224, 0xc0000fff, 0x00000100,
368 0xf90, 0xffffffff, 0x00000100,
369 0xf98, 0x00000101, 0x00000000,
370 0x20a8, 0xffffffff, 0x00000104,
371 0x55e4, 0xff000fff, 0x00000100,
372 0x30cc, 0xc0000fff, 0x00000104,
373 0xc1e4, 0x00000001, 0x00000001,
374 0xd00c, 0xff000ff0, 0x00000100,
375 0xd80c, 0xff000ff0, 0x00000100
378 static const u32 kalindi_golden_spm_registers
[] =
380 0x30800, 0xe0ffffff, 0xe0000000
383 static const u32 kalindi_golden_common_registers
[] =
385 0xc770, 0xffffffff, 0x00000800,
386 0xc774, 0xffffffff, 0x00000800,
387 0xc798, 0xffffffff, 0x00007fbf,
388 0xc79c, 0xffffffff, 0x00007faf
391 static const u32 kalindi_golden_registers
[] =
393 0x3c000, 0xffffdfff, 0x6e944040,
394 0x55e4, 0xff607fff, 0xfc000100,
395 0x3c220, 0xff000fff, 0x00000100,
396 0x3c224, 0xff000fff, 0x00000100,
397 0x3c200, 0xfffc0fff, 0x00000100,
398 0x6ed8, 0x00010101, 0x00010000,
399 0x9830, 0xffffffff, 0x00000000,
400 0x9834, 0xf00fffff, 0x00000400,
401 0x5bb0, 0x000000f0, 0x00000070,
402 0x5bc0, 0xf0311fff, 0x80300000,
403 0x98f8, 0x73773777, 0x12010001,
404 0x98fc, 0xffffffff, 0x00000010,
405 0x9b7c, 0x00ff0000, 0x00fc0000,
406 0x8030, 0x00001f0f, 0x0000100a,
407 0x2f48, 0x73773777, 0x12010001,
408 0x2408, 0x000fffff, 0x000c007f,
409 0x8a14, 0xf000003f, 0x00000007,
410 0x8b24, 0x3fff3fff, 0x00ffcfff,
411 0x30a04, 0x0000ff0f, 0x00000000,
412 0x28a4c, 0x07ffffff, 0x06000000,
413 0x4d8, 0x00000fff, 0x00000100,
414 0x3e78, 0x00000001, 0x00000002,
415 0xc768, 0x00000008, 0x00000008,
416 0x8c00, 0x000000ff, 0x00000003,
417 0x214f8, 0x01ff01ff, 0x00000002,
418 0x21498, 0x007ff800, 0x00200000,
419 0x2015c, 0xffffffff, 0x00000f40,
420 0x88c4, 0x001f3ae3, 0x00000082,
421 0x88d4, 0x0000001f, 0x00000010,
422 0x30934, 0xffffffff, 0x00000000
425 static const u32 kalindi_mgcg_cgcg_init
[] =
427 0xc420, 0xffffffff, 0xfffffffc,
428 0x30800, 0xffffffff, 0xe0000000,
429 0x3c2a0, 0xffffffff, 0x00000100,
430 0x3c208, 0xffffffff, 0x00000100,
431 0x3c2c0, 0xffffffff, 0x00000100,
432 0x3c2c8, 0xffffffff, 0x00000100,
433 0x3c2c4, 0xffffffff, 0x00000100,
434 0x55e4, 0xffffffff, 0x00600100,
435 0x3c280, 0xffffffff, 0x00000100,
436 0x3c214, 0xffffffff, 0x06000100,
437 0x3c220, 0xffffffff, 0x00000100,
438 0x3c218, 0xffffffff, 0x06000100,
439 0x3c204, 0xffffffff, 0x00000100,
440 0x3c2e0, 0xffffffff, 0x00000100,
441 0x3c224, 0xffffffff, 0x00000100,
442 0x3c200, 0xffffffff, 0x00000100,
443 0x3c230, 0xffffffff, 0x00000100,
444 0x3c234, 0xffffffff, 0x00000100,
445 0x3c250, 0xffffffff, 0x00000100,
446 0x3c254, 0xffffffff, 0x00000100,
447 0x3c258, 0xffffffff, 0x00000100,
448 0x3c25c, 0xffffffff, 0x00000100,
449 0x3c260, 0xffffffff, 0x00000100,
450 0x3c27c, 0xffffffff, 0x00000100,
451 0x3c278, 0xffffffff, 0x00000100,
452 0x3c210, 0xffffffff, 0x06000100,
453 0x3c290, 0xffffffff, 0x00000100,
454 0x3c274, 0xffffffff, 0x00000100,
455 0x3c2b4, 0xffffffff, 0x00000100,
456 0x3c2b0, 0xffffffff, 0x00000100,
457 0x3c270, 0xffffffff, 0x00000100,
458 0x30800, 0xffffffff, 0xe0000000,
459 0x3c020, 0xffffffff, 0x00010000,
460 0x3c024, 0xffffffff, 0x00030002,
461 0x3c028, 0xffffffff, 0x00040007,
462 0x3c02c, 0xffffffff, 0x00060005,
463 0x3c030, 0xffffffff, 0x00090008,
464 0x3c034, 0xffffffff, 0x00010000,
465 0x3c038, 0xffffffff, 0x00030002,
466 0x3c03c, 0xffffffff, 0x00040007,
467 0x3c040, 0xffffffff, 0x00060005,
468 0x3c044, 0xffffffff, 0x00090008,
469 0x3c000, 0xffffffff, 0x96e00200,
470 0x8708, 0xffffffff, 0x00900100,
471 0xc424, 0xffffffff, 0x0020003f,
472 0x38, 0xffffffff, 0x0140001c,
473 0x3c, 0x000f0000, 0x000f0000,
474 0x220, 0xffffffff, 0xC060000C,
475 0x224, 0xc0000fff, 0x00000100,
476 0x20a8, 0xffffffff, 0x00000104,
477 0x55e4, 0xff000fff, 0x00000100,
478 0x30cc, 0xc0000fff, 0x00000104,
479 0xc1e4, 0x00000001, 0x00000001,
480 0xd00c, 0xff000ff0, 0x00000100,
481 0xd80c, 0xff000ff0, 0x00000100
484 static void cik_init_golden_registers(struct radeon_device
*rdev
)
486 switch (rdev
->family
) {
488 radeon_program_register_sequence(rdev
,
489 bonaire_mgcg_cgcg_init
,
490 (const u32
)ARRAY_SIZE(bonaire_mgcg_cgcg_init
));
491 radeon_program_register_sequence(rdev
,
492 bonaire_golden_registers
,
493 (const u32
)ARRAY_SIZE(bonaire_golden_registers
));
494 radeon_program_register_sequence(rdev
,
495 bonaire_golden_common_registers
,
496 (const u32
)ARRAY_SIZE(bonaire_golden_common_registers
));
497 radeon_program_register_sequence(rdev
,
498 bonaire_golden_spm_registers
,
499 (const u32
)ARRAY_SIZE(bonaire_golden_spm_registers
));
502 radeon_program_register_sequence(rdev
,
503 kalindi_mgcg_cgcg_init
,
504 (const u32
)ARRAY_SIZE(kalindi_mgcg_cgcg_init
));
505 radeon_program_register_sequence(rdev
,
506 kalindi_golden_registers
,
507 (const u32
)ARRAY_SIZE(kalindi_golden_registers
));
508 radeon_program_register_sequence(rdev
,
509 kalindi_golden_common_registers
,
510 (const u32
)ARRAY_SIZE(kalindi_golden_common_registers
));
511 radeon_program_register_sequence(rdev
,
512 kalindi_golden_spm_registers
,
513 (const u32
)ARRAY_SIZE(kalindi_golden_spm_registers
));
516 radeon_program_register_sequence(rdev
,
517 spectre_mgcg_cgcg_init
,
518 (const u32
)ARRAY_SIZE(spectre_mgcg_cgcg_init
));
519 radeon_program_register_sequence(rdev
,
520 spectre_golden_registers
,
521 (const u32
)ARRAY_SIZE(spectre_golden_registers
));
522 radeon_program_register_sequence(rdev
,
523 spectre_golden_common_registers
,
524 (const u32
)ARRAY_SIZE(spectre_golden_common_registers
));
525 radeon_program_register_sequence(rdev
,
526 spectre_golden_spm_registers
,
527 (const u32
)ARRAY_SIZE(spectre_golden_spm_registers
));
535 * cik_get_xclk - get the xclk
537 * @rdev: radeon_device pointer
539 * Returns the reference clock used by the gfx engine
542 u32
cik_get_xclk(struct radeon_device
*rdev
)
544 u32 reference_clock
= rdev
->clock
.spll
.reference_freq
;
546 if (rdev
->flags
& RADEON_IS_IGP
) {
547 if (RREG32_SMC(GENERAL_PWRMGT
) & GPU_COUNTER_CLK
)
548 return reference_clock
/ 2;
550 if (RREG32_SMC(CG_CLKPIN_CNTL
) & XTALIN_DIVIDE
)
551 return reference_clock
/ 4;
553 return reference_clock
;
557 * cik_mm_rdoorbell - read a doorbell dword
559 * @rdev: radeon_device pointer
560 * @offset: byte offset into the aperture
562 * Returns the value in the doorbell aperture at the
563 * requested offset (CIK).
565 u32
cik_mm_rdoorbell(struct radeon_device
*rdev
, u32 offset
)
567 if (offset
< rdev
->doorbell
.size
) {
568 return readl(((void __iomem
*)rdev
->doorbell
.ptr
) + offset
);
570 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset
);
576 * cik_mm_wdoorbell - write a doorbell dword
578 * @rdev: radeon_device pointer
579 * @offset: byte offset into the aperture
582 * Writes @v to the doorbell aperture at the
583 * requested offset (CIK).
585 void cik_mm_wdoorbell(struct radeon_device
*rdev
, u32 offset
, u32 v
)
587 if (offset
< rdev
->doorbell
.size
) {
588 writel(v
, ((void __iomem
*)rdev
->doorbell
.ptr
) + offset
);
590 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset
);
594 #define BONAIRE_IO_MC_REGS_SIZE 36
596 static const u32 bonaire_io_mc_regs
[BONAIRE_IO_MC_REGS_SIZE
][2] =
598 {0x00000070, 0x04400000},
599 {0x00000071, 0x80c01803},
600 {0x00000072, 0x00004004},
601 {0x00000073, 0x00000100},
602 {0x00000074, 0x00ff0000},
603 {0x00000075, 0x34000000},
604 {0x00000076, 0x08000014},
605 {0x00000077, 0x00cc08ec},
606 {0x00000078, 0x00000400},
607 {0x00000079, 0x00000000},
608 {0x0000007a, 0x04090000},
609 {0x0000007c, 0x00000000},
610 {0x0000007e, 0x4408a8e8},
611 {0x0000007f, 0x00000304},
612 {0x00000080, 0x00000000},
613 {0x00000082, 0x00000001},
614 {0x00000083, 0x00000002},
615 {0x00000084, 0xf3e4f400},
616 {0x00000085, 0x052024e3},
617 {0x00000087, 0x00000000},
618 {0x00000088, 0x01000000},
619 {0x0000008a, 0x1c0a0000},
620 {0x0000008b, 0xff010000},
621 {0x0000008d, 0xffffefff},
622 {0x0000008e, 0xfff3efff},
623 {0x0000008f, 0xfff3efbf},
624 {0x00000092, 0xf7ffffff},
625 {0x00000093, 0xffffff7f},
626 {0x00000095, 0x00101101},
627 {0x00000096, 0x00000fff},
628 {0x00000097, 0x00116fff},
629 {0x00000098, 0x60010000},
630 {0x00000099, 0x10010000},
631 {0x0000009a, 0x00006000},
632 {0x0000009b, 0x00001000},
633 {0x0000009f, 0x00b48000}
637 * cik_srbm_select - select specific register instances
639 * @rdev: radeon_device pointer
640 * @me: selected ME (micro engine)
645 * Switches the currently active registers instances. Some
646 * registers are instanced per VMID, others are instanced per
647 * me/pipe/queue combination.
649 static void cik_srbm_select(struct radeon_device
*rdev
,
650 u32 me
, u32 pipe
, u32 queue
, u32 vmid
)
652 u32 srbm_gfx_cntl
= (PIPEID(pipe
& 0x3) |
655 QUEUEID(queue
& 0x7));
656 WREG32(SRBM_GFX_CNTL
, srbm_gfx_cntl
);
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
/*
 * Visible steps: pick the io register table and sizes for the family,
 * sample the RUN_MASK bits of MC_SEQ_SUP_CNTL, write the io debug
 * index/data pairs, stream the firmware words into MC_SEQ_SUP_PGM,
 * then poll the two training-done flags.
 * NOTE(review): several statements (case labels, the conditions
 * guarding the blackout handling, loop exits, the return) are not
 * visible in this listing -- confirm the control flow against the
 * complete file before changing anything here.
 */
668 static int ci_mc_load_microcode(struct radeon_device
*rdev
)
/* firmware words are big-endian; see be32_to_cpup() below */
670 const __be32
*fw_data
;
671 u32 running
, blackout
= 0;
673 int i
, ucode_size
, regs_size
;
678 switch (rdev
->family
) {
/* the [N][2] table is walked below as a flat u32 array of index/data pairs */
681 io_mc_regs
= (u32
*)&bonaire_io_mc_regs
;
/* ucode_size counts 32-bit words, regs_size counts pairs */
682 ucode_size
= CIK_MC_UCODE_SIZE
;
683 regs_size
= BONAIRE_IO_MC_REGS_SIZE
;
/* non-zero when any RUN_MASK bit of MC_SEQ_SUP_CNTL is set */
687 running
= RREG32(MC_SEQ_SUP_CNTL
) & RUN_MASK
;
/* save the blackout control value and write it back with bit 0 set */
691 blackout
= RREG32(MC_SHARED_BLACKOUT_CNTL
);
692 WREG32(MC_SHARED_BLACKOUT_CNTL
, blackout
| 1);
695 /* reset the engine and set to writable */
696 WREG32(MC_SEQ_SUP_CNTL
, 0x00000008);
697 WREG32(MC_SEQ_SUP_CNTL
, 0x00000010);
699 /* load mc io regs */
/* pair i: element 2*i selects the register, element 2*i+1 is its data */
700 for (i
= 0; i
< regs_size
; i
++) {
701 WREG32(MC_SEQ_IO_DEBUG_INDEX
, io_mc_regs
[(i
<< 1)]);
702 WREG32(MC_SEQ_IO_DEBUG_DATA
, io_mc_regs
[(i
<< 1) + 1]);
704 /* load the MC ucode */
705 fw_data
= (const __be32
*)rdev
->mc_fw
->data
;
/* every word is converted from big-endian and written to the same register */
706 for (i
= 0; i
< ucode_size
; i
++)
707 WREG32(MC_SEQ_SUP_PGM
, be32_to_cpup(fw_data
++));
709 /* put the engine back into the active state */
710 WREG32(MC_SEQ_SUP_CNTL
, 0x00000008);
711 WREG32(MC_SEQ_SUP_CNTL
, 0x00000004);
712 WREG32(MC_SEQ_SUP_CNTL
, 0x00000001);
714 /* wait for training to complete */
/*
 * Bounded poll: at most rdev->usec_timeout iterations per flag.
 * NOTE(review): the statement run when the flag is set, and any
 * per-iteration delay, are not visible in this listing.
 */
715 for (i
= 0; i
< rdev
->usec_timeout
; i
++) {
716 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL
) & TRAIN_DONE_D0
)
/* same poll for the second flag, TRAIN_DONE_D1 */
720 for (i
= 0; i
< rdev
->usec_timeout
; i
++) {
721 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL
) & TRAIN_DONE_D1
)
/* write back the blackout control value saved earlier */
727 WREG32(MC_SHARED_BLACKOUT_CNTL
, blackout
);
734 * cik_init_microcode - load ucode images from disk
736 * @rdev: radeon_device pointer
738 * Use the firmware interface to load the ucode images into
739 * the driver (not loaded into hw).
740 * Returns 0 on success, error on failure.
742 static int cik_init_microcode(struct radeon_device
*rdev
)
744 const char *chip_name
;
745 size_t pfp_req_size
, me_req_size
, ce_req_size
,
746 mec_req_size
, rlc_req_size
, mc_req_size
,
753 switch (rdev
->family
) {
755 chip_name
= "BONAIRE";
756 pfp_req_size
= CIK_PFP_UCODE_SIZE
* 4;
757 me_req_size
= CIK_ME_UCODE_SIZE
* 4;
758 ce_req_size
= CIK_CE_UCODE_SIZE
* 4;
759 mec_req_size
= CIK_MEC_UCODE_SIZE
* 4;
760 rlc_req_size
= BONAIRE_RLC_UCODE_SIZE
* 4;
761 mc_req_size
= CIK_MC_UCODE_SIZE
* 4;
762 sdma_req_size
= CIK_SDMA_UCODE_SIZE
* 4;
765 chip_name
= "KAVERI";
766 pfp_req_size
= CIK_PFP_UCODE_SIZE
* 4;
767 me_req_size
= CIK_ME_UCODE_SIZE
* 4;
768 ce_req_size
= CIK_CE_UCODE_SIZE
* 4;
769 mec_req_size
= CIK_MEC_UCODE_SIZE
* 4;
770 rlc_req_size
= KV_RLC_UCODE_SIZE
* 4;
771 sdma_req_size
= CIK_SDMA_UCODE_SIZE
* 4;
774 chip_name
= "KABINI";
775 pfp_req_size
= CIK_PFP_UCODE_SIZE
* 4;
776 me_req_size
= CIK_ME_UCODE_SIZE
* 4;
777 ce_req_size
= CIK_CE_UCODE_SIZE
* 4;
778 mec_req_size
= CIK_MEC_UCODE_SIZE
* 4;
779 rlc_req_size
= KB_RLC_UCODE_SIZE
* 4;
780 sdma_req_size
= CIK_SDMA_UCODE_SIZE
* 4;
785 DRM_INFO("Loading %s Microcode\n", chip_name
);
787 snprintf(fw_name
, sizeof(fw_name
), "radeon/%s_pfp.bin", chip_name
);
788 err
= request_firmware(&rdev
->pfp_fw
, fw_name
, rdev
->dev
);
791 if (rdev
->pfp_fw
->size
!= pfp_req_size
) {
793 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
794 rdev
->pfp_fw
->size
, fw_name
);
799 snprintf(fw_name
, sizeof(fw_name
), "radeon/%s_me.bin", chip_name
);
800 err
= request_firmware(&rdev
->me_fw
, fw_name
, rdev
->dev
);
803 if (rdev
->me_fw
->size
!= me_req_size
) {
805 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
806 rdev
->me_fw
->size
, fw_name
);
810 snprintf(fw_name
, sizeof(fw_name
), "radeon/%s_ce.bin", chip_name
);
811 err
= request_firmware(&rdev
->ce_fw
, fw_name
, rdev
->dev
);
814 if (rdev
->ce_fw
->size
!= ce_req_size
) {
816 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
817 rdev
->ce_fw
->size
, fw_name
);
821 snprintf(fw_name
, sizeof(fw_name
), "radeon/%s_mec.bin", chip_name
);
822 err
= request_firmware(&rdev
->mec_fw
, fw_name
, rdev
->dev
);
825 if (rdev
->mec_fw
->size
!= mec_req_size
) {
827 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
828 rdev
->mec_fw
->size
, fw_name
);
832 snprintf(fw_name
, sizeof(fw_name
), "radeon/%s_rlc.bin", chip_name
);
833 err
= request_firmware(&rdev
->rlc_fw
, fw_name
, rdev
->dev
);
836 if (rdev
->rlc_fw
->size
!= rlc_req_size
) {
838 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
839 rdev
->rlc_fw
->size
, fw_name
);
843 snprintf(fw_name
, sizeof(fw_name
), "radeon/%s_sdma.bin", chip_name
);
844 err
= request_firmware(&rdev
->sdma_fw
, fw_name
, rdev
->dev
);
847 if (rdev
->sdma_fw
->size
!= sdma_req_size
) {
849 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
850 rdev
->sdma_fw
->size
, fw_name
);
854 /* No MC ucode on APUs */
855 if (!(rdev
->flags
& RADEON_IS_IGP
)) {
856 snprintf(fw_name
, sizeof(fw_name
), "radeon/%s_mc.bin", chip_name
);
857 err
= request_firmware(&rdev
->mc_fw
, fw_name
, rdev
->dev
);
860 if (rdev
->mc_fw
->size
!= mc_req_size
) {
862 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
863 rdev
->mc_fw
->size
, fw_name
);
872 "cik_cp: Failed to load firmware \"%s\"\n",
874 release_firmware(rdev
->pfp_fw
);
876 release_firmware(rdev
->me_fw
);
878 release_firmware(rdev
->ce_fw
);
880 release_firmware(rdev
->rlc_fw
);
882 release_firmware(rdev
->mc_fw
);
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
902 static void cik_tiling_mode_table_init(struct radeon_device
*rdev
)
904 const u32 num_tile_mode_states
= 32;
905 const u32 num_secondary_tile_mode_states
= 16;
906 u32 reg_offset
, gb_tile_moden
, split_equal_to_row_size
;
907 u32 num_pipe_configs
;
908 u32 num_rbs
= rdev
->config
.cik
.max_backends_per_se
*
909 rdev
->config
.cik
.max_shader_engines
;
911 switch (rdev
->config
.cik
.mem_row_size_in_kb
) {
913 split_equal_to_row_size
= ADDR_SURF_TILE_SPLIT_1KB
;
917 split_equal_to_row_size
= ADDR_SURF_TILE_SPLIT_2KB
;
920 split_equal_to_row_size
= ADDR_SURF_TILE_SPLIT_4KB
;
924 num_pipe_configs
= rdev
->config
.cik
.max_tile_pipes
;
925 if (num_pipe_configs
> 8)
926 num_pipe_configs
= 8; /* ??? */
928 if (num_pipe_configs
== 8) {
929 for (reg_offset
= 0; reg_offset
< num_tile_mode_states
; reg_offset
++) {
930 switch (reg_offset
) {
932 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
933 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
934 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
935 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B
));
938 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
939 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
940 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
941 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B
));
944 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
945 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
946 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
947 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
));
950 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
951 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
952 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
953 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B
));
956 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
957 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
958 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
959 TILE_SPLIT(split_equal_to_row_size
));
962 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
963 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
966 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
967 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
968 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
969 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
));
972 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
973 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
974 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
975 TILE_SPLIT(split_equal_to_row_size
));
978 gb_tile_moden
= (ARRAY_MODE(ARRAY_LINEAR_ALIGNED
) |
979 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
));
982 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
983 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
));
986 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
987 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
988 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
989 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
992 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
993 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
994 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16
) |
995 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
998 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
999 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1000 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1001 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1004 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1005 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
));
1008 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1009 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1010 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1011 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1014 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1015 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1016 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16
) |
1017 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1020 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1021 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1022 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1023 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1026 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1027 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
));
1030 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1031 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1033 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1036 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1037 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1038 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16
) |
1039 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1042 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1043 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1044 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1045 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1051 rdev
->config
.cik
.tile_mode_array
[reg_offset
] = gb_tile_moden
;
1052 WREG32(GB_TILE_MODE0
+ (reg_offset
* 4), gb_tile_moden
);
1054 for (reg_offset
= 0; reg_offset
< num_secondary_tile_mode_states
; reg_offset
++) {
1055 switch (reg_offset
) {
1057 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1058 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1059 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1060 NUM_BANKS(ADDR_SURF_16_BANK
));
1063 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1064 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1065 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1066 NUM_BANKS(ADDR_SURF_16_BANK
));
1069 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1070 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1071 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1072 NUM_BANKS(ADDR_SURF_16_BANK
));
1075 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1076 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1077 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1078 NUM_BANKS(ADDR_SURF_16_BANK
));
1081 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1082 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1083 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1084 NUM_BANKS(ADDR_SURF_8_BANK
));
1087 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1088 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1089 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1090 NUM_BANKS(ADDR_SURF_4_BANK
));
1093 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1094 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1095 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1096 NUM_BANKS(ADDR_SURF_2_BANK
));
1099 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1100 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8
) |
1101 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1102 NUM_BANKS(ADDR_SURF_16_BANK
));
1105 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1106 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1107 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1108 NUM_BANKS(ADDR_SURF_16_BANK
));
1111 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1112 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1113 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1114 NUM_BANKS(ADDR_SURF_16_BANK
));
1117 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1118 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1119 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1120 NUM_BANKS(ADDR_SURF_16_BANK
));
1123 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1124 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1125 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1126 NUM_BANKS(ADDR_SURF_8_BANK
));
1129 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1130 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1131 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1132 NUM_BANKS(ADDR_SURF_4_BANK
));
1135 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1136 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1137 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1138 NUM_BANKS(ADDR_SURF_2_BANK
));
1144 WREG32(GB_MACROTILE_MODE0
+ (reg_offset
* 4), gb_tile_moden
);
1146 } else if (num_pipe_configs
== 4) {
1148 for (reg_offset
= 0; reg_offset
< num_tile_mode_states
; reg_offset
++) {
1149 switch (reg_offset
) {
1151 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1152 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1153 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1154 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B
));
1157 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1158 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1159 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1160 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B
));
1163 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1164 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1165 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1166 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
));
1169 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1170 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1171 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1172 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B
));
1175 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1176 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1177 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1178 TILE_SPLIT(split_equal_to_row_size
));
1181 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1182 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1185 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1186 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1187 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1188 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
));
1191 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1192 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1193 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1194 TILE_SPLIT(split_equal_to_row_size
));
1197 gb_tile_moden
= (ARRAY_MODE(ARRAY_LINEAR_ALIGNED
) |
1198 PIPE_CONFIG(ADDR_SURF_P4_16x16
));
1201 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1202 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
));
1205 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1206 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1207 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1211 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1212 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1213 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1214 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1217 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1218 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1219 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1220 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1223 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1224 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
));
1227 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1228 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1229 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1233 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1234 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1235 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1236 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1239 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1240 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1241 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1242 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1245 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1246 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
));
1249 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1250 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1251 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1255 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1256 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1257 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1261 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1262 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1263 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1264 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1270 rdev
->config
.cik
.tile_mode_array
[reg_offset
] = gb_tile_moden
;
1271 WREG32(GB_TILE_MODE0
+ (reg_offset
* 4), gb_tile_moden
);
1273 } else if (num_rbs
< 4) {
1274 for (reg_offset
= 0; reg_offset
< num_tile_mode_states
; reg_offset
++) {
1275 switch (reg_offset
) {
1277 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1278 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1279 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1280 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B
));
1283 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1284 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1285 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1286 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B
));
1289 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1290 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1291 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1292 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
));
1295 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1296 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1297 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1298 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B
));
1301 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1302 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1303 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1304 TILE_SPLIT(split_equal_to_row_size
));
1307 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1308 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1311 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1312 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1313 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1314 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
));
1317 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1318 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1319 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1320 TILE_SPLIT(split_equal_to_row_size
));
1323 gb_tile_moden
= (ARRAY_MODE(ARRAY_LINEAR_ALIGNED
) |
1324 PIPE_CONFIG(ADDR_SURF_P4_8x16
));
1327 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1328 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
));
1331 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1332 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1333 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1337 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1338 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1339 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1343 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1344 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1345 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1349 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1350 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
));
1353 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1354 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1355 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1359 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1360 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1361 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1365 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1366 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1367 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1371 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1372 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
));
1375 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1376 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1377 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1381 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1382 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1383 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1387 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1388 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1389 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
1390 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1396 rdev
->config
.cik
.tile_mode_array
[reg_offset
] = gb_tile_moden
;
1397 WREG32(GB_TILE_MODE0
+ (reg_offset
* 4), gb_tile_moden
);
1400 for (reg_offset
= 0; reg_offset
< num_secondary_tile_mode_states
; reg_offset
++) {
1401 switch (reg_offset
) {
1403 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1406 NUM_BANKS(ADDR_SURF_16_BANK
));
1409 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1410 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1411 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1412 NUM_BANKS(ADDR_SURF_16_BANK
));
1415 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1416 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1417 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1418 NUM_BANKS(ADDR_SURF_16_BANK
));
1421 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1422 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1423 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1424 NUM_BANKS(ADDR_SURF_16_BANK
));
1427 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1428 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1429 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1430 NUM_BANKS(ADDR_SURF_16_BANK
));
1433 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1434 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1435 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1436 NUM_BANKS(ADDR_SURF_8_BANK
));
1439 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1440 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1441 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1442 NUM_BANKS(ADDR_SURF_4_BANK
));
1445 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2
) |
1446 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8
) |
1447 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1448 NUM_BANKS(ADDR_SURF_16_BANK
));
1451 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2
) |
1452 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1453 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1454 NUM_BANKS(ADDR_SURF_16_BANK
));
1457 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1460 NUM_BANKS(ADDR_SURF_16_BANK
));
1463 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1466 NUM_BANKS(ADDR_SURF_16_BANK
));
1469 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1472 NUM_BANKS(ADDR_SURF_16_BANK
));
1475 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1478 NUM_BANKS(ADDR_SURF_8_BANK
));
1481 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1484 NUM_BANKS(ADDR_SURF_4_BANK
));
1490 WREG32(GB_MACROTILE_MODE0
+ (reg_offset
* 4), gb_tile_moden
);
1492 } else if (num_pipe_configs
== 2) {
1493 for (reg_offset
= 0; reg_offset
< num_tile_mode_states
; reg_offset
++) {
1494 switch (reg_offset
) {
1496 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1497 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1498 PIPE_CONFIG(ADDR_SURF_P2
) |
1499 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B
));
1502 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1503 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1504 PIPE_CONFIG(ADDR_SURF_P2
) |
1505 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B
));
1508 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1509 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1510 PIPE_CONFIG(ADDR_SURF_P2
) |
1511 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
));
1514 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1515 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1516 PIPE_CONFIG(ADDR_SURF_P2
) |
1517 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B
));
1520 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1521 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1522 PIPE_CONFIG(ADDR_SURF_P2
) |
1523 TILE_SPLIT(split_equal_to_row_size
));
1526 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1527 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1530 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1531 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1532 PIPE_CONFIG(ADDR_SURF_P2
) |
1533 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
));
1536 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1537 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1538 PIPE_CONFIG(ADDR_SURF_P2
) |
1539 TILE_SPLIT(split_equal_to_row_size
));
1542 gb_tile_moden
= ARRAY_MODE(ARRAY_LINEAR_ALIGNED
);
1545 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1546 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
));
1549 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1550 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1551 PIPE_CONFIG(ADDR_SURF_P2
) |
1552 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1555 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1556 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1557 PIPE_CONFIG(ADDR_SURF_P2
) |
1558 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1561 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1562 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1563 PIPE_CONFIG(ADDR_SURF_P2
) |
1564 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1567 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1568 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
));
1571 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1572 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1573 PIPE_CONFIG(ADDR_SURF_P2
) |
1574 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1577 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1578 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1579 PIPE_CONFIG(ADDR_SURF_P2
) |
1580 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1583 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1584 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1585 PIPE_CONFIG(ADDR_SURF_P2
) |
1586 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1589 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1590 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
));
1593 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1594 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1595 PIPE_CONFIG(ADDR_SURF_P2
) |
1596 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1599 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1600 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1601 PIPE_CONFIG(ADDR_SURF_P2
) |
1602 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1605 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1606 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1607 PIPE_CONFIG(ADDR_SURF_P2
) |
1608 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1614 rdev
->config
.cik
.tile_mode_array
[reg_offset
] = gb_tile_moden
;
1615 WREG32(GB_TILE_MODE0
+ (reg_offset
* 4), gb_tile_moden
);
1617 for (reg_offset
= 0; reg_offset
< num_secondary_tile_mode_states
; reg_offset
++) {
1618 switch (reg_offset
) {
1620 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2
) |
1621 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1622 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1623 NUM_BANKS(ADDR_SURF_16_BANK
));
1626 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2
) |
1627 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1628 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1629 NUM_BANKS(ADDR_SURF_16_BANK
));
1632 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1633 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1634 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1635 NUM_BANKS(ADDR_SURF_16_BANK
));
1638 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1639 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1640 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1641 NUM_BANKS(ADDR_SURF_16_BANK
));
1644 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1645 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1646 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1647 NUM_BANKS(ADDR_SURF_16_BANK
));
1650 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1651 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1652 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1653 NUM_BANKS(ADDR_SURF_16_BANK
));
1656 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1657 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1658 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1659 NUM_BANKS(ADDR_SURF_8_BANK
));
1662 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4
) |
1663 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8
) |
1664 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1665 NUM_BANKS(ADDR_SURF_16_BANK
));
1668 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4
) |
1669 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1670 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1671 NUM_BANKS(ADDR_SURF_16_BANK
));
1674 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2
) |
1675 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1676 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1677 NUM_BANKS(ADDR_SURF_16_BANK
));
1680 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2
) |
1681 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1682 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1683 NUM_BANKS(ADDR_SURF_16_BANK
));
1686 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1687 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1688 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1689 NUM_BANKS(ADDR_SURF_16_BANK
));
1692 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1693 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1694 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1695 NUM_BANKS(ADDR_SURF_16_BANK
));
1698 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1699 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1700 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1701 NUM_BANKS(ADDR_SURF_8_BANK
));
1707 WREG32(GB_MACROTILE_MODE0
+ (reg_offset
* 4), gb_tile_moden
);
1710 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs
);
1714 * cik_select_se_sh - select which SE, SH to address
1716 * @rdev: radeon_device pointer
1717 * @se_num: shader engine to address
1718 * @sh_num: sh block to address
1720 * Select which SE, SH combinations to address. Certain
1721 * registers are instanced per SE or SH. 0xffffffff means
1722 * broadcast to all SEs or SHs (CIK).
/*
 * Compose and write the GRBM_GFX_INDEX selection value.
 * INSTANCE_BROADCAST_WRITES is always set. Passing 0xffffffff for se_num
 * and/or sh_num sets the broadcast bit for that level; otherwise the
 * explicit SE_INDEX()/SH_INDEX() field is OR'ed in.
 */
1724 static void cik_select_se_sh(struct radeon_device
*rdev
,
1725 u32 se_num
, u32 sh_num
)
1727 u32 data
= INSTANCE_BROADCAST_WRITES
;
/* Both wildcards: broadcast to every SE and every SH. */
1729 if ((se_num
== 0xffffffff) && (sh_num
== 0xffffffff))
1730 data
|= SH_BROADCAST_WRITES
| SE_BROADCAST_WRITES
;
/* Only the SE is a wildcard: broadcast across SEs, address one SH. */
1731 else if (se_num
== 0xffffffff)
1732 data
|= SE_BROADCAST_WRITES
| SH_INDEX(sh_num
);
/* Only the SH is a wildcard: broadcast across SHs, address one SE. */
1733 else if (sh_num
== 0xffffffff)
1734 data
|= SH_BROADCAST_WRITES
| SE_INDEX(se_num
);
/*
 * NOTE(review): embedded line 1735 is absent from this excerpt; presumably
 * it is the final "else" of the chain above, making the assignment below
 * the "explicit SE and SH" case -- confirm against the full file.
 */
1736 data
|= SH_INDEX(sh_num
) | SE_INDEX(se_num
);
/* Commit the selection to the hardware. */
1737 WREG32(GRBM_GFX_INDEX
, data
);
1741 * cik_create_bitmask - create a bitmask
1743 * @bit_width: length of the mask
1745 * create a variable length bit mask (CIK).
1746 * Returns the bitmask.
/*
 * Bitmask builder taking the mask length in bit_width.
 * NOTE(review): only the signature and the loop header are visible in this
 * excerpt; the local declarations, the loop body and the return statement
 * (embedded lines 1749-1751 and 1753 onward) are missing -- consult the
 * full file before relying on the exact mask construction.
 */
1748 static u32
cik_create_bitmask(u32 bit_width
)
/* One iteration per requested mask bit. */
1752 for (i
= 0; i
< bit_width
; i
++) {
1760 * cik_get_rb_disabled - compute the disabled RB bitmask
1762 * @rdev: radeon_device pointer
1763 * @max_rb_num: max RBs (render backends) for the asic
1764 * @se_num: number of SEs (shader engines) for the asic
1765 * @sh_per_se: number of SH blocks per SE for the asic
1767 * Calculates the bitmask of disabled RBs (CIK).
1768 * Returns the disabled RB bitmask.
/*
 * Reads the RB backend-disable state from CC_RB_BACKEND_DISABLE and
 * GC_USER_RB_BACKEND_DISABLE and builds a mask sized for the RBs of one SH.
 * NOTE(review): the sh_per_se parameter line, the local declarations, the
 * statements at embedded lines 1777/1779-1780 and the return statement are
 * missing from this excerpt -- confirm details against the full file.
 */
1770 static u32
cik_get_rb_disabled(struct radeon_device
*rdev
,
1771 u32 max_rb_num
, u32 se_num
,
/* First source of disable bits. */
1776 data
= RREG32(CC_RB_BACKEND_DISABLE
);
/* Keep only the backend-disable field. */
1778 data
&= BACKEND_DISABLE_MASK
;
/* Merge in the second disable register. */
1781 data
|= RREG32(GC_USER_RB_BACKEND_DISABLE
);
/* Move the disable field down to bit 0. */
1783 data
>>= BACKEND_DISABLE_SHIFT
;
/* Mask width is the RB count per SH: max_rb_num / se_num / sh_per_se. */
1785 mask
= cik_create_bitmask(max_rb_num
/ se_num
/ sh_per_se
);
1791 * cik_setup_rb - setup the RBs on the asic
1793 * @rdev: radeon_device pointer
1794 * @se_num: number of SEs (shader engines) for the asic
1795 * @sh_per_se: number of SH blocks per SE for the asic
1796 * @max_rb_num: max RBs (render backends) for the asic
1798 * Configures per-SE/SH RB registers (CIK).
/*
 * Three visible phases:
 *   1. Walk every (SE, SH) pair, select it, and collect its disabled-RB
 *      bits into disabled_rbs.
 *   2. Derive enabled_rbs from disabled_rbs, testing one mask value per RB.
 *   3. For each SE, build a raster configuration value from enabled_rbs
 *      and write it to PA_SC_RASTER_CONFIG.
 * Broadcast selection is restored after phases 1 and 3.
 * NOTE(review): the max_rb_num parameter line, local declarations, mask
 * initialisation/update, the case labels and any shifting of enabled_rbs
 * are missing from this excerpt -- confirm against the full file.
 */
1800 static void cik_setup_rb(struct radeon_device
*rdev
,
1801 u32 se_num
, u32 sh_per_se
,
1806 u32 disabled_rbs
= 0;
1807 u32 enabled_rbs
= 0;
/* Phase 1: gather per-SH disable bits. */
1809 for (i
= 0; i
< se_num
; i
++) {
1810 for (j
= 0; j
< sh_per_se
; j
++) {
1811 cik_select_se_sh(rdev
, i
, j
);
1812 data
= cik_get_rb_disabled(rdev
, max_rb_num
, se_num
, sh_per_se
);
/* Each SH owns a CIK_RB_BITMAP_WIDTH_PER_SH wide slot, indexed by (i * sh_per_se + j). */
1813 disabled_rbs
|= data
<< ((i
* sh_per_se
+ j
) * CIK_RB_BITMAP_WIDTH_PER_SH
);
/* Back to broadcast so later register writes reach all SEs/SHs. */
1816 cik_select_se_sh(rdev
, 0xffffffff, 0xffffffff);
/* Phase 2: an RB is enabled when its bit is clear in disabled_rbs. */
1819 for (i
= 0; i
< max_rb_num
; i
++) {
1820 if (!(disabled_rbs
& mask
))
1821 enabled_rbs
|= mask
;
/* Phase 3: per-SE raster configuration; SH selection is broadcast here. */
1825 for (i
= 0; i
< se_num
; i
++) {
1826 cik_select_se_sh(rdev
, i
, 0xffffffff);
1828 for (j
= 0; j
< sh_per_se
; j
++) {
/* The low two bits of enabled_rbs choose the RB map for this SH. */
1829 switch (enabled_rbs
& 3) {
/* Each SH contributes a 2-bit shifted field at position (i * sh_per_se + j) * 2. */
1831 data
|= (RASTER_CONFIG_RB_MAP_0
<< (i
* sh_per_se
+ j
) * 2);
1834 data
|= (RASTER_CONFIG_RB_MAP_3
<< (i
* sh_per_se
+ j
) * 2);
1838 data
|= (RASTER_CONFIG_RB_MAP_2
<< (i
* sh_per_se
+ j
) * 2);
1843 WREG32(PA_SC_RASTER_CONFIG
, data
);
/* Restore broadcast selection before returning. */
1845 cik_select_se_sh(rdev
, 0xffffffff, 0xffffffff);
1849 * cik_gpu_init - setup the 3D engine
1851 * @rdev: radeon_device pointer
1853 * Configures the 3D engine and tiling configuration
1854 * registers so that the 3D engine is usable.
/*
 * Visible flow:
 *   - fill rdev->config.cik with per-family limits and FIFO sizes,
 *   - zero a bank of HDP registers and set up GRBM/BIF,
 *   - derive the memory row size from MC_ARB_RAMCFG and patch it into
 *     gb_addr_config,
 *   - build the tile_config dword and write the address config to the
 *     GB/HDP/DMIF/SDMA/UVD registers,
 *   - initialise the tiling mode table and the RB configuration,
 *   - program a set of 3D engine default registers.
 * NOTE(review): this excerpt has lost many original lines (case labels,
 * braces, local declarations, several read-modify-write steps); gaps in
 * the embedded line numbers mark where -- confirm against the full file.
 */
1856 static void cik_gpu_init(struct radeon_device
*rdev
)
1858 u32 gb_addr_config
= RREG32(GB_ADDR_CONFIG
);
1859 u32 mc_shared_chmap
, mc_arb_ramcfg
;
1860 u32 hdp_host_path_cntl
;
/*
 * Per-family hardware limits. Two configurations are visible below; the
 * case labels that select them are not part of this excerpt.
 */
1864 switch (rdev
->family
) {
/* First visible configuration: 2 shader engines, 4 tile pipes. */
1866 rdev
->config
.cik
.max_shader_engines
= 2;
1867 rdev
->config
.cik
.max_tile_pipes
= 4;
1868 rdev
->config
.cik
.max_cu_per_sh
= 7;
1869 rdev
->config
.cik
.max_sh_per_se
= 1;
1870 rdev
->config
.cik
.max_backends_per_se
= 2;
1871 rdev
->config
.cik
.max_texture_channel_caches
= 4;
1872 rdev
->config
.cik
.max_gprs
= 256;
1873 rdev
->config
.cik
.max_gs_threads
= 32;
1874 rdev
->config
.cik
.max_hw_contexts
= 8;
1876 rdev
->config
.cik
.sc_prim_fifo_size_frontend
= 0x20;
1877 rdev
->config
.cik
.sc_prim_fifo_size_backend
= 0x100;
1878 rdev
->config
.cik
.sc_hiz_tile_fifo_size
= 0x30;
1879 rdev
->config
.cik
.sc_earlyz_tile_fifo_size
= 0x130;
/* The value read from GB_ADDR_CONFIG above is replaced by the golden setting. */
1880 gb_addr_config
= BONAIRE_GB_ADDR_CONFIG_GOLDEN
;
/* Second visible configuration: 1 shader engine, 2 tile pipes; same FIFO sizes and golden value. */
1887 rdev
->config
.cik
.max_shader_engines
= 1;
1888 rdev
->config
.cik
.max_tile_pipes
= 2;
1889 rdev
->config
.cik
.max_cu_per_sh
= 2;
1890 rdev
->config
.cik
.max_sh_per_se
= 1;
1891 rdev
->config
.cik
.max_backends_per_se
= 1;
1892 rdev
->config
.cik
.max_texture_channel_caches
= 2;
1893 rdev
->config
.cik
.max_gprs
= 256;
1894 rdev
->config
.cik
.max_gs_threads
= 16;
1895 rdev
->config
.cik
.max_hw_contexts
= 8;
1897 rdev
->config
.cik
.sc_prim_fifo_size_frontend
= 0x20;
1898 rdev
->config
.cik
.sc_prim_fifo_size_backend
= 0x100;
1899 rdev
->config
.cik
.sc_hiz_tile_fifo_size
= 0x30;
1900 rdev
->config
.cik
.sc_earlyz_tile_fifo_size
= 0x130;
1901 gb_addr_config
= BONAIRE_GB_ADDR_CONFIG_GOLDEN
;
1905 /* Initialize HDP */
/* 32 groups spaced 0x18 bytes apart; five registers (0x2c14..0x2c24) are zeroed in each. */
1906 for (i
= 0, j
= 0; i
< 32; i
++, j
+= 0x18) {
1907 WREG32((0x2c14 + j
), 0x00000000);
1908 WREG32((0x2c18 + j
), 0x00000000);
1909 WREG32((0x2c1c + j
), 0x00000000);
1910 WREG32((0x2c20 + j
), 0x00000000);
1911 WREG32((0x2c24 + j
), 0x00000000);
1914 WREG32(GRBM_CNTL
, GRBM_READ_TIMEOUT(0xff));
/* Turn on both frame buffer read and write enables. */
1916 WREG32(BIF_FB_EN
, FB_READ_EN
| FB_WRITE_EN
);
/* Memory controller configuration; mc_shared_chmap is not referenced again in the visible lines. */
1918 mc_shared_chmap
= RREG32(MC_SHARED_CHMAP
);
1919 mc_arb_ramcfg
= RREG32(MC_ARB_RAMCFG
);
1921 rdev
->config
.cik
.num_tile_pipes
= rdev
->config
.cik
.max_tile_pipes
;
1922 rdev
->config
.cik
.mem_max_burst_length_bytes
= 256;
/* Row size in KB is (4 * 2^(8 + NOOFCOLS)) / 1024, clamped to at most 4. */
1923 tmp
= (mc_arb_ramcfg
& NOOFCOLS_MASK
) >> NOOFCOLS_SHIFT
;
1924 rdev
->config
.cik
.mem_row_size_in_kb
= (4 * (1 << (8 + tmp
))) / 1024;
1925 if (rdev
->config
.cik
.mem_row_size_in_kb
> 4)
1926 rdev
->config
.cik
.mem_row_size_in_kb
= 4;
1927 /* XXX use MC settings? */
1928 rdev
->config
.cik
.shader_engine_tile_size
= 32;
1929 rdev
->config
.cik
.num_gpus
= 1;
1930 rdev
->config
.cik
.multi_gpu_tile_size
= 64;
1932 /* fix up row size */
/* Clear the ROW_SIZE field, then set it (0, 1 or 2) according to mem_row_size_in_kb; case labels are not visible here. */
1933 gb_addr_config
&= ~ROW_SIZE_MASK
;
1934 switch (rdev
->config
.cik
.mem_row_size_in_kb
) {
1937 gb_addr_config
|= ROW_SIZE(0);
1940 gb_addr_config
|= ROW_SIZE(1);
1943 gb_addr_config
|= ROW_SIZE(2);
1947 /* setup tiling info dword. gb_addr_config is not adequate since it does
1948 * not have bank info, so create a custom tiling dword.
1949 * bits 3:0 num_pipes
1950 * bits 7:4 num_banks
1951 * bits 11:8 group_size
1952 * bits 15:12 row_size
1954 rdev
->config
.cik
.tile_config
= 0;
1955 switch (rdev
->config
.cik
.num_tile_pipes
) {
1957 rdev
->config
.cik
.tile_config
|= (0 << 0);
1960 rdev
->config
.cik
.tile_config
|= (1 << 0);
1963 rdev
->config
.cik
.tile_config
|= (2 << 0);
1967 /* XXX what about 12? */
1968 rdev
->config
.cik
.tile_config
|= (3 << 0);
/* Bank field (bits 7:4): 1 when the NOOFBANK field of MC_ARB_RAMCFG is non-zero, else 0. */
1971 if ((mc_arb_ramcfg
& NOOFBANK_MASK
) >> NOOFBANK_SHIFT
)
1972 rdev
->config
.cik
.tile_config
|= 1 << 4;
1974 rdev
->config
.cik
.tile_config
|= 0 << 4;
/* Bits 11:8 take the pipe interleave size field of gb_addr_config. */
1975 rdev
->config
.cik
.tile_config
|=
1976 ((gb_addr_config
& PIPE_INTERLEAVE_SIZE_MASK
) >> PIPE_INTERLEAVE_SIZE_SHIFT
) << 8;
/* Bits 15:12 take the row size field of gb_addr_config. */
1977 rdev
->config
.cik
.tile_config
|=
1978 ((gb_addr_config
& ROW_SIZE_MASK
) >> ROW_SIZE_SHIFT
) << 12;
/* Write the same address configuration to every block listed below. */
1980 WREG32(GB_ADDR_CONFIG
, gb_addr_config
);
1981 WREG32(HDP_ADDR_CONFIG
, gb_addr_config
);
1982 WREG32(DMIF_ADDR_CALC
, gb_addr_config
);
/* Both SDMA instances receive only the bits selected by the 0x70 mask. */
1983 WREG32(SDMA0_TILING_CONFIG
+ SDMA0_REGISTER_OFFSET
, gb_addr_config
& 0x70);
1984 WREG32(SDMA0_TILING_CONFIG
+ SDMA1_REGISTER_OFFSET
, gb_addr_config
& 0x70);
1985 WREG32(UVD_UDEC_ADDR_CONFIG
, gb_addr_config
);
1986 WREG32(UVD_UDEC_DB_ADDR_CONFIG
, gb_addr_config
);
1987 WREG32(UVD_UDEC_DBW_ADDR_CONFIG
, gb_addr_config
);
1989 cik_tiling_mode_table_init(rdev
);
/* RB setup uses the per-family limits chosen at the top of the function. */
1991 cik_setup_rb(rdev
, rdev
->config
.cik
.max_shader_engines
,
1992 rdev
->config
.cik
.max_sh_per_se
,
1993 rdev
->config
.cik
.max_backends_per_se
);
1995 /* set HW defaults for 3D engine */
1996 WREG32(CP_MEQ_THRESHOLDS
, MEQ1_START(0x30) | MEQ2_START(0x60));
1998 WREG32(SX_DEBUG_1
, 0x20);
2000 WREG32(TA_CNTL_AUX
, 0x00010000);
/*
 * NOTE(review): for the read/write pairs below (SPI_CONFIG_CNTL, DB_DEBUG2,
 * DB_DEBUG3, CB_HW_CONTROL) the embedded line between read and write is
 * missing from this excerpt; presumably it modifies tmp -- confirm against
 * the full file.
 */
2002 tmp
= RREG32(SPI_CONFIG_CNTL
);
2004 WREG32(SPI_CONFIG_CNTL
, tmp
);
/* NOTE(review): SQ_CONFIG is written with 1 here and with 0 further down (embedded line 2033). */
2006 WREG32(SQ_CONFIG
, 1);
2008 WREG32(DB_DEBUG
, 0);
/* Clear the bits in 0xf00fffff before writing back. */
2010 tmp
= RREG32(DB_DEBUG2
) & ~0xf00fffff;
2012 WREG32(DB_DEBUG2
, tmp
);
/* Clear the bits in 0x0002021c before writing back. */
2014 tmp
= RREG32(DB_DEBUG3
) & ~0x0002021c;
2016 WREG32(DB_DEBUG3
, tmp
);
/* Clear bit 16 (0x00010000) before writing back. */
2018 tmp
= RREG32(CB_HW_CONTROL
) & ~0x00010000;
2020 WREG32(CB_HW_CONTROL
, tmp
);
2022 WREG32(SPI_CONFIG_CNTL_1
, VTX_DONE_DELAY(4));
/* FIFO sizes come from the per-family values stored in rdev->config.cik. */
2024 WREG32(PA_SC_FIFO_SIZE
, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev
->config
.cik
.sc_prim_fifo_size_frontend
) |
2025 SC_BACKEND_PRIM_FIFO_SIZE(rdev
->config
.cik
.sc_prim_fifo_size_backend
) |
2026 SC_HIZ_TILE_FIFO_SIZE(rdev
->config
.cik
.sc_hiz_tile_fifo_size
) |
2027 SC_EARLYZ_TILE_FIFO_SIZE(rdev
->config
.cik
.sc_earlyz_tile_fifo_size
)));
2029 WREG32(VGT_NUM_INSTANCES
, 1);
2031 WREG32(CP_PERFMON_CNTL
, 0);
2033 WREG32(SQ_CONFIG
, 0);
2035 WREG32(PA_SC_FORCE_EOV_MAX_CNTS
, (FORCE_EOV_MAX_CLK_CNT(4095) |
2036 FORCE_EOV_MAX_REZ_CNT(255)));
2038 WREG32(VGT_CACHE_INVALIDATION
, CACHE_INVALIDATION(VC_AND_TC
) |
2039 AUTO_INVLD_EN(ES_AND_GS_AUTO
));
2041 WREG32(VGT_GS_VERTEX_REUSE
, 16);
2042 WREG32(PA_SC_LINE_STIPPLE_STATE
, 0);
/* Read-modify-write: set HDP_FLUSH_INVALIDATE_CACHE in HDP_MISC_CNTL. */
2044 tmp
= RREG32(HDP_MISC_CNTL
);
2045 tmp
|= HDP_FLUSH_INVALIDATE_CACHE
;
2046 WREG32(HDP_MISC_CNTL
, tmp
);
/* The value read from HDP_HOST_PATH_CNTL is written back unchanged. */
2048 hdp_host_path_cntl
= RREG32(HDP_HOST_PATH_CNTL
);
2049 WREG32(HDP_HOST_PATH_CNTL
, hdp_host_path_cntl
);
2051 WREG32(PA_CL_ENHANCE
, CLIP_VTX_REORDER_ENA
| NUM_CLIP_SEQ(3));
2052 WREG32(PA_SC_ENHANCE
, ENABLE_PA_SC_OUT_OF_ORDER
);
2058 * GPU scratch register helper functions.
2061 * cik_scratch_init - setup driver info for CP scratch regs
2063 * @rdev: radeon_device pointer
2065 * Set up the number and offset of the CP scratch registers.
2066 * NOTE: use of CP scratch registers is a legacy interface and
2067 * is not used by default on newer asics (r6xx+). On newer asics,
2068 * memory buffers are used for fences rather than scratch regs.
/*
 * Populate rdev->scratch: seven registers starting at SCRATCH_REG0, each
 * marked free and given an address 4 bytes after the previous one.
 * NOTE(review): the local declaration of i and the braces are missing from
 * this excerpt.
 */
2070 static void cik_scratch_init(struct radeon_device
*rdev
)
2074 rdev
->scratch
.num_reg
= 7;
2075 rdev
->scratch
.reg_base
= SCRATCH_REG0
;
/* Register i lives at reg_base + i * 4 and starts out available. */
2076 for (i
= 0; i
< rdev
->scratch
.num_reg
; i
++) {
2077 rdev
->scratch
.free
[i
] = true;
2078 rdev
->scratch
.reg
[i
] = rdev
->scratch
.reg_base
+ (i
* 4);
2083 * cik_ring_test - basic gfx ring test
2085 * @rdev: radeon_device pointer
2086 * @ring: radeon_ring structure holding ring information
2088 * Allocate a scratch register and write to it using the gfx ring (CIK).
2089 * Provides a basic gfx ring test to verify that the ring is working.
2090 * Used by cik_cp_gfx_resume();
2091 * Returns 0 on success, error on failure.
/*
 * Visible sequence: obtain a scratch register, seed it with 0xCAFEDEAD,
 * queue a SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, then poll
 * the register until that value appears or rdev->usec_timeout iterations
 * have elapsed. The scratch register is released on the ring-lock failure
 * path and at the end.
 * NOTE(review): local declarations, the "if (r)" checks, the loop's break
 * and any per-iteration delay, the else branch and the return statements
 * are missing from this excerpt -- confirm against the full file.
 */
2093 int cik_ring_test(struct radeon_device
*rdev
, struct radeon_ring
*ring
)
2100 r
= radeon_scratch_get(rdev
, &scratch
);
2102 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r
);
/* Seed the register with a value different from the one the ring will write. */
2105 WREG32(scratch
, 0xCAFEDEAD);
/* Reserve room for the 3 dwords written below. */
2106 r
= radeon_ring_lock(rdev
, ring
, 3);
2108 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring
->idx
, r
);
2109 radeon_scratch_free(rdev
, scratch
);
/* Packet: header, register offset relative to PACKET3_SET_UCONFIG_REG_START in dwords, value. */
2112 radeon_ring_write(ring
, PACKET3(PACKET3_SET_UCONFIG_REG
, 1));
2113 radeon_ring_write(ring
, ((scratch
- PACKET3_SET_UCONFIG_REG_START
) >> 2));
2114 radeon_ring_write(ring
, 0xDEADBEEF);
2115 radeon_ring_unlock_commit(rdev
, ring
);
/* Poll for the value written by the ring. */
2117 for (i
= 0; i
< rdev
->usec_timeout
; i
++) {
2118 tmp
= RREG32(scratch
);
2119 if (tmp
== 0xDEADBEEF)
/* i below the timeout means the expected value was observed. */
2123 if (i
< rdev
->usec_timeout
) {
2124 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring
->idx
, i
);
2126 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2127 ring
->idx
, scratch
, tmp
);
2130 radeon_scratch_free(rdev
, scratch
);
2135 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2137 * @rdev: radeon_device pointer
2138 * @fence: radeon fence object
2140 * Emits a fence sequnce number on the gfx ring and flushes
2143 void cik_fence_gfx_ring_emit(struct radeon_device
*rdev
,
2144 struct radeon_fence
*fence
)
2146 struct radeon_ring
*ring
= &rdev
->ring
[fence
->ring
];
2147 u64 addr
= rdev
->fence_drv
[fence
->ring
].gpu_addr
;
2149 /* EVENT_WRITE_EOP - flush caches, send int */
2150 radeon_ring_write(ring
, PACKET3(PACKET3_EVENT_WRITE_EOP
, 4));
2151 radeon_ring_write(ring
, (EOP_TCL1_ACTION_EN
|
2153 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT
) |
2155 radeon_ring_write(ring
, addr
& 0xfffffffc);
2156 radeon_ring_write(ring
, (upper_32_bits(addr
) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2157 radeon_ring_write(ring
, fence
->seq
);
2158 radeon_ring_write(ring
, 0);
2160 /* We should be using the new WAIT_REG_MEM special op packet here
2161 * but it causes the CP to hang
2163 radeon_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
2164 radeon_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
2165 WRITE_DATA_DST_SEL(0)));
2166 radeon_ring_write(ring
, HDP_MEM_COHERENCY_FLUSH_CNTL
>> 2);
2167 radeon_ring_write(ring
, 0);
2168 radeon_ring_write(ring
, 0);
2172 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2174 * @rdev: radeon_device pointer
2175 * @fence: radeon fence object
2177 * Emits a fence sequnce number on the compute ring and flushes
2180 void cik_fence_compute_ring_emit(struct radeon_device
*rdev
,
2181 struct radeon_fence
*fence
)
2183 struct radeon_ring
*ring
= &rdev
->ring
[fence
->ring
];
2184 u64 addr
= rdev
->fence_drv
[fence
->ring
].gpu_addr
;
2186 /* RELEASE_MEM - flush caches, send int */
2187 radeon_ring_write(ring
, PACKET3(PACKET3_RELEASE_MEM
, 5));
2188 radeon_ring_write(ring
, (EOP_TCL1_ACTION_EN
|
2190 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT
) |
2192 radeon_ring_write(ring
, DATA_SEL(1) | INT_SEL(2));
2193 radeon_ring_write(ring
, addr
& 0xfffffffc);
2194 radeon_ring_write(ring
, upper_32_bits(addr
));
2195 radeon_ring_write(ring
, fence
->seq
);
2196 radeon_ring_write(ring
, 0);
2198 /* We should be using the new WAIT_REG_MEM special op packet here
2199 * but it causes the CP to hang
2201 radeon_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
2202 radeon_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
2203 WRITE_DATA_DST_SEL(0)));
2204 radeon_ring_write(ring
, HDP_MEM_COHERENCY_FLUSH_CNTL
>> 2);
2205 radeon_ring_write(ring
, 0);
2206 radeon_ring_write(ring
, 0);
2209 void cik_semaphore_ring_emit(struct radeon_device
*rdev
,
2210 struct radeon_ring
*ring
,
2211 struct radeon_semaphore
*semaphore
,
2214 uint64_t addr
= semaphore
->gpu_addr
;
2215 unsigned sel
= emit_wait
? PACKET3_SEM_SEL_WAIT
: PACKET3_SEM_SEL_SIGNAL
;
2217 radeon_ring_write(ring
, PACKET3(PACKET3_MEM_SEMAPHORE
, 1));
2218 radeon_ring_write(ring
, addr
& 0xffffffff);
2219 radeon_ring_write(ring
, (upper_32_bits(addr
) & 0xffff) | sel
);
2226 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2228 * @rdev: radeon_device pointer
2229 * @ib: radeon indirect buffer object
2231 * Emits an DE (drawing engine) or CE (constant engine) IB
2232 * on the gfx ring. IBs are usually generated by userspace
2233 * acceleration drivers and submitted to the kernel for
2234 * sheduling on the ring. This function schedules the IB
2235 * on the gfx ring for execution by the GPU.
2237 void cik_ring_ib_execute(struct radeon_device
*rdev
, struct radeon_ib
*ib
)
2239 struct radeon_ring
*ring
= &rdev
->ring
[ib
->ring
];
2240 u32 header
, control
= INDIRECT_BUFFER_VALID
;
2242 if (ib
->is_const_ib
) {
2243 /* set switch buffer packet before const IB */
2244 radeon_ring_write(ring
, PACKET3(PACKET3_SWITCH_BUFFER
, 0));
2245 radeon_ring_write(ring
, 0);
2247 header
= PACKET3(PACKET3_INDIRECT_BUFFER_CONST
, 2);
2250 if (ring
->rptr_save_reg
) {
2251 next_rptr
= ring
->wptr
+ 3 + 4;
2252 radeon_ring_write(ring
, PACKET3(PACKET3_SET_UCONFIG_REG
, 1));
2253 radeon_ring_write(ring
, ((ring
->rptr_save_reg
-
2254 PACKET3_SET_UCONFIG_REG_START
) >> 2));
2255 radeon_ring_write(ring
, next_rptr
);
2256 } else if (rdev
->wb
.enabled
) {
2257 next_rptr
= ring
->wptr
+ 5 + 4;
2258 radeon_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
2259 radeon_ring_write(ring
, WRITE_DATA_DST_SEL(1));
2260 radeon_ring_write(ring
, ring
->next_rptr_gpu_addr
& 0xfffffffc);
2261 radeon_ring_write(ring
, upper_32_bits(ring
->next_rptr_gpu_addr
) & 0xffffffff);
2262 radeon_ring_write(ring
, next_rptr
);
2265 header
= PACKET3(PACKET3_INDIRECT_BUFFER
, 2);
2268 control
|= ib
->length_dw
|
2269 (ib
->vm
? (ib
->vm
->id
<< 24) : 0);
2271 radeon_ring_write(ring
, header
);
2272 radeon_ring_write(ring
,
2276 (ib
->gpu_addr
& 0xFFFFFFFC));
2277 radeon_ring_write(ring
, upper_32_bits(ib
->gpu_addr
) & 0xFFFF);
2278 radeon_ring_write(ring
, control
);
2282 * cik_ib_test - basic gfx ring IB test
2284 * @rdev: radeon_device pointer
2285 * @ring: radeon_ring structure holding ring information
2287 * Allocate an IB and execute it on the gfx ring (CIK).
2288 * Provides a basic gfx ring test to verify that IBs are working.
2289 * Returns 0 on success, error on failure.
2291 int cik_ib_test(struct radeon_device
*rdev
, struct radeon_ring
*ring
)
2293 struct radeon_ib ib
;
2299 r
= radeon_scratch_get(rdev
, &scratch
);
2301 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r
);
2304 WREG32(scratch
, 0xCAFEDEAD);
2305 r
= radeon_ib_get(rdev
, ring
->idx
, &ib
, NULL
, 256);
2307 DRM_ERROR("radeon: failed to get ib (%d).\n", r
);
2310 ib
.ptr
[0] = PACKET3(PACKET3_SET_UCONFIG_REG
, 1);
2311 ib
.ptr
[1] = ((scratch
- PACKET3_SET_UCONFIG_REG_START
) >> 2);
2312 ib
.ptr
[2] = 0xDEADBEEF;
2314 r
= radeon_ib_schedule(rdev
, &ib
, NULL
);
2316 radeon_scratch_free(rdev
, scratch
);
2317 radeon_ib_free(rdev
, &ib
);
2318 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r
);
2321 r
= radeon_fence_wait(ib
.fence
, false);
2323 DRM_ERROR("radeon: fence wait failed (%d).\n", r
);
2326 for (i
= 0; i
< rdev
->usec_timeout
; i
++) {
2327 tmp
= RREG32(scratch
);
2328 if (tmp
== 0xDEADBEEF)
2332 if (i
< rdev
->usec_timeout
) {
2333 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib
.fence
->ring
, i
);
2335 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2339 radeon_scratch_free(rdev
, scratch
);
2340 radeon_ib_free(rdev
, &ib
);
/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
2368 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2370 * @rdev: radeon_device pointer
2371 * @enable: enable or disable the MEs
2373 * Halts or unhalts the gfx MEs.
2375 static void cik_cp_gfx_enable(struct radeon_device
*rdev
, bool enable
)
2378 WREG32(CP_ME_CNTL
, 0);
2380 WREG32(CP_ME_CNTL
, (CP_ME_HALT
| CP_PFP_HALT
| CP_CE_HALT
));
2381 rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
].ready
= false;
2387 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2389 * @rdev: radeon_device pointer
2391 * Loads the gfx PFP, ME, and CE ucode.
2392 * Returns 0 for success, -EINVAL if the ucode is not available.
2394 static int cik_cp_gfx_load_microcode(struct radeon_device
*rdev
)
2396 const __be32
*fw_data
;
2399 if (!rdev
->me_fw
|| !rdev
->pfp_fw
|| !rdev
->ce_fw
)
2402 cik_cp_gfx_enable(rdev
, false);
2405 fw_data
= (const __be32
*)rdev
->pfp_fw
->data
;
2406 WREG32(CP_PFP_UCODE_ADDR
, 0);
2407 for (i
= 0; i
< CIK_PFP_UCODE_SIZE
; i
++)
2408 WREG32(CP_PFP_UCODE_DATA
, be32_to_cpup(fw_data
++));
2409 WREG32(CP_PFP_UCODE_ADDR
, 0);
2412 fw_data
= (const __be32
*)rdev
->ce_fw
->data
;
2413 WREG32(CP_CE_UCODE_ADDR
, 0);
2414 for (i
= 0; i
< CIK_CE_UCODE_SIZE
; i
++)
2415 WREG32(CP_CE_UCODE_DATA
, be32_to_cpup(fw_data
++));
2416 WREG32(CP_CE_UCODE_ADDR
, 0);
2419 fw_data
= (const __be32
*)rdev
->me_fw
->data
;
2420 WREG32(CP_ME_RAM_WADDR
, 0);
2421 for (i
= 0; i
< CIK_ME_UCODE_SIZE
; i
++)
2422 WREG32(CP_ME_RAM_DATA
, be32_to_cpup(fw_data
++));
2423 WREG32(CP_ME_RAM_WADDR
, 0);
2425 WREG32(CP_PFP_UCODE_ADDR
, 0);
2426 WREG32(CP_CE_UCODE_ADDR
, 0);
2427 WREG32(CP_ME_RAM_WADDR
, 0);
2428 WREG32(CP_ME_RAM_RADDR
, 0);
2433 * cik_cp_gfx_start - start the gfx ring
2435 * @rdev: radeon_device pointer
2437 * Enables the ring and loads the clear state context and other
2438 * packets required to init the ring.
2439 * Returns 0 for success, error for failure.
2441 static int cik_cp_gfx_start(struct radeon_device
*rdev
)
2443 struct radeon_ring
*ring
= &rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
];
2447 WREG32(CP_MAX_CONTEXT
, rdev
->config
.cik
.max_hw_contexts
- 1);
2448 WREG32(CP_ENDIAN_SWAP
, 0);
2449 WREG32(CP_DEVICE_ID
, 1);
2451 cik_cp_gfx_enable(rdev
, true);
2453 r
= radeon_ring_lock(rdev
, ring
, cik_default_size
+ 17);
2455 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r
);
2459 /* init the CE partitions. CE only used for gfx on CIK */
2460 radeon_ring_write(ring
, PACKET3(PACKET3_SET_BASE
, 2));
2461 radeon_ring_write(ring
, PACKET3_BASE_INDEX(CE_PARTITION_BASE
));
2462 radeon_ring_write(ring
, 0xc000);
2463 radeon_ring_write(ring
, 0xc000);
2465 /* setup clear context state */
2466 radeon_ring_write(ring
, PACKET3(PACKET3_PREAMBLE_CNTL
, 0));
2467 radeon_ring_write(ring
, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE
);
2469 radeon_ring_write(ring
, PACKET3(PACKET3_CONTEXT_CONTROL
, 1));
2470 radeon_ring_write(ring
, 0x80000000);
2471 radeon_ring_write(ring
, 0x80000000);
2473 for (i
= 0; i
< cik_default_size
; i
++)
2474 radeon_ring_write(ring
, cik_default_state
[i
]);
2476 radeon_ring_write(ring
, PACKET3(PACKET3_PREAMBLE_CNTL
, 0));
2477 radeon_ring_write(ring
, PACKET3_PREAMBLE_END_CLEAR_STATE
);
2479 /* set clear context state */
2480 radeon_ring_write(ring
, PACKET3(PACKET3_CLEAR_STATE
, 0));
2481 radeon_ring_write(ring
, 0);
2483 radeon_ring_write(ring
, PACKET3(PACKET3_SET_CONTEXT_REG
, 2));
2484 radeon_ring_write(ring
, 0x00000316);
2485 radeon_ring_write(ring
, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2486 radeon_ring_write(ring
, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2488 radeon_ring_unlock_commit(rdev
, ring
);
2494 * cik_cp_gfx_fini - stop the gfx ring
2496 * @rdev: radeon_device pointer
2498 * Stop the gfx ring and tear down the driver ring
2501 static void cik_cp_gfx_fini(struct radeon_device
*rdev
)
2503 cik_cp_gfx_enable(rdev
, false);
2504 radeon_ring_fini(rdev
, &rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
]);
2508 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2510 * @rdev: radeon_device pointer
2512 * Program the location and size of the gfx ring buffer
2513 * and test it to make sure it's working.
2514 * Returns 0 for success, error for failure.
2516 static int cik_cp_gfx_resume(struct radeon_device
*rdev
)
2518 struct radeon_ring
*ring
;
2524 WREG32(CP_SEM_WAIT_TIMER
, 0x0);
2525 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL
, 0x0);
2527 /* Set the write pointer delay */
2528 WREG32(CP_RB_WPTR_DELAY
, 0);
2530 /* set the RB to use vmid 0 */
2531 WREG32(CP_RB_VMID
, 0);
2533 WREG32(SCRATCH_ADDR
, ((rdev
->wb
.gpu_addr
+ RADEON_WB_SCRATCH_OFFSET
) >> 8) & 0xFFFFFFFF);
2535 /* ring 0 - compute and gfx */
2536 /* Set ring buffer size */
2537 ring
= &rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
];
2538 rb_bufsz
= order_base_2(ring
->ring_size
/ 8);
2539 tmp
= (order_base_2(RADEON_GPU_PAGE_SIZE
/8) << 8) | rb_bufsz
;
2541 tmp
|= BUF_SWAP_32BIT
;
2543 WREG32(CP_RB0_CNTL
, tmp
);
2545 /* Initialize the ring buffer's read and write pointers */
2546 WREG32(CP_RB0_CNTL
, tmp
| RB_RPTR_WR_ENA
);
2548 WREG32(CP_RB0_WPTR
, ring
->wptr
);
2550 /* set the wb address wether it's enabled or not */
2551 WREG32(CP_RB0_RPTR_ADDR
, (rdev
->wb
.gpu_addr
+ RADEON_WB_CP_RPTR_OFFSET
) & 0xFFFFFFFC);
2552 WREG32(CP_RB0_RPTR_ADDR_HI
, upper_32_bits(rdev
->wb
.gpu_addr
+ RADEON_WB_CP_RPTR_OFFSET
) & 0xFF);
2554 /* scratch register shadowing is no longer supported */
2555 WREG32(SCRATCH_UMSK
, 0);
2557 if (!rdev
->wb
.enabled
)
2558 tmp
|= RB_NO_UPDATE
;
2561 WREG32(CP_RB0_CNTL
, tmp
);
2563 rb_addr
= ring
->gpu_addr
>> 8;
2564 WREG32(CP_RB0_BASE
, rb_addr
);
2565 WREG32(CP_RB0_BASE_HI
, upper_32_bits(rb_addr
));
2567 ring
->rptr
= RREG32(CP_RB0_RPTR
);
2569 /* start the ring */
2570 cik_cp_gfx_start(rdev
);
2571 rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
].ready
= true;
2572 r
= radeon_ring_test(rdev
, RADEON_RING_TYPE_GFX_INDEX
, &rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
]);
2574 rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
].ready
= false;
2580 u32
cik_compute_ring_get_rptr(struct radeon_device
*rdev
,
2581 struct radeon_ring
*ring
)
2587 if (rdev
->wb
.enabled
) {
2588 rptr
= le32_to_cpu(rdev
->wb
.wb
[ring
->rptr_offs
/4]);
2590 cik_srbm_select(rdev
, ring
->me
, ring
->pipe
, ring
->queue
, 0);
2591 rptr
= RREG32(CP_HQD_PQ_RPTR
);
2592 cik_srbm_select(rdev
, 0, 0, 0, 0);
2594 rptr
= (rptr
& ring
->ptr_reg_mask
) >> ring
->ptr_reg_shift
;
2599 u32
cik_compute_ring_get_wptr(struct radeon_device
*rdev
,
2600 struct radeon_ring
*ring
)
2604 if (rdev
->wb
.enabled
) {
2605 wptr
= le32_to_cpu(rdev
->wb
.wb
[ring
->wptr_offs
/4]);
2607 cik_srbm_select(rdev
, ring
->me
, ring
->pipe
, ring
->queue
, 0);
2608 wptr
= RREG32(CP_HQD_PQ_WPTR
);
2609 cik_srbm_select(rdev
, 0, 0, 0, 0);
2611 wptr
= (wptr
& ring
->ptr_reg_mask
) >> ring
->ptr_reg_shift
;
2616 void cik_compute_ring_set_wptr(struct radeon_device
*rdev
,
2617 struct radeon_ring
*ring
)
2619 u32 wptr
= (ring
->wptr
<< ring
->ptr_reg_shift
) & ring
->ptr_reg_mask
;
2621 rdev
->wb
.wb
[ring
->wptr_offs
/4] = cpu_to_le32(wptr
);
2622 WDOORBELL32(ring
->doorbell_offset
, wptr
);
2626 * cik_cp_compute_enable - enable/disable the compute CP MEs
2628 * @rdev: radeon_device pointer
2629 * @enable: enable or disable the MEs
2631 * Halts or unhalts the compute MEs.
2633 static void cik_cp_compute_enable(struct radeon_device
*rdev
, bool enable
)
2636 WREG32(CP_MEC_CNTL
, 0);
2638 WREG32(CP_MEC_CNTL
, (MEC_ME1_HALT
| MEC_ME2_HALT
));
2643 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2645 * @rdev: radeon_device pointer
2647 * Loads the compute MEC1&2 ucode.
2648 * Returns 0 for success, -EINVAL if the ucode is not available.
2650 static int cik_cp_compute_load_microcode(struct radeon_device
*rdev
)
2652 const __be32
*fw_data
;
2658 cik_cp_compute_enable(rdev
, false);
2661 fw_data
= (const __be32
*)rdev
->mec_fw
->data
;
2662 WREG32(CP_MEC_ME1_UCODE_ADDR
, 0);
2663 for (i
= 0; i
< CIK_MEC_UCODE_SIZE
; i
++)
2664 WREG32(CP_MEC_ME1_UCODE_DATA
, be32_to_cpup(fw_data
++));
2665 WREG32(CP_MEC_ME1_UCODE_ADDR
, 0);
2667 if (rdev
->family
== CHIP_KAVERI
) {
2669 fw_data
= (const __be32
*)rdev
->mec_fw
->data
;
2670 WREG32(CP_MEC_ME2_UCODE_ADDR
, 0);
2671 for (i
= 0; i
< CIK_MEC_UCODE_SIZE
; i
++)
2672 WREG32(CP_MEC_ME2_UCODE_DATA
, be32_to_cpup(fw_data
++));
2673 WREG32(CP_MEC_ME2_UCODE_ADDR
, 0);
2680 * cik_cp_compute_start - start the compute queues
2682 * @rdev: radeon_device pointer
2684 * Enable the compute queues.
2685 * Returns 0 for success, error for failure.
2687 static int cik_cp_compute_start(struct radeon_device
*rdev
)
2689 cik_cp_compute_enable(rdev
, true);
2695 * cik_cp_compute_fini - stop the compute queues
2697 * @rdev: radeon_device pointer
2699 * Stop the compute queues and tear down the driver queue
2702 static void cik_cp_compute_fini(struct radeon_device
*rdev
)
2706 cik_cp_compute_enable(rdev
, false);
2708 for (i
= 0; i
< 2; i
++) {
2710 idx
= CAYMAN_RING_TYPE_CP1_INDEX
;
2712 idx
= CAYMAN_RING_TYPE_CP2_INDEX
;
2714 if (rdev
->ring
[idx
].mqd_obj
) {
2715 r
= radeon_bo_reserve(rdev
->ring
[idx
].mqd_obj
, false);
2716 if (unlikely(r
!= 0))
2717 dev_warn(rdev
->dev
, "(%d) reserve MQD bo failed\n", r
);
2719 radeon_bo_unpin(rdev
->ring
[idx
].mqd_obj
);
2720 radeon_bo_unreserve(rdev
->ring
[idx
].mqd_obj
);
2722 radeon_bo_unref(&rdev
->ring
[idx
].mqd_obj
);
2723 rdev
->ring
[idx
].mqd_obj
= NULL
;
2728 static void cik_mec_fini(struct radeon_device
*rdev
)
2732 if (rdev
->mec
.hpd_eop_obj
) {
2733 r
= radeon_bo_reserve(rdev
->mec
.hpd_eop_obj
, false);
2734 if (unlikely(r
!= 0))
2735 dev_warn(rdev
->dev
, "(%d) reserve HPD EOP bo failed\n", r
);
2736 radeon_bo_unpin(rdev
->mec
.hpd_eop_obj
);
2737 radeon_bo_unreserve(rdev
->mec
.hpd_eop_obj
);
2739 radeon_bo_unref(&rdev
->mec
.hpd_eop_obj
);
2740 rdev
->mec
.hpd_eop_obj
= NULL
;
2744 #define MEC_HPD_SIZE 2048
2746 static int cik_mec_init(struct radeon_device
*rdev
)
2752 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2753 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2755 if (rdev
->family
== CHIP_KAVERI
)
2756 rdev
->mec
.num_mec
= 2;
2758 rdev
->mec
.num_mec
= 1;
2759 rdev
->mec
.num_pipe
= 4;
2760 rdev
->mec
.num_queue
= rdev
->mec
.num_mec
* rdev
->mec
.num_pipe
* 8;
2762 if (rdev
->mec
.hpd_eop_obj
== NULL
) {
2763 r
= radeon_bo_create(rdev
,
2764 rdev
->mec
.num_mec
*rdev
->mec
.num_pipe
* MEC_HPD_SIZE
* 2,
2766 RADEON_GEM_DOMAIN_GTT
, NULL
,
2767 &rdev
->mec
.hpd_eop_obj
);
2769 dev_warn(rdev
->dev
, "(%d) create HDP EOP bo failed\n", r
);
2774 r
= radeon_bo_reserve(rdev
->mec
.hpd_eop_obj
, false);
2775 if (unlikely(r
!= 0)) {
2779 r
= radeon_bo_pin(rdev
->mec
.hpd_eop_obj
, RADEON_GEM_DOMAIN_GTT
,
2780 &rdev
->mec
.hpd_eop_gpu_addr
);
2782 dev_warn(rdev
->dev
, "(%d) pin HDP EOP bo failed\n", r
);
2786 r
= radeon_bo_kmap(rdev
->mec
.hpd_eop_obj
, (void **)&hpd
);
2788 dev_warn(rdev
->dev
, "(%d) map HDP EOP bo failed\n", r
);
2793 /* clear memory. Not sure if this is required or not */
2794 memset(hpd
, 0, rdev
->mec
.num_mec
*rdev
->mec
.num_pipe
* MEC_HPD_SIZE
* 2);
2796 radeon_bo_kunmap(rdev
->mec
.hpd_eop_obj
);
2797 radeon_bo_unreserve(rdev
->mec
.hpd_eop_obj
);
2802 struct hqd_registers
2804 u32 cp_mqd_base_addr
;
2805 u32 cp_mqd_base_addr_hi
;
2808 u32 cp_hqd_persistent_state
;
2809 u32 cp_hqd_pipe_priority
;
2810 u32 cp_hqd_queue_priority
;
2813 u32 cp_hqd_pq_base_hi
;
2815 u32 cp_hqd_pq_rptr_report_addr
;
2816 u32 cp_hqd_pq_rptr_report_addr_hi
;
2817 u32 cp_hqd_pq_wptr_poll_addr
;
2818 u32 cp_hqd_pq_wptr_poll_addr_hi
;
2819 u32 cp_hqd_pq_doorbell_control
;
2821 u32 cp_hqd_pq_control
;
2822 u32 cp_hqd_ib_base_addr
;
2823 u32 cp_hqd_ib_base_addr_hi
;
2825 u32 cp_hqd_ib_control
;
2826 u32 cp_hqd_iq_timer
;
2828 u32 cp_hqd_dequeue_request
;
2829 u32 cp_hqd_dma_offload
;
2830 u32 cp_hqd_sema_cmd
;
2831 u32 cp_hqd_msg_type
;
2832 u32 cp_hqd_atomic0_preop_lo
;
2833 u32 cp_hqd_atomic0_preop_hi
;
2834 u32 cp_hqd_atomic1_preop_lo
;
2835 u32 cp_hqd_atomic1_preop_hi
;
2836 u32 cp_hqd_hq_scheduler0
;
2837 u32 cp_hqd_hq_scheduler1
;
2844 u32 dispatch_initiator
;
2848 u32 pipeline_stat_enable
;
2849 u32 perf_counter_enable
;
2855 u32 resource_limits
;
2856 u32 static_thread_mgmt01
[2];
2858 u32 static_thread_mgmt23
[2];
2860 u32 thread_trace_enable
;
2863 u32 vgtcs_invoke_count
[2];
2864 struct hqd_registers queue_state
;
2866 u32 interrupt_queue
[64];
2870 * cik_cp_compute_resume - setup the compute queue registers
2872 * @rdev: radeon_device pointer
2874 * Program the compute queues and test them to make sure they
2876 * Returns 0 for success, error for failure.
2878 static int cik_cp_compute_resume(struct radeon_device
*rdev
)
2882 bool use_doorbell
= true;
2888 struct bonaire_mqd
*mqd
;
2890 r
= cik_cp_compute_start(rdev
);
2894 /* fix up chicken bits */
2895 tmp
= RREG32(CP_CPF_DEBUG
);
2897 WREG32(CP_CPF_DEBUG
, tmp
);
2899 /* init the pipes */
2900 for (i
= 0; i
< (rdev
->mec
.num_pipe
* rdev
->mec
.num_mec
); i
++) {
2901 int me
= (i
< 4) ? 1 : 2;
2902 int pipe
= (i
< 4) ? i
: (i
- 4);
2904 eop_gpu_addr
= rdev
->mec
.hpd_eop_gpu_addr
+ (i
* MEC_HPD_SIZE
* 2);
2906 cik_srbm_select(rdev
, me
, pipe
, 0, 0);
2908 /* write the EOP addr */
2909 WREG32(CP_HPD_EOP_BASE_ADDR
, eop_gpu_addr
>> 8);
2910 WREG32(CP_HPD_EOP_BASE_ADDR_HI
, upper_32_bits(eop_gpu_addr
) >> 8);
2912 /* set the VMID assigned */
2913 WREG32(CP_HPD_EOP_VMID
, 0);
2915 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2916 tmp
= RREG32(CP_HPD_EOP_CONTROL
);
2917 tmp
&= ~EOP_SIZE_MASK
;
2918 tmp
|= order_base_2(MEC_HPD_SIZE
/ 8);
2919 WREG32(CP_HPD_EOP_CONTROL
, tmp
);
2921 cik_srbm_select(rdev
, 0, 0, 0, 0);
2923 /* init the queues. Just two for now. */
2924 for (i
= 0; i
< 2; i
++) {
2926 idx
= CAYMAN_RING_TYPE_CP1_INDEX
;
2928 idx
= CAYMAN_RING_TYPE_CP2_INDEX
;
2930 if (rdev
->ring
[idx
].mqd_obj
== NULL
) {
2931 r
= radeon_bo_create(rdev
,
2932 sizeof(struct bonaire_mqd
),
2934 RADEON_GEM_DOMAIN_GTT
, NULL
,
2935 &rdev
->ring
[idx
].mqd_obj
);
2937 dev_warn(rdev
->dev
, "(%d) create MQD bo failed\n", r
);
2942 r
= radeon_bo_reserve(rdev
->ring
[idx
].mqd_obj
, false);
2943 if (unlikely(r
!= 0)) {
2944 cik_cp_compute_fini(rdev
);
2947 r
= radeon_bo_pin(rdev
->ring
[idx
].mqd_obj
, RADEON_GEM_DOMAIN_GTT
,
2950 dev_warn(rdev
->dev
, "(%d) pin MQD bo failed\n", r
);
2951 cik_cp_compute_fini(rdev
);
2954 r
= radeon_bo_kmap(rdev
->ring
[idx
].mqd_obj
, (void **)&buf
);
2956 dev_warn(rdev
->dev
, "(%d) map MQD bo failed\n", r
);
2957 cik_cp_compute_fini(rdev
);
2961 /* doorbell offset */
2962 rdev
->ring
[idx
].doorbell_offset
=
2963 (rdev
->ring
[idx
].doorbell_page_num
* PAGE_SIZE
) + 0;
2965 /* init the mqd struct */
2966 memset(buf
, 0, sizeof(struct bonaire_mqd
));
2968 mqd
= (struct bonaire_mqd
*)buf
;
2969 mqd
->header
= 0xC0310800;
2970 mqd
->static_thread_mgmt01
[0] = 0xffffffff;
2971 mqd
->static_thread_mgmt01
[1] = 0xffffffff;
2972 mqd
->static_thread_mgmt23
[0] = 0xffffffff;
2973 mqd
->static_thread_mgmt23
[1] = 0xffffffff;
2975 cik_srbm_select(rdev
, rdev
->ring
[idx
].me
,
2976 rdev
->ring
[idx
].pipe
,
2977 rdev
->ring
[idx
].queue
, 0);
2979 /* disable wptr polling */
2980 tmp
= RREG32(CP_PQ_WPTR_POLL_CNTL
);
2981 tmp
&= ~WPTR_POLL_EN
;
2982 WREG32(CP_PQ_WPTR_POLL_CNTL
, tmp
);
2984 /* enable doorbell? */
2985 mqd
->queue_state
.cp_hqd_pq_doorbell_control
=
2986 RREG32(CP_HQD_PQ_DOORBELL_CONTROL
);
2988 mqd
->queue_state
.cp_hqd_pq_doorbell_control
|= DOORBELL_EN
;
2990 mqd
->queue_state
.cp_hqd_pq_doorbell_control
&= ~DOORBELL_EN
;
2991 WREG32(CP_HQD_PQ_DOORBELL_CONTROL
,
2992 mqd
->queue_state
.cp_hqd_pq_doorbell_control
);
2994 /* disable the queue if it's active */
2995 mqd
->queue_state
.cp_hqd_dequeue_request
= 0;
2996 mqd
->queue_state
.cp_hqd_pq_rptr
= 0;
2997 mqd
->queue_state
.cp_hqd_pq_wptr
= 0;
2998 if (RREG32(CP_HQD_ACTIVE
) & 1) {
2999 WREG32(CP_HQD_DEQUEUE_REQUEST
, 1);
3000 for (i
= 0; i
< rdev
->usec_timeout
; i
++) {
3001 if (!(RREG32(CP_HQD_ACTIVE
) & 1))
3005 WREG32(CP_HQD_DEQUEUE_REQUEST
, mqd
->queue_state
.cp_hqd_dequeue_request
);
3006 WREG32(CP_HQD_PQ_RPTR
, mqd
->queue_state
.cp_hqd_pq_rptr
);
3007 WREG32(CP_HQD_PQ_WPTR
, mqd
->queue_state
.cp_hqd_pq_wptr
);
3010 /* set the pointer to the MQD */
3011 mqd
->queue_state
.cp_mqd_base_addr
= mqd_gpu_addr
& 0xfffffffc;
3012 mqd
->queue_state
.cp_mqd_base_addr_hi
= upper_32_bits(mqd_gpu_addr
);
3013 WREG32(CP_MQD_BASE_ADDR
, mqd
->queue_state
.cp_mqd_base_addr
);
3014 WREG32(CP_MQD_BASE_ADDR_HI
, mqd
->queue_state
.cp_mqd_base_addr_hi
);
3015 /* set MQD vmid to 0 */
3016 mqd
->queue_state
.cp_mqd_control
= RREG32(CP_MQD_CONTROL
);
3017 mqd
->queue_state
.cp_mqd_control
&= ~MQD_VMID_MASK
;
3018 WREG32(CP_MQD_CONTROL
, mqd
->queue_state
.cp_mqd_control
);
3020 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3021 hqd_gpu_addr
= rdev
->ring
[idx
].gpu_addr
>> 8;
3022 mqd
->queue_state
.cp_hqd_pq_base
= hqd_gpu_addr
;
3023 mqd
->queue_state
.cp_hqd_pq_base_hi
= upper_32_bits(hqd_gpu_addr
);
3024 WREG32(CP_HQD_PQ_BASE
, mqd
->queue_state
.cp_hqd_pq_base
);
3025 WREG32(CP_HQD_PQ_BASE_HI
, mqd
->queue_state
.cp_hqd_pq_base_hi
);
3027 /* set up the HQD, this is similar to CP_RB0_CNTL */
3028 mqd
->queue_state
.cp_hqd_pq_control
= RREG32(CP_HQD_PQ_CONTROL
);
3029 mqd
->queue_state
.cp_hqd_pq_control
&=
3030 ~(QUEUE_SIZE_MASK
| RPTR_BLOCK_SIZE_MASK
);
3032 mqd
->queue_state
.cp_hqd_pq_control
|=
3033 order_base_2(rdev
->ring
[idx
].ring_size
/ 8);
3034 mqd
->queue_state
.cp_hqd_pq_control
|=
3035 (order_base_2(RADEON_GPU_PAGE_SIZE
/8) << 8);
3037 mqd
->queue_state
.cp_hqd_pq_control
|= BUF_SWAP_32BIT
;
3039 mqd
->queue_state
.cp_hqd_pq_control
&=
3040 ~(UNORD_DISPATCH
| ROQ_PQ_IB_FLIP
| PQ_VOLATILE
);
3041 mqd
->queue_state
.cp_hqd_pq_control
|=
3042 PRIV_STATE
| KMD_QUEUE
; /* assuming kernel queue control */
3043 WREG32(CP_HQD_PQ_CONTROL
, mqd
->queue_state
.cp_hqd_pq_control
);
3045 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3047 wb_gpu_addr
= rdev
->wb
.gpu_addr
+ CIK_WB_CP1_WPTR_OFFSET
;
3049 wb_gpu_addr
= rdev
->wb
.gpu_addr
+ CIK_WB_CP2_WPTR_OFFSET
;
3050 mqd
->queue_state
.cp_hqd_pq_wptr_poll_addr
= wb_gpu_addr
& 0xfffffffc;
3051 mqd
->queue_state
.cp_hqd_pq_wptr_poll_addr_hi
= upper_32_bits(wb_gpu_addr
) & 0xffff;
3052 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR
, mqd
->queue_state
.cp_hqd_pq_wptr_poll_addr
);
3053 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI
,
3054 mqd
->queue_state
.cp_hqd_pq_wptr_poll_addr_hi
);
3056 /* set the wb address wether it's enabled or not */
3058 wb_gpu_addr
= rdev
->wb
.gpu_addr
+ RADEON_WB_CP1_RPTR_OFFSET
;
3060 wb_gpu_addr
= rdev
->wb
.gpu_addr
+ RADEON_WB_CP2_RPTR_OFFSET
;
3061 mqd
->queue_state
.cp_hqd_pq_rptr_report_addr
= wb_gpu_addr
& 0xfffffffc;
3062 mqd
->queue_state
.cp_hqd_pq_rptr_report_addr_hi
=
3063 upper_32_bits(wb_gpu_addr
) & 0xffff;
3064 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR
,
3065 mqd
->queue_state
.cp_hqd_pq_rptr_report_addr
);
3066 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI
,
3067 mqd
->queue_state
.cp_hqd_pq_rptr_report_addr_hi
);
3069 /* enable the doorbell if requested */
3071 mqd
->queue_state
.cp_hqd_pq_doorbell_control
=
3072 RREG32(CP_HQD_PQ_DOORBELL_CONTROL
);
3073 mqd
->queue_state
.cp_hqd_pq_doorbell_control
&= ~DOORBELL_OFFSET_MASK
;
3074 mqd
->queue_state
.cp_hqd_pq_doorbell_control
|=
3075 DOORBELL_OFFSET(rdev
->ring
[idx
].doorbell_offset
/ 4);
3076 mqd
->queue_state
.cp_hqd_pq_doorbell_control
|= DOORBELL_EN
;
3077 mqd
->queue_state
.cp_hqd_pq_doorbell_control
&=
3078 ~(DOORBELL_SOURCE
| DOORBELL_HIT
);
3081 mqd
->queue_state
.cp_hqd_pq_doorbell_control
= 0;
3083 WREG32(CP_HQD_PQ_DOORBELL_CONTROL
,
3084 mqd
->queue_state
.cp_hqd_pq_doorbell_control
);
3086 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3087 rdev
->ring
[idx
].wptr
= 0;
3088 mqd
->queue_state
.cp_hqd_pq_wptr
= rdev
->ring
[idx
].wptr
;
3089 WREG32(CP_HQD_PQ_WPTR
, mqd
->queue_state
.cp_hqd_pq_wptr
);
3090 rdev
->ring
[idx
].rptr
= RREG32(CP_HQD_PQ_RPTR
);
3091 mqd
->queue_state
.cp_hqd_pq_rptr
= rdev
->ring
[idx
].rptr
;
3093 /* set the vmid for the queue */
3094 mqd
->queue_state
.cp_hqd_vmid
= 0;
3095 WREG32(CP_HQD_VMID
, mqd
->queue_state
.cp_hqd_vmid
);
3097 /* activate the queue */
3098 mqd
->queue_state
.cp_hqd_active
= 1;
3099 WREG32(CP_HQD_ACTIVE
, mqd
->queue_state
.cp_hqd_active
);
3101 cik_srbm_select(rdev
, 0, 0, 0, 0);
3103 radeon_bo_kunmap(rdev
->ring
[idx
].mqd_obj
);
3104 radeon_bo_unreserve(rdev
->ring
[idx
].mqd_obj
);
3106 rdev
->ring
[idx
].ready
= true;
3107 r
= radeon_ring_test(rdev
, idx
, &rdev
->ring
[idx
]);
3109 rdev
->ring
[idx
].ready
= false;
3115 static void cik_cp_enable(struct radeon_device
*rdev
, bool enable
)
3117 cik_cp_gfx_enable(rdev
, enable
);
3118 cik_cp_compute_enable(rdev
, enable
);
/**
 * cik_cp_load_microcode - load the gfx and compute CP ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx ucode first, then the compute ucode, and stops at the
 * first failure.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;
	r = cik_cp_compute_load_microcode(rdev);
	if (r)
		return r;

	return 0;
}
/**
 * cik_cp_fini - stop the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the gfx ring, then the compute queues.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
3141 static int cik_cp_resume(struct radeon_device
*rdev
)
3145 /* Reset all cp blocks */
3146 WREG32(GRBM_SOFT_RESET
, SOFT_RESET_CP
);
3147 RREG32(GRBM_SOFT_RESET
);
3149 WREG32(GRBM_SOFT_RESET
, 0);
3150 RREG32(GRBM_SOFT_RESET
);
3152 r
= cik_cp_load_microcode(rdev
);
3156 r
= cik_cp_gfx_resume(rdev
);
3159 r
= cik_cp_compute_resume(rdev
);
/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines.  These engines are used for compute
 * and gfx.  There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP. sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things.  It also has support for tiling/detiling of
 * buffers.
 */
3183 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3185 * @rdev: radeon_device pointer
3186 * @ib: IB object to schedule
3188 * Schedule an IB in the DMA ring (CIK).
3190 void cik_sdma_ring_ib_execute(struct radeon_device
*rdev
,
3191 struct radeon_ib
*ib
)
3193 struct radeon_ring
*ring
= &rdev
->ring
[ib
->ring
];
3194 u32 extra_bits
= (ib
->vm
? ib
->vm
->id
: 0) & 0xf;
3196 if (rdev
->wb
.enabled
) {
3197 u32 next_rptr
= ring
->wptr
+ 5;
3198 while ((next_rptr
& 7) != 4)
3201 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_WRITE
, SDMA_WRITE_SUB_OPCODE_LINEAR
, 0));
3202 radeon_ring_write(ring
, ring
->next_rptr_gpu_addr
& 0xfffffffc);
3203 radeon_ring_write(ring
, upper_32_bits(ring
->next_rptr_gpu_addr
) & 0xffffffff);
3204 radeon_ring_write(ring
, 1); /* number of DWs to follow */
3205 radeon_ring_write(ring
, next_rptr
);
3208 /* IB packet must end on a 8 DW boundary */
3209 while ((ring
->wptr
& 7) != 4)
3210 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_NOP
, 0, 0));
3211 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER
, 0, extra_bits
));
3212 radeon_ring_write(ring
, ib
->gpu_addr
& 0xffffffe0); /* base must be 32 byte aligned */
3213 radeon_ring_write(ring
, upper_32_bits(ib
->gpu_addr
) & 0xffffffff);
3214 radeon_ring_write(ring
, ib
->length_dw
);
3219 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3221 * @rdev: radeon_device pointer
3222 * @fence: radeon fence object
3224 * Add a DMA fence packet to the ring to write
3225 * the fence seq number and DMA trap packet to generate
3226 * an interrupt if needed (CIK).
/*
 * Emits three packets on the ring the fence belongs to: a FENCE packet that
 * writes fence->seq to that ring's fence driver GPU address, a TRAP packet,
 * and a POLL_REG_MEM packet on the HDP flush request/done registers.
 */
3228 void cik_sdma_fence_ring_emit(struct radeon_device
*rdev
,
3229 struct radeon_fence
*fence
)
3231 struct radeon_ring
*ring
= &rdev
->ring
[fence
->ring
];
3232 u64 addr
= rdev
->fence_drv
[fence
->ring
].gpu_addr
;
3233 u32 extra_bits
= (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3234 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
/* ref_and_mask is SDMA0 for the R600_RING_TYPE_DMA_INDEX ring, SDMA1 in the other assignment */
3237 if (fence
->ring
== R600_RING_TYPE_DMA_INDEX
)
3238 ref_and_mask
= SDMA0
;
3240 ref_and_mask
= SDMA1
;
3242 /* write the fence */
3243 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_FENCE
, 0, 0));
/* fence address, low dword then high dword, followed by the sequence number */
3244 radeon_ring_write(ring
, addr
& 0xffffffff);
3245 radeon_ring_write(ring
, upper_32_bits(addr
) & 0xffffffff);
3246 radeon_ring_write(ring
, fence
->seq
);
3247 /* generate an interrupt */
3248 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_TRAP
, 0, 0));
/* POLL_REG_MEM on the HDP flush registers; the same engine bit is used as reference and mask */
3250 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM
, 0, extra_bits
));
3251 radeon_ring_write(ring
, GPU_HDP_FLUSH_DONE
);
3252 radeon_ring_write(ring
, GPU_HDP_FLUSH_REQ
);
3253 radeon_ring_write(ring
, ref_and_mask
); /* REFERENCE */
3254 radeon_ring_write(ring
, ref_and_mask
); /* MASK */
3255 radeon_ring_write(ring
, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3259 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3261 * @rdev: radeon_device pointer
3262 * @ring: radeon_ring structure holding ring information
3263 * @semaphore: radeon semaphore object
3264 * @emit_wait: wait or signal semaphore
3266 * Add a DMA semaphore packet to the ring to wait on or signal
3267 * other rings (CIK).
/*
 * Emits a single SEMAPHORE packet: the packet header followed by the
 * semaphore's GPU address as a low dword (masked with 0xfffffff8) and a
 * high dword.
 */
3269 void cik_sdma_semaphore_ring_emit(struct radeon_device
*rdev
,
3270 struct radeon_ring
*ring
,
3271 struct radeon_semaphore
*semaphore
,
3274 u64 addr
= semaphore
->gpu_addr
;
/* SDMA_SEMAPHORE_EXTRA_S is set only when emit_wait is false */
3275 u32 extra_bits
= emit_wait
? 0 : SDMA_SEMAPHORE_EXTRA_S
;
3277 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE
, 0, extra_bits
));
3278 radeon_ring_write(ring
, addr
& 0xfffffff8);
3279 radeon_ring_write(ring
, upper_32_bits(addr
) & 0xffffffff);
3283 * cik_sdma_gfx_stop - stop the gfx async dma engines
3285 * @rdev: radeon_device pointer
3287 * Stop the gfx async dma ring buffers (CIK).
/*
 * Sets the TTM active VRAM size to mc.visible_vram_size, then for each of
 * the two SDMA instances clears SDMA_RB_ENABLE in the gfx ring buffer
 * control register and writes 0 to the gfx IB control register.
 */
3289 static void cik_sdma_gfx_stop(struct radeon_device
*rdev
)
3291 u32 rb_cntl
, reg_offset
;
3294 radeon_ttm_set_active_vram_size(rdev
, rdev
->mc
.visible_vram_size
);
3296 for (i
= 0; i
< 2; i
++) {
/* register offset of the instance being stopped: SDMA0 first, SDMA1 second */
3298 reg_offset
= SDMA0_REGISTER_OFFSET
;
3300 reg_offset
= SDMA1_REGISTER_OFFSET
;
/* read-modify-write so only the ring enable bit changes */
3301 rb_cntl
= RREG32(SDMA0_GFX_RB_CNTL
+ reg_offset
);
3302 rb_cntl
&= ~SDMA_RB_ENABLE
;
3303 WREG32(SDMA0_GFX_RB_CNTL
+ reg_offset
, rb_cntl
);
3304 WREG32(SDMA0_GFX_IB_CNTL
+ reg_offset
, 0);
3309 * cik_sdma_rlc_stop - stop the compute async dma engines
3311 * @rdev: radeon_device pointer
3313 * Stop the compute async dma queues (CIK).
/*
 * Counterpart of cik_sdma_gfx_stop() for the compute queues; it is called
 * right after it in cik_sdma_load_microcode() and cik_sdma_fini().
 */
3315 static void cik_sdma_rlc_stop(struct radeon_device
*rdev
)
3321 * cik_sdma_enable - halt or unhalt the async dma engines
3323 * @rdev: radeon_device pointer
3324 * @enable: enable/disable the DMA MEs.
3326 * Halt or unhalt the async dma engines (CIK).
/*
 * For each of the two SDMA instances, read-modify-writes SDMA0_ME_CNTL at
 * that instance's register offset: one assignment clears SDMA_HALT, the
 * other sets it (presumably selected by @enable - confirm against the
 * full source).
 */
3328 static void cik_sdma_enable(struct radeon_device
*rdev
, bool enable
)
3330 u32 me_cntl
, reg_offset
;
3333 for (i
= 0; i
< 2; i
++) {
/* register offset of the instance: SDMA0 first, SDMA1 second */
3335 reg_offset
= SDMA0_REGISTER_OFFSET
;
3337 reg_offset
= SDMA1_REGISTER_OFFSET
;
3338 me_cntl
= RREG32(SDMA0_ME_CNTL
+ reg_offset
);
/* clearing SDMA_HALT lets the engine run */
3340 me_cntl
&= ~SDMA_HALT
;
/* setting SDMA_HALT stops it */
3342 me_cntl
|= SDMA_HALT
;
3343 WREG32(SDMA0_ME_CNTL
+ reg_offset
, me_cntl
);
3348 * cik_sdma_gfx_resume - setup and start the async dma engines
3350 * @rdev: radeon_device pointer
3352 * Set up the gfx DMA ring buffers and enable them (CIK).
3353 * Returns 0 for success, error for failure.
/*
 * Programs the gfx ring buffer of each of the two SDMA instances (size,
 * read/write pointers, rptr writeback address, ring base), enables the
 * ring and its IBs, and runs radeon_ring_test() on it. Afterwards the TTM
 * active VRAM size is set to mc.real_vram_size.
 */
3355 static int cik_sdma_gfx_resume(struct radeon_device
*rdev
)
3357 struct radeon_ring
*ring
;
3358 u32 rb_cntl
, ib_cntl
;
3360 u32 reg_offset
, wb_offset
;
3363 for (i
= 0; i
< 2; i
++) {
/* ring, register offset and writeback offset for SDMA0 */
3365 ring
= &rdev
->ring
[R600_RING_TYPE_DMA_INDEX
];
3366 reg_offset
= SDMA0_REGISTER_OFFSET
;
3367 wb_offset
= R600_WB_DMA_RPTR_OFFSET
;
/* the same three values for SDMA1 */
3369 ring
= &rdev
->ring
[CAYMAN_RING_TYPE_DMA1_INDEX
];
3370 reg_offset
= SDMA1_REGISTER_OFFSET
;
3371 wb_offset
= CAYMAN_WB_DMA1_RPTR_OFFSET
;
/* zero both semaphore timer control registers */
3374 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL
+ reg_offset
, 0);
3375 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL
+ reg_offset
, 0);
3377 /* Set ring buffer size in dwords */
/* order_base_2() of the dword count, shifted left by one into rb_cntl */
3378 rb_bufsz
= order_base_2(ring
->ring_size
/ 4);
3379 rb_cntl
= rb_bufsz
<< 1;
3381 rb_cntl
|= SDMA_RB_SWAP_ENABLE
| SDMA_RPTR_WRITEBACK_SWAP_ENABLE
;
/* first write of rb_cntl does not include SDMA_RB_ENABLE; that is added in the final write below */
3383 WREG32(SDMA0_GFX_RB_CNTL
+ reg_offset
, rb_cntl
);
3385 /* Initialize the ring buffer's read and write pointers */
3386 WREG32(SDMA0_GFX_RB_RPTR
+ reg_offset
, 0);
3387 WREG32(SDMA0_GFX_RB_WPTR
+ reg_offset
, 0);
3389 /* set the wb address whether it's enabled or not */
3390 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI
+ reg_offset
,
3391 upper_32_bits(rdev
->wb
.gpu_addr
+ wb_offset
) & 0xFFFFFFFF);
3392 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO
+ reg_offset
,
3393 ((rdev
->wb
.gpu_addr
+ wb_offset
) & 0xFFFFFFFC));
/* rptr writeback is only switched on when the writeback buffer is enabled */
3395 if (rdev
->wb
.enabled
)
3396 rb_cntl
|= SDMA_RPTR_WRITEBACK_ENABLE
;
/* ring base: gpu_addr >> 8 in the base register, gpu_addr >> 40 in the _HI register */
3398 WREG32(SDMA0_GFX_RB_BASE
+ reg_offset
, ring
->gpu_addr
>> 8);
3399 WREG32(SDMA0_GFX_RB_BASE_HI
+ reg_offset
, ring
->gpu_addr
>> 40);
/* ring->wptr is shifted left by 2 for the register; the rptr register is shifted right by 2 for ring->rptr */
3402 WREG32(SDMA0_GFX_RB_WPTR
+ reg_offset
, ring
->wptr
<< 2);
3404 ring
->rptr
= RREG32(SDMA0_GFX_RB_RPTR
+ reg_offset
) >> 2;
/* final rb_cntl write turns the ring on */
3407 WREG32(SDMA0_GFX_RB_CNTL
+ reg_offset
, rb_cntl
| SDMA_RB_ENABLE
);
3409 ib_cntl
= SDMA_IB_ENABLE
;
3411 ib_cntl
|= SDMA_IB_SWAP_ENABLE
;
3413 /* enable DMA IBs */
3414 WREG32(SDMA0_GFX_IB_CNTL
+ reg_offset
, ib_cntl
);
/* verify the freshly programmed ring with the generic ring test */
3418 r
= radeon_ring_test(rdev
, ring
->idx
, ring
);
3420 ring
->ready
= false;
/* cik_sdma_gfx_stop() sets this to mc.visible_vram_size; here it is set to mc.real_vram_size */
3425 radeon_ttm_set_active_vram_size(rdev
, rdev
->mc
.real_vram_size
);
3431 * cik_sdma_rlc_resume - setup and start the async dma engines
3433 * @rdev: radeon_device pointer
3435 * Set up the compute DMA queues and enable them (CIK).
3436 * Returns 0 for success, error for failure.
/*
 * Compute-queue counterpart of cik_sdma_gfx_resume(); cik_sdma_resume()
 * calls it after the gfx rings have been resumed and stores its result in r.
 */
3438 static int cik_sdma_rlc_resume(struct radeon_device
*rdev
)
3445 * cik_sdma_load_microcode - load the sDMA ME ucode
3447 * @rdev: radeon_device pointer
3449 * Loads the sDMA0/1 ucode.
3450 * Returns 0 for success, -EINVAL if the ucode is not available.
/*
 * Stops the rings and halts both engines, then streams the same firmware
 * image (rdev->sdma_fw) into SDMA0 and SDMA1: the ucode address register is
 * reset to 0, CIK_SDMA_UCODE_SIZE words are written to the ucode data
 * register, followed by CIK_SDMA_UCODE_VERSION. Both address registers are
 * reset to 0 again at the end.
 */
3452 static int cik_sdma_load_microcode(struct radeon_device
*rdev
)
3454 const __be32
*fw_data
;
3460 /* stop the gfx rings and rlc compute queues */
3461 cik_sdma_gfx_stop(rdev
);
3462 cik_sdma_rlc_stop(rdev
);
/* halt both engines before touching their ucode registers */
3465 cik_sdma_enable(rdev
, false);
/* SDMA0: the image is read as big-endian 32-bit words and converted with be32_to_cpup() */
3468 fw_data
= (const __be32
*)rdev
->sdma_fw
->data
;
3469 WREG32(SDMA0_UCODE_ADDR
+ SDMA0_REGISTER_OFFSET
, 0);
3470 for (i
= 0; i
< CIK_SDMA_UCODE_SIZE
; i
++)
3471 WREG32(SDMA0_UCODE_DATA
+ SDMA0_REGISTER_OFFSET
, be32_to_cpup(fw_data
++));
3472 WREG32(SDMA0_UCODE_DATA
+ SDMA0_REGISTER_OFFSET
, CIK_SDMA_UCODE_VERSION
);
/* SDMA1: same image, restarted from the beginning, written at the SDMA1 register offset */
3475 fw_data
= (const __be32
*)rdev
->sdma_fw
->data
;
3476 WREG32(SDMA0_UCODE_ADDR
+ SDMA1_REGISTER_OFFSET
, 0);
3477 for (i
= 0; i
< CIK_SDMA_UCODE_SIZE
; i
++)
3478 WREG32(SDMA0_UCODE_DATA
+ SDMA1_REGISTER_OFFSET
, be32_to_cpup(fw_data
++));
3479 WREG32(SDMA0_UCODE_DATA
+ SDMA1_REGISTER_OFFSET
, CIK_SDMA_UCODE_VERSION
);
/* reset the ucode address of both engines to 0 */
3481 WREG32(SDMA0_UCODE_ADDR
+ SDMA0_REGISTER_OFFSET
, 0);
3482 WREG32(SDMA0_UCODE_ADDR
+ SDMA1_REGISTER_OFFSET
, 0);
3487 * cik_sdma_resume - setup and start the async dma engines
3489 * @rdev: radeon_device pointer
3491 * Set up the DMA engines and enable them (CIK).
3492 * Returns 0 for success, error for failure.
/*
 * Brings both SDMA engines up: pulses their soft-reset bits in
 * SRBM_SOFT_RESET, loads the microcode, unhalts the engines, then resumes
 * the gfx rings and the rlc compute queues.
 */
3494 static int cik_sdma_resume(struct radeon_device
*rdev
)
/* assert both SDMA reset bits, then clear them; each write is followed by a read of the same register */
3499 WREG32(SRBM_SOFT_RESET
, SOFT_RESET_SDMA
| SOFT_RESET_SDMA1
);
3500 RREG32(SRBM_SOFT_RESET
);
3502 WREG32(SRBM_SOFT_RESET
, 0);
3503 RREG32(SRBM_SOFT_RESET
);
3505 r
= cik_sdma_load_microcode(rdev
);
3509 /* unhalt the MEs */
3510 cik_sdma_enable(rdev
, true);
3512 /* start the gfx rings and rlc compute queues */
3513 r
= cik_sdma_gfx_resume(rdev
);
3516 r
= cik_sdma_rlc_resume(rdev
);
3524 * cik_sdma_fini - tear down the async dma engines
3526 * @rdev: radeon_device pointer
3528 * Stop the async dma engines and free the rings (CIK).
/*
 * Teardown in the reverse order of bring-up: stop the rings and queues,
 * halt both engines, then release the two DMA ring structures with
 * radeon_ring_fini().
 */
3530 static void cik_sdma_fini(struct radeon_device
*rdev
)
3532 /* stop the gfx rings and rlc compute queues */
3533 cik_sdma_gfx_stop(rdev
);
3534 cik_sdma_rlc_stop(rdev
);
/* halt both engines */
3536 cik_sdma_enable(rdev
, false);
3537 radeon_ring_fini(rdev
, &rdev
->ring
[R600_RING_TYPE_DMA_INDEX
]);
3538 radeon_ring_fini(rdev
, &rdev
->ring
[CAYMAN_RING_TYPE_DMA1_INDEX
]);
3539 /* XXX - compute dma queue tear down */
3543 * cik_copy_dma - copy pages using the DMA engine
3545 * @rdev: radeon_device pointer
3546 * @src_offset: src GPU address
3547 * @dst_offset: dst GPU address
3548 * @num_gpu_pages: number of GPU pages to xfer
3549 * @fence: radeon fence object
3551 * Copy GPU pages using the DMA engine (CIK).
3552 * Used by the radeon ttm implementation to move pages if
3553 * registered as the asic copy callback.
/*
 * Copies num_gpu_pages GPU pages from src_offset to dst_offset on the
 * ring selected by rdev->asic->copy.dma_ring_index. The transfer is split
 * into linear COPY packets of at most 0x1fffff bytes each, a fence is
 * emitted after the last packet, and the ring is committed.
 */
3555 int cik_copy_dma(struct radeon_device
*rdev
,
3556 uint64_t src_offset
, uint64_t dst_offset
,
3557 unsigned num_gpu_pages
,
3558 struct radeon_fence
**fence
)
3560 struct radeon_semaphore
*sem
= NULL
;
3561 int ring_index
= rdev
->asic
->copy
.dma_ring_index
;
3562 struct radeon_ring
*ring
= &rdev
->ring
[ring_index
];
3563 u32 size_in_bytes
, cur_size_in_bytes
;
3567 r
= radeon_semaphore_create(rdev
, &sem
);
3569 DRM_ERROR("radeon: moving bo (%d).\n", r
);
/* one 7-dword COPY packet per 0x1fffff-byte piece, plus 14 further dwords reserved on the ring */
3573 size_in_bytes
= (num_gpu_pages
<< RADEON_GPU_PAGE_SHIFT
);
3574 num_loops
= DIV_ROUND_UP(size_in_bytes
, 0x1fffff);
3575 r
= radeon_ring_lock(rdev
, ring
, num_loops
* 7 + 14);
3577 DRM_ERROR("radeon: moving bo (%d).\n", r
);
3578 radeon_semaphore_free(rdev
, &sem
, NULL
);
/* if the incoming fence needs syncing with this ring, sync through the semaphore; the semaphore is freed in the other path */
3582 if (radeon_fence_need_sync(*fence
, ring
->idx
)) {
3583 radeon_semaphore_sync_rings(rdev
, sem
, (*fence
)->ring
,
3585 radeon_fence_note_sync(*fence
, ring
->idx
);
3587 radeon_semaphore_free(rdev
, &sem
, NULL
);
3590 for (i
= 0; i
< num_loops
; i
++) {
/* clamp this piece to the per-packet maximum and deduct it from the remainder */
3591 cur_size_in_bytes
= size_in_bytes
;
3592 if (cur_size_in_bytes
> 0x1fffff)
3593 cur_size_in_bytes
= 0x1fffff;
3594 size_in_bytes
-= cur_size_in_bytes
;
3595 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_COPY
, SDMA_COPY_SUB_OPCODE_LINEAR
, 0));
3596 radeon_ring_write(ring
, cur_size_in_bytes
);
3597 radeon_ring_write(ring
, 0); /* src/dst endian swap */
3598 radeon_ring_write(ring
, src_offset
& 0xffffffff);
3599 radeon_ring_write(ring
, upper_32_bits(src_offset
) & 0xffffffff);
3600 radeon_ring_write(ring
, dst_offset
& 0xfffffffc);
3601 radeon_ring_write(ring
, upper_32_bits(dst_offset
) & 0xffffffff);
/* advance both addresses past the piece just queued */
3602 src_offset
+= cur_size_in_bytes
;
3603 dst_offset
+= cur_size_in_bytes
;
/* the emitted fence is handed back through *fence */
3606 r
= radeon_fence_emit(rdev
, fence
, ring
->idx
);
/* undo path: drop everything queued on the ring */
3608 radeon_ring_unlock_undo(rdev
, ring
);
3612 radeon_ring_unlock_commit(rdev
, ring
);
/* the semaphore is released with the new fence attached */
3613 radeon_semaphore_free(rdev
, &sem
, *fence
);
3619 * cik_sdma_ring_test - simple async dma engine test
3621 * @rdev: radeon_device pointer
3622 * @ring: radeon_ring structure holding ring information
3624 * Test the DMA engine by using it to write a
3625 * value to memory (CIK).
3626 * Returns 0 for success, error for failure.
/*
 * Queues a linear WRITE packet that stores 0xDEADBEEF at the VRAM scratch
 * GPU address, commits the ring, then polls for up to rdev->usec_timeout
 * iterations until the value 0xDEADBEEF is observed.
 */
3628 int cik_sdma_ring_test(struct radeon_device
*rdev
,
3629 struct radeon_ring
*ring
)
/* CPU-side pointer to the VRAM scratch buffer */
3633 void __iomem
*ptr
= (void *)rdev
->vram_scratch
.ptr
;
3637 DRM_ERROR("invalid vram scratch pointer\n");
/* NOTE(review): 4 dwords are reserved here but five radeon_ring_write() calls follow - confirm the reservation is sufficient */
3644 r
= radeon_ring_lock(rdev
, ring
, 4);
3646 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring
->idx
, r
);
/* WRITE packet: header, destination address low/high, dword count, payload */
3649 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_WRITE
, SDMA_WRITE_SUB_OPCODE_LINEAR
, 0));
3650 radeon_ring_write(ring
, rdev
->vram_scratch
.gpu_addr
& 0xfffffffc);
3651 radeon_ring_write(ring
, upper_32_bits(rdev
->vram_scratch
.gpu_addr
) & 0xffffffff);
3652 radeon_ring_write(ring
, 1); /* number of DWs to follow */
3653 radeon_ring_write(ring
, 0xDEADBEEF);
3654 radeon_ring_unlock_commit(rdev
, ring
);
/* poll until the payload value shows up or the iteration budget runs out */
3656 for (i
= 0; i
< rdev
->usec_timeout
; i
++) {
3658 if (tmp
== 0xDEADBEEF)
/* i below the limit means the value was seen before the budget ran out */
3663 if (i
< rdev
->usec_timeout
) {
3664 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring
->idx
, i
);
3666 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3674 * cik_sdma_ib_test - test an IB on the DMA engine
3676 * @rdev: radeon_device pointer
3677 * @ring: radeon_ring structure holding ring information
3679 * Test a simple IB in the DMA ring (CIK).
3680 * Returns 0 on success, error on failure.
/*
 * Same check as the ring test, but routed through an indirect buffer: a
 * 256-byte IB is filled with a linear WRITE packet targeting the VRAM
 * scratch GPU address, scheduled, and its fence waited on before polling
 * for 0xDEADBEEF. The IB is freed on the schedule-failure path and at the
 * end.
 */
3682 int cik_sdma_ib_test(struct radeon_device
*rdev
, struct radeon_ring
*ring
)
3684 struct radeon_ib ib
;
/* CPU-side pointer to the VRAM scratch buffer */
3687 void __iomem
*ptr
= (void *)rdev
->vram_scratch
.ptr
;
3691 DRM_ERROR("invalid vram scratch pointer\n");
3698 r
= radeon_ib_get(rdev
, ring
->idx
, &ib
, NULL
, 256);
3700 DRM_ERROR("radeon: failed to get ib (%d).\n", r
);
/* WRITE packet: header, destination address low/high, then the payload in slot 4 */
3704 ib
.ptr
[0] = SDMA_PACKET(SDMA_OPCODE_WRITE
, SDMA_WRITE_SUB_OPCODE_LINEAR
, 0);
3705 ib
.ptr
[1] = rdev
->vram_scratch
.gpu_addr
& 0xfffffffc;
3706 ib
.ptr
[2] = upper_32_bits(rdev
->vram_scratch
.gpu_addr
) & 0xffffffff;
3708 ib
.ptr
[4] = 0xDEADBEEF;
3711 r
= radeon_ib_schedule(rdev
, &ib
, NULL
);
/* scheduling failed: release the IB before reporting */
3713 radeon_ib_free(rdev
, &ib
);
3714 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r
);
/* wait for the IB's fence before polling for the payload */
3717 r
= radeon_fence_wait(ib
.fence
, false);
3719 DRM_ERROR("radeon: fence wait failed (%d).\n", r
);
3722 for (i
= 0; i
< rdev
->usec_timeout
; i
++) {
3724 if (tmp
== 0xDEADBEEF)
/* i below the limit means the value was seen before the budget ran out */
3728 if (i
< rdev
->usec_timeout
) {
3729 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib
.fence
->ring
, i
);
3731 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp
);
3734 radeon_ib_free(rdev
, &ib
);
/*
 * Dumps the GRBM, SRBM, SDMA0/SDMA1 and CP status registers to the kernel
 * log with dev_info(), one register per line. Used by cik_gpu_soft_reset()
 * before and after the reset.
 */
3739 static void cik_print_gpu_status_regs(struct radeon_device
*rdev
)
3741 dev_info(rdev
->dev
, " GRBM_STATUS=0x%08X\n",
3742 RREG32(GRBM_STATUS
));
3743 dev_info(rdev
->dev
, " GRBM_STATUS2=0x%08X\n",
3744 RREG32(GRBM_STATUS2
));
3745 dev_info(rdev
->dev
, " GRBM_STATUS_SE0=0x%08X\n",
3746 RREG32(GRBM_STATUS_SE0
));
3747 dev_info(rdev
->dev
, " GRBM_STATUS_SE1=0x%08X\n",
3748 RREG32(GRBM_STATUS_SE1
));
3749 dev_info(rdev
->dev
, " GRBM_STATUS_SE2=0x%08X\n",
3750 RREG32(GRBM_STATUS_SE2
));
3751 dev_info(rdev
->dev
, " GRBM_STATUS_SE3=0x%08X\n",
3752 RREG32(GRBM_STATUS_SE3
));
3753 dev_info(rdev
->dev
, " SRBM_STATUS=0x%08X\n",
3754 RREG32(SRBM_STATUS
));
3755 dev_info(rdev
->dev
, " SRBM_STATUS2=0x%08X\n",
3756 RREG32(SRBM_STATUS2
));
/* both SDMA instances share the SDMA0_STATUS_REG name; the instance is chosen by the register offset */
3757 dev_info(rdev
->dev
, " SDMA0_STATUS_REG = 0x%08X\n",
3758 RREG32(SDMA0_STATUS_REG
+ SDMA0_REGISTER_OFFSET
));
3759 dev_info(rdev
->dev
, " SDMA1_STATUS_REG = 0x%08X\n",
3760 RREG32(SDMA0_STATUS_REG
+ SDMA1_REGISTER_OFFSET
));
3761 dev_info(rdev
->dev
, " CP_STAT = 0x%08x\n", RREG32(CP_STAT
));
3762 dev_info(rdev
->dev
, " CP_STALLED_STAT1 = 0x%08x\n",
3763 RREG32(CP_STALLED_STAT1
));
3764 dev_info(rdev
->dev
, " CP_STALLED_STAT2 = 0x%08x\n",
3765 RREG32(CP_STALLED_STAT2
));
3766 dev_info(rdev
->dev
, " CP_STALLED_STAT3 = 0x%08x\n",
3767 RREG32(CP_STALLED_STAT3
));
3768 dev_info(rdev
->dev
, " CP_CPF_BUSY_STAT = 0x%08x\n",
3769 RREG32(CP_CPF_BUSY_STAT
));
3770 dev_info(rdev
->dev
, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3771 RREG32(CP_CPF_STALLED_STAT1
));
3772 dev_info(rdev
->dev
, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS
));
3773 dev_info(rdev
->dev
, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT
));
3774 dev_info(rdev
->dev
, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3775 RREG32(CP_CPC_STALLED_STAT1
));
3776 dev_info(rdev
->dev
, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS
));
3780 * cik_gpu_check_soft_reset - check which blocks are busy
3782 * @rdev: radeon_device pointer
3784 * Check which blocks are busy and return the relevant reset
3785 * mask to be used by cik_gpu_soft_reset().
3786 * Returns a mask of the blocks to be reset.
/*
 * Reads the GRBM, SDMA and SRBM status registers and ORs a RADEON_RESET_*
 * bit into reset_mask for every block that reports busy or not idle. The
 * MC bit is cleared again at the end, so the resulting mask never
 * requests an MC reset.
 */
3788 static u32
cik_gpu_check_soft_reset(struct radeon_device
*rdev
)
/* GRBM_STATUS: graphics pipeline busy bits, then CP busy bits */
3794 tmp
= RREG32(GRBM_STATUS
);
3795 if (tmp
& (PA_BUSY
| SC_BUSY
|
3796 BCI_BUSY
| SX_BUSY
|
3797 TA_BUSY
| VGT_BUSY
|
3799 GDS_BUSY
| SPI_BUSY
|
3800 IA_BUSY
| IA_BUSY_NO_DMA
))
3801 reset_mask
|= RADEON_RESET_GFX
;
3803 if (tmp
& (CP_BUSY
| CP_COHERENCY_BUSY
))
3804 reset_mask
|= RADEON_RESET_CP
;
3807 tmp
= RREG32(GRBM_STATUS2
);
3809 reset_mask
|= RADEON_RESET_RLC
;
3811 /* SDMA0_STATUS_REG */
3812 tmp
= RREG32(SDMA0_STATUS_REG
+ SDMA0_REGISTER_OFFSET
);
/* an engine whose SDMA_IDLE bit is clear is flagged for reset */
3813 if (!(tmp
& SDMA_IDLE
))
3814 reset_mask
|= RADEON_RESET_DMA
;
3816 /* SDMA1_STATUS_REG */
3817 tmp
= RREG32(SDMA0_STATUS_REG
+ SDMA1_REGISTER_OFFSET
);
3818 if (!(tmp
& SDMA_IDLE
))
3819 reset_mask
|= RADEON_RESET_DMA1
;
/* SRBM_STATUS2 carries a second busy indication per SDMA engine */
3822 tmp
= RREG32(SRBM_STATUS2
);
3823 if (tmp
& SDMA_BUSY
)
3824 reset_mask
|= RADEON_RESET_DMA
;
3826 if (tmp
& SDMA1_BUSY
)
3827 reset_mask
|= RADEON_RESET_DMA1
;
/* SRBM_STATUS: IH, SEM, GRBM, VMC and MC indications */
3830 tmp
= RREG32(SRBM_STATUS
);
3833 reset_mask
|= RADEON_RESET_IH
;
3836 reset_mask
|= RADEON_RESET_SEM
;
3838 if (tmp
& GRBM_RQ_PENDING
)
3839 reset_mask
|= RADEON_RESET_GRBM
;
3842 reset_mask
|= RADEON_RESET_VMC
;
3844 if (tmp
& (MCB_BUSY
| MCB_NON_DISPLAY_BUSY
|
3845 MCC_BUSY
| MCD_BUSY
))
3846 reset_mask
|= RADEON_RESET_MC
;
/* display hang detection is shared with evergreen */
3848 if (evergreen_is_display_hung(rdev
))
3849 reset_mask
|= RADEON_RESET_DISPLAY
;
3851 /* Skip MC reset as it's most likely not hung, just busy */
3852 if (reset_mask
& RADEON_RESET_MC
) {
3853 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask
);
3854 reset_mask
&= ~RADEON_RESET_MC
;
3861 * cik_gpu_soft_reset - soft reset GPU
3863 * @rdev: radeon_device pointer
3864 * @reset_mask: mask of which blocks to reset
3866 * Soft reset the blocks specified in @reset_mask.
/*
 * Soft-resets the blocks named in reset_mask. Sequence: log the status and
 * VM fault registers, halt CP and MEC, rewrite SDMA0_ME_CNTL for the SDMA
 * engines selected in reset_mask, stop the MC, translate reset_mask into
 * GRBM_SOFT_RESET / SRBM_SOFT_RESET bits, set and then clear those bits,
 * resume the MC and log the status registers again.
 */
3868 static void cik_gpu_soft_reset(struct radeon_device
*rdev
, u32 reset_mask
)
3870 struct evergreen_mc_save save
;
3871 u32 grbm_soft_reset
= 0, srbm_soft_reset
= 0;
/* nothing selected for reset */
3874 if (reset_mask
== 0)
3877 dev_info(rdev
->dev
, "GPU softreset: 0x%08X\n", reset_mask
);
3879 cik_print_gpu_status_regs(rdev
);
3880 dev_info(rdev
->dev
, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3881 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR
));
3882 dev_info(rdev
->dev
, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3883 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS
));
3888 /* Disable GFX parsing/prefetching */
3889 WREG32(CP_ME_CNTL
, CP_ME_HALT
| CP_PFP_HALT
| CP_CE_HALT
);
3891 /* Disable MEC parsing/prefetching */
3892 WREG32(CP_MEC_CNTL
, MEC_ME1_HALT
| MEC_ME2_HALT
);
/* read-modify-write of the ME control register of SDMA0 when it is in the mask */
3894 if (reset_mask
& RADEON_RESET_DMA
) {
3896 tmp
= RREG32(SDMA0_ME_CNTL
+ SDMA0_REGISTER_OFFSET
);
3898 WREG32(SDMA0_ME_CNTL
+ SDMA0_REGISTER_OFFSET
, tmp
);
/* same for SDMA1 */
3900 if (reset_mask
& RADEON_RESET_DMA1
) {
3902 tmp
= RREG32(SDMA0_ME_CNTL
+ SDMA1_REGISTER_OFFSET
);
3904 WREG32(SDMA0_ME_CNTL
+ SDMA1_REGISTER_OFFSET
, tmp
);
/* MC state is saved here and restored by evergreen_mc_resume() after the reset */
3907 evergreen_mc_stop(rdev
, &save
);
3908 if (evergreen_mc_wait_for_idle(rdev
)) {
3909 dev_warn(rdev
->dev
, "Wait for MC idle timedout !\n");
/* any of GFX, COMPUTE or CP requests a reset of both CP and GFX */
3912 if (reset_mask
& (RADEON_RESET_GFX
| RADEON_RESET_COMPUTE
| RADEON_RESET_CP
))
3913 grbm_soft_reset
= SOFT_RESET_CP
| SOFT_RESET_GFX
;
/* a CP reset additionally resets the GRBM through the SRBM */
3915 if (reset_mask
& RADEON_RESET_CP
) {
3916 grbm_soft_reset
|= SOFT_RESET_CP
;
3918 srbm_soft_reset
|= SOFT_RESET_GRBM
;
3921 if (reset_mask
& RADEON_RESET_DMA
)
3922 srbm_soft_reset
|= SOFT_RESET_SDMA
;
3924 if (reset_mask
& RADEON_RESET_DMA1
)
3925 srbm_soft_reset
|= SOFT_RESET_SDMA1
;
3927 if (reset_mask
& RADEON_RESET_DISPLAY
)
3928 srbm_soft_reset
|= SOFT_RESET_DC
;
3930 if (reset_mask
& RADEON_RESET_RLC
)
3931 grbm_soft_reset
|= SOFT_RESET_RLC
;
3933 if (reset_mask
& RADEON_RESET_SEM
)
3934 srbm_soft_reset
|= SOFT_RESET_SEM
;
3936 if (reset_mask
& RADEON_RESET_IH
)
3937 srbm_soft_reset
|= SOFT_RESET_IH
;
3939 if (reset_mask
& RADEON_RESET_GRBM
)
3940 srbm_soft_reset
|= SOFT_RESET_GRBM
;
3942 if (reset_mask
& RADEON_RESET_VMC
)
3943 srbm_soft_reset
|= SOFT_RESET_VMC
;
/* the MC reset bit is only ever set on non-IGP parts */
3945 if (!(rdev
->flags
& RADEON_IS_IGP
)) {
3946 if (reset_mask
& RADEON_RESET_MC
)
3947 srbm_soft_reset
|= SOFT_RESET_MC
;
/* GRBM: set the selected bits, read back, then clear them and read back again */
3950 if (grbm_soft_reset
) {
3951 tmp
= RREG32(GRBM_SOFT_RESET
);
3952 tmp
|= grbm_soft_reset
;
3953 dev_info(rdev
->dev
, "GRBM_SOFT_RESET=0x%08X\n", tmp
);
3954 WREG32(GRBM_SOFT_RESET
, tmp
);
3955 tmp
= RREG32(GRBM_SOFT_RESET
);
3959 tmp
&= ~grbm_soft_reset
;
3960 WREG32(GRBM_SOFT_RESET
, tmp
);
3961 tmp
= RREG32(GRBM_SOFT_RESET
);
/* SRBM: same set / read back / clear / read back sequence */
3964 if (srbm_soft_reset
) {
3965 tmp
= RREG32(SRBM_SOFT_RESET
);
3966 tmp
|= srbm_soft_reset
;
3967 dev_info(rdev
->dev
, "SRBM_SOFT_RESET=0x%08X\n", tmp
);
3968 WREG32(SRBM_SOFT_RESET
, tmp
);
3969 tmp
= RREG32(SRBM_SOFT_RESET
);
3973 tmp
&= ~srbm_soft_reset
;
3974 WREG32(SRBM_SOFT_RESET
, tmp
);
3975 tmp
= RREG32(SRBM_SOFT_RESET
);
3978 /* Wait a little for things to settle down */
3981 evergreen_mc_resume(rdev
, &save
);
/* second status dump, for comparison with the one taken before the reset */
3984 cik_print_gpu_status_regs(rdev
);
3988 * cik_asic_reset - soft reset GPU
3990 * @rdev: radeon_device pointer
3992 * Look up which blocks are hung and attempt
3994 * Returns 0 for success.
/*
 * Computes the reset mask, calls r600_set_bios_scratch_engine_hung() with
 * true, performs the soft reset, recomputes the mask and calls
 * r600_set_bios_scratch_engine_hung() with false.
 */
3996 int cik_asic_reset(struct radeon_device
*rdev
)
4000 reset_mask
= cik_gpu_check_soft_reset(rdev
);
/* flag the engine as hung in the BIOS scratch state */
4003 r600_set_bios_scratch_engine_hung(rdev
, true);
4005 cik_gpu_soft_reset(rdev
, reset_mask
);
/* re-check which blocks still report busy after the reset */
4007 reset_mask
= cik_gpu_check_soft_reset(rdev
);
4010 r600_set_bios_scratch_engine_hung(rdev
, false);
4016 * cik_gfx_is_lockup - check if the 3D engine is locked up
4018 * @rdev: radeon_device pointer
4019 * @ring: radeon_ring structure holding ring information
4021 * Check if the 3D engine is locked up (CIK).
4022 * Returns true if the engine is locked, false if not.
/*
 * Lockup check for the gfx ring. When none of the GFX, COMPUTE or CP bits
 * is set in the soft-reset mask, radeon_ring_lockup_update() is called
 * (presumably followed by an early return of false - confirm against the
 * full source). Otherwise activity is forced on the ring and
 * radeon_ring_test_lockup() supplies the result.
 */
4024 bool cik_gfx_is_lockup(struct radeon_device
*rdev
, struct radeon_ring
*ring
)
4026 u32 reset_mask
= cik_gpu_check_soft_reset(rdev
);
4028 if (!(reset_mask
& (RADEON_RESET_GFX
|
4029 RADEON_RESET_COMPUTE
|
4030 RADEON_RESET_CP
))) {
4031 radeon_ring_lockup_update(ring
);
4034 /* force CP activities */
4035 radeon_ring_force_activity(rdev
, ring
);
4036 return radeon_ring_test_lockup(rdev
, ring
);
4040 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4042 * @rdev: radeon_device pointer
4043 * @ring: radeon_ring structure holding ring information
4045 * Check if the async DMA engine is locked up (CIK).
4046 * Returns true if the engine appears to be locked up, false if not.
/*
 * Lockup check for an SDMA ring. The reset bit examined is
 * RADEON_RESET_DMA for the R600_RING_TYPE_DMA_INDEX ring and
 * RADEON_RESET_DMA1 in the other assignment. When that bit is clear,
 * radeon_ring_lockup_update() is called (presumably followed by an early
 * return of false - confirm against the full source). Otherwise activity
 * is forced on the ring and radeon_ring_test_lockup() supplies the result.
 */
4048 bool cik_sdma_is_lockup(struct radeon_device
*rdev
, struct radeon_ring
*ring
)
4050 u32 reset_mask
= cik_gpu_check_soft_reset(rdev
);
4053 if (ring
->idx
== R600_RING_TYPE_DMA_INDEX
)
4054 mask
= RADEON_RESET_DMA
;
4056 mask
= RADEON_RESET_DMA1
;
4058 if (!(reset_mask
& mask
)) {
4059 radeon_ring_lockup_update(ring
);
4062 /* force ring activities */
4063 radeon_ring_force_activity(rdev
, ring
);
4064 return radeon_ring_test_lockup(rdev
, ring
);
4069 * cik_mc_program - program the GPU memory controller
4071 * @rdev: radeon_device pointer
4073 * Set the location of vram, gart, and AGP in the GPU's
4074 * physical address space (CIK).
/*
 * Zeroes 32 groups of HDP registers, then, with the MC stopped, programs
 * the system aperture, FB location, HDP non-surface range and AGP
 * registers from rdev->mc, resumes the MC and disables the VGA renderer.
 */
4076 static void cik_mc_program(struct radeon_device
*rdev
)
4078 struct evergreen_mc_save save
;
4082 /* Initialize HDP */
/* 32 register groups spaced 0x18 apart, five registers zeroed in each */
4083 for (i
= 0, j
= 0; i
< 32; i
++, j
+= 0x18) {
4084 WREG32((0x2c14 + j
), 0x00000000);
4085 WREG32((0x2c18 + j
), 0x00000000);
4086 WREG32((0x2c1c + j
), 0x00000000);
4087 WREG32((0x2c20 + j
), 0x00000000);
4088 WREG32((0x2c24 + j
), 0x00000000);
4090 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL
, 0);
/* MC state is saved here and restored by evergreen_mc_resume() below */
4092 evergreen_mc_stop(rdev
, &save
);
4093 if (radeon_mc_wait_for_idle(rdev
)) {
4094 dev_warn(rdev
->dev
, "Wait for MC idle timedout !\n");
4096 /* Lockout access through VGA aperture*/
4097 WREG32(VGA_HDP_CONTROL
, VGA_MEMORY_DISABLE
);
4098 /* Update configuration */
/* aperture bounds and default address are written shifted right by 12 */
4099 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR
,
4100 rdev
->mc
.vram_start
>> 12);
4101 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR
,
4102 rdev
->mc
.vram_end
>> 12);
4103 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR
,
4104 rdev
->vram_scratch
.gpu_addr
>> 12);
/* FB location: vram_end >> 24 in the upper 16 bits, vram_start >> 24 in the lower 16 bits */
4105 tmp
= ((rdev
->mc
.vram_end
>> 24) & 0xFFFF) << 16;
4106 tmp
|= ((rdev
->mc
.vram_start
>> 24) & 0xFFFF);
4107 WREG32(MC_VM_FB_LOCATION
, tmp
);
4108 /* XXX double check these! */
4109 WREG32(HDP_NONSURFACE_BASE
, (rdev
->mc
.vram_start
>> 8));
4110 WREG32(HDP_NONSURFACE_INFO
, (2 << 7) | (1 << 30));
4111 WREG32(HDP_NONSURFACE_SIZE
, 0x3FFFFFFF);
/* AGP base is 0 and top and bottom are programmed to the same value */
4112 WREG32(MC_VM_AGP_BASE
, 0);
4113 WREG32(MC_VM_AGP_TOP
, 0x0FFFFFFF);
4114 WREG32(MC_VM_AGP_BOT
, 0x0FFFFFFF);
4115 if (radeon_mc_wait_for_idle(rdev
)) {
4116 dev_warn(rdev
->dev
, "Wait for MC idle timedout !\n");
4118 evergreen_mc_resume(rdev
, &save
);
4119 /* we need to own VRAM, so turn off the VGA renderer here
4120 * to stop it overwriting our objects */
4121 rv515_vga_render_disable(rdev
);
4125 * cik_mc_init - initialize the memory controller driver params
4127 * @rdev: radeon_device pointer
4129 * Look up the amount of vram, vram width, and decide how to place
4130 * vram and gart within the GPU's physical address space (CIK).
4131 * Returns 0 for success.
/*
 * Fills in rdev->mc: the VRAM bus width is numchan * chansize, with both
 * factors decoded from MC_ARB_RAMCFG and MC_SHARED_CHMAP; the aperture
 * base and size come from PCI resource 0; the VRAM sizes come from
 * CONFIG_MEMSIZE. It then lets si_vram_gtt_location() place VRAM and GTT
 * and refreshes the bandwidth info.
 */
4133 static int cik_mc_init(struct radeon_device
*rdev
)
4136 int chansize
, numchan
;
4138 /* Get VRAM information */
4139 rdev
->mc
.vram_is_ddr
= true;
/* channel size is selected by the CHANSIZE field of MC_ARB_RAMCFG */
4140 tmp
= RREG32(MC_ARB_RAMCFG
);
4141 if (tmp
& CHANSIZE_MASK
) {
/* channel count is decoded from the NOOFCHAN field of MC_SHARED_CHMAP */
4146 tmp
= RREG32(MC_SHARED_CHMAP
);
4147 switch ((tmp
& NOOFCHAN_MASK
) >> NOOFCHAN_SHIFT
) {
4177 rdev
->mc
.vram_width
= numchan
* chansize
;
4178 /* Could aper size report 0 ? */
4179 rdev
->mc
.aper_base
= pci_resource_start(rdev
->pdev
, 0);
4180 rdev
->mc
.aper_size
= pci_resource_len(rdev
->pdev
, 0);
4181 /* size in MB on si */
/* NOTE(review): if RREG32() yields a 32-bit value, "* 1024 * 1024" is evaluated in 32 bits and would wrap for 4096 MB or more - confirm, and widen (e.g. 1024ULL) if so */
4182 rdev
->mc
.mc_vram_size
= RREG32(CONFIG_MEMSIZE
) * 1024 * 1024;
4183 rdev
->mc
.real_vram_size
= RREG32(CONFIG_MEMSIZE
) * 1024 * 1024;
/* visible VRAM is set to the size of the PCI aperture */
4184 rdev
->mc
.visible_vram_size
= rdev
->mc
.aper_size
;
4185 si_vram_gtt_location(rdev
, &rdev
->mc
);
4186 radeon_update_bandwidth_info(rdev
);
4193 * VMID 0 is the physical GPU addresses as used by the kernel.
4194 * VMIDs 1-15 are used for userspace clients and are handled
4195 * by the radeon vm/hsa code.
4198 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4200 * @rdev: radeon_device pointer
4202 * Flush the TLB for the VMID 0 page table (CIK).
/*
 * Writes 0 to HDP_MEM_COHERENCY_FLUSH_CNTL, then writes 0x1 to
 * VM_INVALIDATE_REQUEST, i.e. only bit 0 (VM context 0) is requested.
 */
4204 void cik_pcie_gart_tlb_flush(struct radeon_device
*rdev
)
4206 /* flush hdp cache */
4207 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL
, 0);
4209 /* bits 0-15 are the VM contexts0-15 */
4210 WREG32(VM_INVALIDATE_REQUEST
, 0x1);
4214 * cik_pcie_gart_enable - gart enable
4216 * @rdev: radeon_device pointer
4218 * This sets up the TLBs, programs the page tables for VMID0,
4219 * sets up the hw for VMIDs 1-15 which are allocated on
4220 * demand, and sets up the global locations for the LDS, GDS,
4221 * and GPUVM for FSA64 clients (CIK).
4222 * Returns 0 for success, errors for failure.
/*
 * Pins the GART table in VRAM and restores its entries, programs the L1
 * TLB and L2 cache controls, sets up VM context 0 over the GTT range,
 * points contexts 1-15 at the GART table and enables them with fault
 * reporting, zeroes the TC cache policy registers, clears the SH_MEM and
 * SDMA aperture registers for all 16 VMIDs, flushes the TLB and marks the
 * GART ready.
 */
4224 static int cik_pcie_gart_enable(struct radeon_device
*rdev
)
/* the GART table object must already exist */
4228 if (rdev
->gart
.robj
== NULL
) {
4229 dev_err(rdev
->dev
, "No VRAM object for PCIE GART.\n");
4232 r
= radeon_gart_table_vram_pin(rdev
);
4235 radeon_gart_restore(rdev
);
4236 /* Setup TLB control */
4237 WREG32(MC_VM_MX_L1_TLB_CNTL
,
4240 SYSTEM_ACCESS_MODE_NOT_IN_SYS
|
4241 ENABLE_ADVANCED_DRIVER_MODEL
|
4242 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU
);
4243 /* Setup L2 cache */
4244 WREG32(VM_L2_CNTL
, ENABLE_L2_CACHE
|
4245 ENABLE_L2_FRAGMENT_PROCESSING
|
4246 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE
|
4247 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE
|
4248 EFFECTIVE_L2_QUEUE_SIZE(7) |
4249 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4250 WREG32(VM_L2_CNTL2
, INVALIDATE_ALL_L1_TLBS
| INVALIDATE_L2_CACHE
);
4251 WREG32(VM_L2_CNTL3
, L2_CACHE_BIGK_ASSOCIATIVITY
|
4252 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4253 /* setup context0 */
/* context 0 spans gtt_start..gtt_end and uses the GART table; all three are written shifted right by 12 */
4254 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR
, rdev
->mc
.gtt_start
>> 12);
4255 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR
, rdev
->mc
.gtt_end
>> 12);
4256 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR
, rdev
->gart
.table_addr
>> 12);
/* context 0's fault default address is the dummy page */
4257 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR
,
4258 (u32
)(rdev
->dummy_page
.addr
>> 12));
4259 WREG32(VM_CONTEXT0_CNTL2
, 0);
4260 WREG32(VM_CONTEXT0_CNTL
, (ENABLE_CONTEXT
| PAGE_TABLE_DEPTH(0) |
4261 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT
));
4267 /* empty context1-15 */
4268 /* FIXME start with 4G, once using 2 level pt switch to full
4271 /* set vm size, must be a multiple of 4 */
4272 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR
, 0);
4273 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR
, rdev
->vm_manager
.max_pfn
);
/* contexts 1-15 all start out pointing at the GART table; the base registers are indexed from VM_CONTEXT0_... with i and from VM_CONTEXT8_... with i - 8 */
4274 for (i
= 1; i
< 16; i
++) {
4276 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR
+ (i
<< 2),
4277 rdev
->gart
.table_addr
>> 12);
4279 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR
+ ((i
- 8) << 2),
4280 rdev
->gart
.table_addr
>> 12);
4283 /* enable context1-15 */
4284 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR
,
4285 (u32
)(rdev
->dummy_page
.addr
>> 12));
4286 WREG32(VM_CONTEXT1_CNTL2
, 4);
/* depth-1 page tables; every fault class has both its INTERRUPT and its DEFAULT enable bit set */
4287 WREG32(VM_CONTEXT1_CNTL
, ENABLE_CONTEXT
| PAGE_TABLE_DEPTH(1) |
4288 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT
|
4289 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT
|
4290 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT
|
4291 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT
|
4292 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT
|
4293 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT
|
4294 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT
|
4295 VALID_PROTECTION_FAULT_ENABLE_DEFAULT
|
4296 READ_PROTECTION_FAULT_ENABLE_INTERRUPT
|
4297 READ_PROTECTION_FAULT_ENABLE_DEFAULT
|
4298 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT
|
4299 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT
);
4301 /* TC cache setup ??? */
4302 WREG32(TC_CFG_L1_LOAD_POLICY0
, 0);
4303 WREG32(TC_CFG_L1_LOAD_POLICY1
, 0);
4304 WREG32(TC_CFG_L1_STORE_POLICY
, 0);
4306 WREG32(TC_CFG_L2_LOAD_POLICY0
, 0);
4307 WREG32(TC_CFG_L2_LOAD_POLICY1
, 0);
4308 WREG32(TC_CFG_L2_STORE_POLICY0
, 0);
4309 WREG32(TC_CFG_L2_STORE_POLICY1
, 0);
4310 WREG32(TC_CFG_L2_ATOMIC_POLICY
, 0);
4312 WREG32(TC_CFG_L1_VOLATILE
, 0);
4313 WREG32(TC_CFG_L2_VOLATILE
, 0);
/* Kaveri only: read-modify-write of CHUB_CONTROL */
4315 if (rdev
->family
== CHIP_KAVERI
) {
4316 u32 tmp
= RREG32(CHUB_CONTROL
);
4318 WREG32(CHUB_CONTROL
, tmp
);
4321 /* XXX SH_MEM regs */
4322 /* where to put LDS, scratch, GPUVM in FSA64 space */
/* cik_srbm_select() is called with i as its last argument before each group of register writes */
4323 for (i
= 0; i
< 16; i
++) {
4324 cik_srbm_select(rdev
, 0, 0, 0, i
);
4325 /* CP and shaders */
4326 WREG32(SH_MEM_CONFIG
, 0);
4327 WREG32(SH_MEM_APE1_BASE
, 1);
4328 WREG32(SH_MEM_APE1_LIMIT
, 0);
4329 WREG32(SH_MEM_BASES
, 0);
4331 WREG32(SDMA0_GFX_VIRTUAL_ADDR
+ SDMA0_REGISTER_OFFSET
, 0);
4332 WREG32(SDMA0_GFX_APE1_CNTL
+ SDMA0_REGISTER_OFFSET
, 0);
4333 WREG32(SDMA0_GFX_VIRTUAL_ADDR
+ SDMA1_REGISTER_OFFSET
, 0);
4334 WREG32(SDMA0_GFX_APE1_CNTL
+ SDMA1_REGISTER_OFFSET
, 0);
4335 /* XXX SDMA RLC - todo */
/* final select call passes 0 for every selector argument */
4337 cik_srbm_select(rdev
, 0, 0, 0, 0);
4339 cik_pcie_gart_tlb_flush(rdev
);
4340 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4341 (unsigned)(rdev
->mc
.gtt_size
>> 20),
4342 (unsigned long long)rdev
->gart
.table_addr
);
4343 rdev
->gart
.ready
= true;
4348 * cik_pcie_gart_disable - gart disable
4350 * @rdev: radeon_device pointer
4352 * This disables all VM page tables (CIK).
/*
 * Writes 0 to VM_CONTEXT0_CNTL and VM_CONTEXT1_CNTL, reprograms the L1 TLB
 * and L2 cache control registers, and unpins the GART table from VRAM.
 */
4354 static void cik_pcie_gart_disable(struct radeon_device
*rdev
)
4356 /* Disable all tables */
4357 WREG32(VM_CONTEXT0_CNTL
, 0);
4358 WREG32(VM_CONTEXT1_CNTL
, 0);
4359 /* Setup TLB control */
4360 WREG32(MC_VM_MX_L1_TLB_CNTL
, SYSTEM_ACCESS_MODE_NOT_IN_SYS
|
4361 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU
);
4362 /* Setup L2 cache */
4364 ENABLE_L2_FRAGMENT_PROCESSING
|
4365 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE
|
4366 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE
|
4367 EFFECTIVE_L2_QUEUE_SIZE(7) |
4368 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4369 WREG32(VM_L2_CNTL2
, 0);
4370 WREG32(VM_L2_CNTL3
, L2_CACHE_BIGK_ASSOCIATIVITY
|
4371 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
/* counterpart of radeon_gart_table_vram_pin() in cik_pcie_gart_enable() */
4372 radeon_gart_table_vram_unpin(rdev
);
4376 * cik_pcie_gart_fini - vm fini callback
4378 * @rdev: radeon_device pointer
4380 * Tears down the driver GART/VM setup (CIK).
/*
 * Calls cik_pcie_gart_disable(), radeon_gart_table_vram_free() and
 * radeon_gart_fini(), in that order.
 */
4382 static void cik_pcie_gart_fini(struct radeon_device
*rdev
)
4384 cik_pcie_gart_disable(rdev
);
4385 radeon_gart_table_vram_free(rdev
);
4386 radeon_gart_fini(rdev
);
4391 * cik_ib_parse - vm ib_parse callback
4393 * @rdev: radeon_device pointer
4394 * @ib: indirect buffer pointer
4396 * CIK uses hw IB checking so this is a nop (CIK).
/* IB parse callback; per the header comment CIK relies on hardware IB checking, so no software parsing is done. */
4398 int cik_ib_parse(struct radeon_device
*rdev
, struct radeon_ib
*ib
)
4405 * VMID 0 is the physical GPU addresses as used by the kernel.
4406 * VMIDs 1-15 are used for userspace clients and are handled
4407 * by the radeon vm/hsa code.
4410 * cik_vm_init - cik vm init callback
4412 * @rdev: radeon_device pointer
4414 * Inits cik specific vm parameters (number of VMs, base of vram for
4415 * VMIDs 1-15) (CIK).
4416 * Returns 0 for success.
/*
 * Sets the number of VMs to 16 and chooses the VRAM base offset: it is
 * derived from MC_VM_FB_OFFSET inside the RADEON_IS_IGP branch and set to
 * 0 in the other assignment.
 */
4418 int cik_vm_init(struct radeon_device
*rdev
)
4421 rdev
->vm_manager
.nvm
= 16;
4422 /* base offset of vram pages */
4423 if (rdev
->flags
& RADEON_IS_IGP
) {
/* the register value is held in a 64-bit local before being stored */
4424 u64 tmp
= RREG32(MC_VM_FB_OFFSET
);
4426 rdev
->vm_manager
.vram_base_offset
= tmp
;
4428 rdev
->vm_manager
.vram_base_offset
= 0;
4434 * cik_vm_fini - cik vm fini callback
4436 * @rdev: radeon_device pointer
4438 * Tear down any asic specific VM setup (CIK).
/* VM teardown callback, the counterpart of cik_vm_init(). */
4440 void cik_vm_fini(struct radeon_device
*rdev
)
4445 * cik_vm_decode_fault - print human readable fault info
4447 * @rdev: radeon_device pointer
4448 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4449 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4451 * Print human readable fault information (CIK).
4453 static void cik_vm_decode_fault(struct radeon_device
*rdev
,
4454 u32 status
, u32 addr
, u32 mc_client
)
4456 u32 mc_id
= (status
& MEMORY_CLIENT_ID_MASK
) >> MEMORY_CLIENT_ID_SHIFT
;
4457 u32 vmid
= (status
& FAULT_VMID_MASK
) >> FAULT_VMID_SHIFT
;
4458 u32 protections
= (status
& PROTECTIONS_MASK
) >> PROTECTIONS_SHIFT
;
4459 char *block
= (char *)&mc_client
;
4461 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4462 protections
, vmid
, addr
,
4463 (status
& MEMORY_CLIENT_RW_MASK
) ? "write" : "read",
4468 * cik_vm_flush - cik vm flush using the CP
4470 * @rdev: radeon_device pointer
4472 * Update the page table base and flush the VM TLB
4473 * using the CP (CIK).
4475 void cik_vm_flush(struct radeon_device
*rdev
, int ridx
, struct radeon_vm
*vm
)
4477 struct radeon_ring
*ring
= &rdev
->ring
[ridx
];
4482 radeon_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
4483 radeon_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
4484 WRITE_DATA_DST_SEL(0)));
4486 radeon_ring_write(ring
,
4487 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR
+ (vm
->id
<< 2)) >> 2);
4489 radeon_ring_write(ring
,
4490 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR
+ ((vm
->id
- 8) << 2)) >> 2);
4492 radeon_ring_write(ring
, 0);
4493 radeon_ring_write(ring
, vm
->pd_gpu_addr
>> 12);
4495 /* update SH_MEM_* regs */
4496 radeon_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
4497 radeon_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
4498 WRITE_DATA_DST_SEL(0)));
4499 radeon_ring_write(ring
, SRBM_GFX_CNTL
>> 2);
4500 radeon_ring_write(ring
, 0);
4501 radeon_ring_write(ring
, VMID(vm
->id
));
4503 radeon_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 6));
4504 radeon_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
4505 WRITE_DATA_DST_SEL(0)));
4506 radeon_ring_write(ring
, SH_MEM_BASES
>> 2);
4507 radeon_ring_write(ring
, 0);
4509 radeon_ring_write(ring
, 0); /* SH_MEM_BASES */
4510 radeon_ring_write(ring
, 0); /* SH_MEM_CONFIG */
4511 radeon_ring_write(ring
, 1); /* SH_MEM_APE1_BASE */
4512 radeon_ring_write(ring
, 0); /* SH_MEM_APE1_LIMIT */
4514 radeon_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
4515 radeon_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
4516 WRITE_DATA_DST_SEL(0)));
4517 radeon_ring_write(ring
, SRBM_GFX_CNTL
>> 2);
4518 radeon_ring_write(ring
, 0);
4519 radeon_ring_write(ring
, VMID(0));
4522 /* We should be using the WAIT_REG_MEM packet here like in
4523 * cik_fence_ring_emit(), but it causes the CP to hang in this
4526 radeon_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
4527 radeon_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
4528 WRITE_DATA_DST_SEL(0)));
4529 radeon_ring_write(ring
, HDP_MEM_COHERENCY_FLUSH_CNTL
>> 2);
4530 radeon_ring_write(ring
, 0);
4531 radeon_ring_write(ring
, 0);
4533 /* bits 0-15 are the VM contexts0-15 */
4534 radeon_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
4535 radeon_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
4536 WRITE_DATA_DST_SEL(0)));
4537 radeon_ring_write(ring
, VM_INVALIDATE_REQUEST
>> 2);
4538 radeon_ring_write(ring
, 0);
4539 radeon_ring_write(ring
, 1 << vm
->id
);
4541 /* compute doesn't have PFP */
4542 if (ridx
== RADEON_RING_TYPE_GFX_INDEX
) {
4543 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4544 radeon_ring_write(ring
, PACKET3(PACKET3_PFP_SYNC_ME
, 0));
4545 radeon_ring_write(ring
, 0x0);
4550 * cik_vm_set_page - update the page tables using CP or sDMA
4552 * @rdev: radeon_device pointer
4553 * @ib: indirect buffer to fill with commands
4554 * @pe: addr of the page entry
4555 * @addr: dst addr to write into pe
4556 * @count: number of page entries to update
4557 * @incr: increase next addr by incr bytes
4558 * @flags: access flags
4560 * Update the page tables using CP or sDMA (CIK).
4562 void cik_vm_set_page(struct radeon_device
*rdev
,
4563 struct radeon_ib
*ib
,
4565 uint64_t addr
, unsigned count
,
4566 uint32_t incr
, uint32_t flags
)
4568 uint32_t r600_flags
= cayman_vm_page_flags(rdev
, flags
);
4572 if (rdev
->asic
->vm
.pt_ring_index
== RADEON_RING_TYPE_GFX_INDEX
) {
4575 ndw
= 2 + count
* 2;
4579 ib
->ptr
[ib
->length_dw
++] = PACKET3(PACKET3_WRITE_DATA
, ndw
);
4580 ib
->ptr
[ib
->length_dw
++] = (WRITE_DATA_ENGINE_SEL(0) |
4581 WRITE_DATA_DST_SEL(1));
4582 ib
->ptr
[ib
->length_dw
++] = pe
;
4583 ib
->ptr
[ib
->length_dw
++] = upper_32_bits(pe
);
4584 for (; ndw
> 2; ndw
-= 2, --count
, pe
+= 8) {
4585 if (flags
& RADEON_VM_PAGE_SYSTEM
) {
4586 value
= radeon_vm_map_gart(rdev
, addr
);
4587 value
&= 0xFFFFFFFFFFFFF000ULL
;
4588 } else if (flags
& RADEON_VM_PAGE_VALID
) {
4594 value
|= r600_flags
;
4595 ib
->ptr
[ib
->length_dw
++] = value
;
4596 ib
->ptr
[ib
->length_dw
++] = upper_32_bits(value
);
4601 if (flags
& RADEON_VM_PAGE_SYSTEM
) {
4607 /* for non-physically contiguous pages (system) */
4608 ib
->ptr
[ib
->length_dw
++] = SDMA_PACKET(SDMA_OPCODE_WRITE
, SDMA_WRITE_SUB_OPCODE_LINEAR
, 0);
4609 ib
->ptr
[ib
->length_dw
++] = pe
;
4610 ib
->ptr
[ib
->length_dw
++] = upper_32_bits(pe
);
4611 ib
->ptr
[ib
->length_dw
++] = ndw
;
4612 for (; ndw
> 0; ndw
-= 2, --count
, pe
+= 8) {
4613 if (flags
& RADEON_VM_PAGE_SYSTEM
) {
4614 value
= radeon_vm_map_gart(rdev
, addr
);
4615 value
&= 0xFFFFFFFFFFFFF000ULL
;
4616 } else if (flags
& RADEON_VM_PAGE_VALID
) {
4622 value
|= r600_flags
;
4623 ib
->ptr
[ib
->length_dw
++] = value
;
4624 ib
->ptr
[ib
->length_dw
++] = upper_32_bits(value
);
4633 if (flags
& RADEON_VM_PAGE_VALID
)
4637 /* for physically contiguous pages (vram) */
4638 ib
->ptr
[ib
->length_dw
++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE
, 0, 0);
4639 ib
->ptr
[ib
->length_dw
++] = pe
; /* dst addr */
4640 ib
->ptr
[ib
->length_dw
++] = upper_32_bits(pe
);
4641 ib
->ptr
[ib
->length_dw
++] = r600_flags
; /* mask */
4642 ib
->ptr
[ib
->length_dw
++] = 0;
4643 ib
->ptr
[ib
->length_dw
++] = value
; /* value */
4644 ib
->ptr
[ib
->length_dw
++] = upper_32_bits(value
);
4645 ib
->ptr
[ib
->length_dw
++] = incr
; /* increment size */
4646 ib
->ptr
[ib
->length_dw
++] = 0;
4647 ib
->ptr
[ib
->length_dw
++] = ndw
; /* number of entries */
4653 while (ib
->length_dw
& 0x7)
4654 ib
->ptr
[ib
->length_dw
++] = SDMA_PACKET(SDMA_OPCODE_NOP
, 0, 0);
4659 * cik_dma_vm_flush - cik vm flush using sDMA
4661 * @rdev: radeon_device pointer
4663 * Update the page table base and flush the VM TLB
4666 void cik_dma_vm_flush(struct radeon_device
*rdev
, int ridx
, struct radeon_vm
*vm
)
4668 struct radeon_ring
*ring
= &rdev
->ring
[ridx
];
4669 u32 extra_bits
= (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4670 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4676 if (ridx
== R600_RING_TYPE_DMA_INDEX
)
4677 ref_and_mask
= SDMA0
;
4679 ref_and_mask
= SDMA1
;
4681 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE
, 0, 0xf000));
4683 radeon_ring_write(ring
, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR
+ (vm
->id
<< 2)) >> 2);
4685 radeon_ring_write(ring
, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR
+ ((vm
->id
- 8) << 2)) >> 2);
4687 radeon_ring_write(ring
, vm
->pd_gpu_addr
>> 12);
4689 /* update SH_MEM_* regs */
4690 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE
, 0, 0xf000));
4691 radeon_ring_write(ring
, SRBM_GFX_CNTL
>> 2);
4692 radeon_ring_write(ring
, VMID(vm
->id
));
4694 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE
, 0, 0xf000));
4695 radeon_ring_write(ring
, SH_MEM_BASES
>> 2);
4696 radeon_ring_write(ring
, 0);
4698 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE
, 0, 0xf000));
4699 radeon_ring_write(ring
, SH_MEM_CONFIG
>> 2);
4700 radeon_ring_write(ring
, 0);
4702 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE
, 0, 0xf000));
4703 radeon_ring_write(ring
, SH_MEM_APE1_BASE
>> 2);
4704 radeon_ring_write(ring
, 1);
4706 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE
, 0, 0xf000));
4707 radeon_ring_write(ring
, SH_MEM_APE1_LIMIT
>> 2);
4708 radeon_ring_write(ring
, 0);
4710 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE
, 0, 0xf000));
4711 radeon_ring_write(ring
, SRBM_GFX_CNTL
>> 2);
4712 radeon_ring_write(ring
, VMID(0));
4715 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM
, 0, extra_bits
));
4716 radeon_ring_write(ring
, GPU_HDP_FLUSH_DONE
);
4717 radeon_ring_write(ring
, GPU_HDP_FLUSH_REQ
);
4718 radeon_ring_write(ring
, ref_and_mask
); /* REFERENCE */
4719 radeon_ring_write(ring
, ref_and_mask
); /* MASK */
4720 radeon_ring_write(ring
, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4723 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE
, 0, 0xf000));
4724 radeon_ring_write(ring
, VM_INVALIDATE_REQUEST
>> 2);
4725 radeon_ring_write(ring
, 1 << vm
->id
);
4730 * The RLC is a multi-purpose microengine that handles a
4731 * variety of functions, the most important of which is
4732 * the interrupt controller.
4735 * cik_rlc_stop - stop the RLC ME
4737 * @rdev: radeon_device pointer
4739 * Halt the RLC ME (MicroEngine) (CIK).
4741 static void cik_rlc_stop(struct radeon_device
*rdev
)
4746 tmp
= RREG32(CP_INT_CNTL_RING0
);
4747 tmp
&= ~(CNTX_BUSY_INT_ENABLE
| CNTX_EMPTY_INT_ENABLE
);
4748 WREG32(CP_INT_CNTL_RING0
, tmp
);
4750 RREG32(CB_CGTT_SCLK_CTRL
);
4751 RREG32(CB_CGTT_SCLK_CTRL
);
4752 RREG32(CB_CGTT_SCLK_CTRL
);
4753 RREG32(CB_CGTT_SCLK_CTRL
);
4755 tmp
= RREG32(RLC_CGCG_CGLS_CTRL
) & 0xfffffffc;
4756 WREG32(RLC_CGCG_CGLS_CTRL
, tmp
);
4758 WREG32(RLC_CNTL
, 0);
4760 for (i
= 0; i
< rdev
->config
.cik
.max_shader_engines
; i
++) {
4761 for (j
= 0; j
< rdev
->config
.cik
.max_sh_per_se
; j
++) {
4762 cik_select_se_sh(rdev
, i
, j
);
4763 for (k
= 0; k
< rdev
->usec_timeout
; k
++) {
4764 if (RREG32(RLC_SERDES_CU_MASTER_BUSY
) == 0)
4770 cik_select_se_sh(rdev
, 0xffffffff, 0xffffffff);
4772 mask
= SE_MASTER_BUSY_MASK
| GC_MASTER_BUSY
| TC0_MASTER_BUSY
| TC1_MASTER_BUSY
;
4773 for (k
= 0; k
< rdev
->usec_timeout
; k
++) {
4774 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY
) & mask
) == 0)
4781 * cik_rlc_start - start the RLC ME
4783 * @rdev: radeon_device pointer
4785 * Unhalt the RLC ME (MicroEngine) (CIK).
4787 static void cik_rlc_start(struct radeon_device
*rdev
)
4791 WREG32(RLC_CNTL
, RLC_ENABLE
);
4793 tmp
= RREG32(CP_INT_CNTL_RING0
);
4794 tmp
|= (CNTX_BUSY_INT_ENABLE
| CNTX_EMPTY_INT_ENABLE
);
4795 WREG32(CP_INT_CNTL_RING0
, tmp
);
4801 * cik_rlc_resume - setup the RLC hw
4803 * @rdev: radeon_device pointer
4805 * Initialize the RLC registers, load the ucode,
4806 * and start the RLC (CIK).
4807 * Returns 0 for success, -EINVAL if the ucode is not available.
4809 static int cik_rlc_resume(struct radeon_device
*rdev
)
4812 u32 clear_state_info
[3];
4813 const __be32
*fw_data
;
4818 switch (rdev
->family
) {
4821 size
= BONAIRE_RLC_UCODE_SIZE
;
4824 size
= KV_RLC_UCODE_SIZE
;
4827 size
= KB_RLC_UCODE_SIZE
;
4833 WREG32(GRBM_SOFT_RESET
, SOFT_RESET_RLC
);
4834 RREG32(GRBM_SOFT_RESET
);
4836 WREG32(GRBM_SOFT_RESET
, 0);
4837 RREG32(GRBM_SOFT_RESET
);
4840 WREG32(RLC_LB_CNTR_INIT
, 0);
4841 WREG32(RLC_LB_CNTR_MAX
, 0x00008000);
4843 cik_select_se_sh(rdev
, 0xffffffff, 0xffffffff);
4844 WREG32(RLC_LB_INIT_CU_MASK
, 0xffffffff);
4845 WREG32(RLC_LB_PARAMS
, 0x00600408);
4846 WREG32(RLC_LB_CNTL
, 0x80000004);
4848 WREG32(RLC_MC_CNTL
, 0);
4849 WREG32(RLC_UCODE_CNTL
, 0);
4851 fw_data
= (const __be32
*)rdev
->rlc_fw
->data
;
4852 WREG32(RLC_GPM_UCODE_ADDR
, 0);
4853 for (i
= 0; i
< size
; i
++)
4854 WREG32(RLC_GPM_UCODE_DATA
, be32_to_cpup(fw_data
++));
4855 WREG32(RLC_GPM_UCODE_ADDR
, 0);
4858 clear_state_info
[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4859 clear_state_info
[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4860 clear_state_info
[2] = 0;//cik_default_size;
4861 WREG32(RLC_GPM_SCRATCH_ADDR
, 0x3d);
4862 for (i
= 0; i
< 3; i
++)
4863 WREG32(RLC_GPM_SCRATCH_DATA
, clear_state_info
[i
]);
4864 WREG32(RLC_DRIVER_DMA_STATUS
, 0);
4866 cik_rlc_start(rdev
);
4873 * Starting with r6xx, interrupts are handled via a ring buffer.
4874 * Ring buffers are areas of GPU accessible memory that the GPU
4875 * writes interrupt vectors into and the host reads vectors out of.
4876 * There is a rptr (read pointer) that determines where the
4877 * host is currently reading, and a wptr (write pointer)
4878 * which determines where the GPU has written. When the
4879 * pointers are equal, the ring is idle. When the GPU
4880 * writes vectors to the ring buffer, it increments the
4881 * wptr. When there is an interrupt, the host then starts
4882 * fetching commands and processing them until the pointers are
4883 * equal again at which point it updates the rptr.
4887 * cik_enable_interrupts - Enable the interrupt ring buffer
4889 * @rdev: radeon_device pointer
4891 * Enable the interrupt ring buffer (CIK).
4893 static void cik_enable_interrupts(struct radeon_device
*rdev
)
4895 u32 ih_cntl
= RREG32(IH_CNTL
);
4896 u32 ih_rb_cntl
= RREG32(IH_RB_CNTL
);
4898 ih_cntl
|= ENABLE_INTR
;
4899 ih_rb_cntl
|= IH_RB_ENABLE
;
4900 WREG32(IH_CNTL
, ih_cntl
);
4901 WREG32(IH_RB_CNTL
, ih_rb_cntl
);
4902 rdev
->ih
.enabled
= true;
4906 * cik_disable_interrupts - Disable the interrupt ring buffer
4908 * @rdev: radeon_device pointer
4910 * Disable the interrupt ring buffer (CIK).
4912 static void cik_disable_interrupts(struct radeon_device
*rdev
)
4914 u32 ih_rb_cntl
= RREG32(IH_RB_CNTL
);
4915 u32 ih_cntl
= RREG32(IH_CNTL
);
4917 ih_rb_cntl
&= ~IH_RB_ENABLE
;
4918 ih_cntl
&= ~ENABLE_INTR
;
4919 WREG32(IH_RB_CNTL
, ih_rb_cntl
);
4920 WREG32(IH_CNTL
, ih_cntl
);
4921 /* set rptr, wptr to 0 */
4922 WREG32(IH_RB_RPTR
, 0);
4923 WREG32(IH_RB_WPTR
, 0);
4924 rdev
->ih
.enabled
= false;
4929 * cik_disable_interrupt_state - Disable all interrupt sources
4931 * @rdev: radeon_device pointer
4933 * Clear all interrupt enable bits used by the driver (CIK).
4935 static void cik_disable_interrupt_state(struct radeon_device
*rdev
)
4940 WREG32(CP_INT_CNTL_RING0
, CNTX_BUSY_INT_ENABLE
| CNTX_EMPTY_INT_ENABLE
);
4942 tmp
= RREG32(SDMA0_CNTL
+ SDMA0_REGISTER_OFFSET
) & ~TRAP_ENABLE
;
4943 WREG32(SDMA0_CNTL
+ SDMA0_REGISTER_OFFSET
, tmp
);
4944 tmp
= RREG32(SDMA0_CNTL
+ SDMA1_REGISTER_OFFSET
) & ~TRAP_ENABLE
;
4945 WREG32(SDMA0_CNTL
+ SDMA1_REGISTER_OFFSET
, tmp
);
4946 /* compute queues */
4947 WREG32(CP_ME1_PIPE0_INT_CNTL
, 0);
4948 WREG32(CP_ME1_PIPE1_INT_CNTL
, 0);
4949 WREG32(CP_ME1_PIPE2_INT_CNTL
, 0);
4950 WREG32(CP_ME1_PIPE3_INT_CNTL
, 0);
4951 WREG32(CP_ME2_PIPE0_INT_CNTL
, 0);
4952 WREG32(CP_ME2_PIPE1_INT_CNTL
, 0);
4953 WREG32(CP_ME2_PIPE2_INT_CNTL
, 0);
4954 WREG32(CP_ME2_PIPE3_INT_CNTL
, 0);
4956 WREG32(GRBM_INT_CNTL
, 0);
4957 /* vline/vblank, etc. */
4958 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC0_REGISTER_OFFSET
, 0);
4959 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC1_REGISTER_OFFSET
, 0);
4960 if (rdev
->num_crtc
>= 4) {
4961 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC2_REGISTER_OFFSET
, 0);
4962 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC3_REGISTER_OFFSET
, 0);
4964 if (rdev
->num_crtc
>= 6) {
4965 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC4_REGISTER_OFFSET
, 0);
4966 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC5_REGISTER_OFFSET
, 0);
4970 WREG32(DAC_AUTODETECT_INT_CONTROL
, 0);
4972 /* digital hotplug */
4973 tmp
= RREG32(DC_HPD1_INT_CONTROL
) & DC_HPDx_INT_POLARITY
;
4974 WREG32(DC_HPD1_INT_CONTROL
, tmp
);
4975 tmp
= RREG32(DC_HPD2_INT_CONTROL
) & DC_HPDx_INT_POLARITY
;
4976 WREG32(DC_HPD2_INT_CONTROL
, tmp
);
4977 tmp
= RREG32(DC_HPD3_INT_CONTROL
) & DC_HPDx_INT_POLARITY
;
4978 WREG32(DC_HPD3_INT_CONTROL
, tmp
);
4979 tmp
= RREG32(DC_HPD4_INT_CONTROL
) & DC_HPDx_INT_POLARITY
;
4980 WREG32(DC_HPD4_INT_CONTROL
, tmp
);
4981 tmp
= RREG32(DC_HPD5_INT_CONTROL
) & DC_HPDx_INT_POLARITY
;
4982 WREG32(DC_HPD5_INT_CONTROL
, tmp
);
4983 tmp
= RREG32(DC_HPD6_INT_CONTROL
) & DC_HPDx_INT_POLARITY
;
4984 WREG32(DC_HPD6_INT_CONTROL
, tmp
);
4989 * cik_irq_init - init and enable the interrupt ring
4991 * @rdev: radeon_device pointer
4993 * Allocate a ring buffer for the interrupt controller,
4994 * enable the RLC, disable interrupts, enable the IH
4995 * ring buffer and enable it (CIK).
4996 * Called at device load and resume.
4997 * Returns 0 for success, errors for failure.
4999 static int cik_irq_init(struct radeon_device
*rdev
)
5003 u32 interrupt_cntl
, ih_cntl
, ih_rb_cntl
;
5006 ret
= r600_ih_ring_alloc(rdev
);
5011 cik_disable_interrupts(rdev
);
5014 ret
= cik_rlc_resume(rdev
);
5016 r600_ih_ring_fini(rdev
);
5020 /* setup interrupt control */
5021 /* XXX this should actually be a bus address, not an MC address. same on older asics */
5022 WREG32(INTERRUPT_CNTL2
, rdev
->ih
.gpu_addr
>> 8);
5023 interrupt_cntl
= RREG32(INTERRUPT_CNTL
);
5024 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5025 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5027 interrupt_cntl
&= ~IH_DUMMY_RD_OVERRIDE
;
5028 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5029 interrupt_cntl
&= ~IH_REQ_NONSNOOP_EN
;
5030 WREG32(INTERRUPT_CNTL
, interrupt_cntl
);
5032 WREG32(IH_RB_BASE
, rdev
->ih
.gpu_addr
>> 8);
5033 rb_bufsz
= order_base_2(rdev
->ih
.ring_size
/ 4);
5035 ih_rb_cntl
= (IH_WPTR_OVERFLOW_ENABLE
|
5036 IH_WPTR_OVERFLOW_CLEAR
|
5039 if (rdev
->wb
.enabled
)
5040 ih_rb_cntl
|= IH_WPTR_WRITEBACK_ENABLE
;
5042 /* set the writeback address whether it's enabled or not */
5043 WREG32(IH_RB_WPTR_ADDR_LO
, (rdev
->wb
.gpu_addr
+ R600_WB_IH_WPTR_OFFSET
) & 0xFFFFFFFC);
5044 WREG32(IH_RB_WPTR_ADDR_HI
, upper_32_bits(rdev
->wb
.gpu_addr
+ R600_WB_IH_WPTR_OFFSET
) & 0xFF);
5046 WREG32(IH_RB_CNTL
, ih_rb_cntl
);
5048 /* set rptr, wptr to 0 */
5049 WREG32(IH_RB_RPTR
, 0);
5050 WREG32(IH_RB_WPTR
, 0);
5052 /* Default settings for IH_CNTL (disabled at first) */
5053 ih_cntl
= MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5054 /* RPTR_REARM only works if msi's are enabled */
5055 if (rdev
->msi_enabled
)
5056 ih_cntl
|= RPTR_REARM
;
5057 WREG32(IH_CNTL
, ih_cntl
);
5059 /* force the active interrupt state to all disabled */
5060 cik_disable_interrupt_state(rdev
);
5062 pci_set_master(rdev
->pdev
);
5065 cik_enable_interrupts(rdev
);
5071 * cik_irq_set - enable/disable interrupt sources
5073 * @rdev: radeon_device pointer
5075 * Enable interrupt sources on the GPU (vblanks, hpd,
5077 * Returns 0 for success, errors for failure.
5079 int cik_irq_set(struct radeon_device
*rdev
)
5081 u32 cp_int_cntl
= CNTX_BUSY_INT_ENABLE
| CNTX_EMPTY_INT_ENABLE
|
5082 PRIV_INSTR_INT_ENABLE
| PRIV_REG_INT_ENABLE
;
5083 u32 cp_m1p0
, cp_m1p1
, cp_m1p2
, cp_m1p3
;
5084 u32 cp_m2p0
, cp_m2p1
, cp_m2p2
, cp_m2p3
;
5085 u32 crtc1
= 0, crtc2
= 0, crtc3
= 0, crtc4
= 0, crtc5
= 0, crtc6
= 0;
5086 u32 hpd1
, hpd2
, hpd3
, hpd4
, hpd5
, hpd6
;
5087 u32 grbm_int_cntl
= 0;
5088 u32 dma_cntl
, dma_cntl1
;
5090 if (!rdev
->irq
.installed
) {
5091 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5094 /* don't enable anything if the ih is disabled */
5095 if (!rdev
->ih
.enabled
) {
5096 cik_disable_interrupts(rdev
);
5097 /* force the active interrupt state to all disabled */
5098 cik_disable_interrupt_state(rdev
);
5102 hpd1
= RREG32(DC_HPD1_INT_CONTROL
) & ~DC_HPDx_INT_EN
;
5103 hpd2
= RREG32(DC_HPD2_INT_CONTROL
) & ~DC_HPDx_INT_EN
;
5104 hpd3
= RREG32(DC_HPD3_INT_CONTROL
) & ~DC_HPDx_INT_EN
;
5105 hpd4
= RREG32(DC_HPD4_INT_CONTROL
) & ~DC_HPDx_INT_EN
;
5106 hpd5
= RREG32(DC_HPD5_INT_CONTROL
) & ~DC_HPDx_INT_EN
;
5107 hpd6
= RREG32(DC_HPD6_INT_CONTROL
) & ~DC_HPDx_INT_EN
;
5109 dma_cntl
= RREG32(SDMA0_CNTL
+ SDMA0_REGISTER_OFFSET
) & ~TRAP_ENABLE
;
5110 dma_cntl1
= RREG32(SDMA0_CNTL
+ SDMA1_REGISTER_OFFSET
) & ~TRAP_ENABLE
;
5112 cp_m1p0
= RREG32(CP_ME1_PIPE0_INT_CNTL
) & ~TIME_STAMP_INT_ENABLE
;
5113 cp_m1p1
= RREG32(CP_ME1_PIPE1_INT_CNTL
) & ~TIME_STAMP_INT_ENABLE
;
5114 cp_m1p2
= RREG32(CP_ME1_PIPE2_INT_CNTL
) & ~TIME_STAMP_INT_ENABLE
;
5115 cp_m1p3
= RREG32(CP_ME1_PIPE3_INT_CNTL
) & ~TIME_STAMP_INT_ENABLE
;
5116 cp_m2p0
= RREG32(CP_ME2_PIPE0_INT_CNTL
) & ~TIME_STAMP_INT_ENABLE
;
5117 cp_m2p1
= RREG32(CP_ME2_PIPE1_INT_CNTL
) & ~TIME_STAMP_INT_ENABLE
;
5118 cp_m2p2
= RREG32(CP_ME2_PIPE2_INT_CNTL
) & ~TIME_STAMP_INT_ENABLE
;
5119 cp_m2p3
= RREG32(CP_ME2_PIPE3_INT_CNTL
) & ~TIME_STAMP_INT_ENABLE
;
5121 /* enable CP interrupts on all rings */
5122 if (atomic_read(&rdev
->irq
.ring_int
[RADEON_RING_TYPE_GFX_INDEX
])) {
5123 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5124 cp_int_cntl
|= TIME_STAMP_INT_ENABLE
;
5126 if (atomic_read(&rdev
->irq
.ring_int
[CAYMAN_RING_TYPE_CP1_INDEX
])) {
5127 struct radeon_ring
*ring
= &rdev
->ring
[CAYMAN_RING_TYPE_CP1_INDEX
];
5128 DRM_DEBUG("si_irq_set: sw int cp1\n");
5129 if (ring
->me
== 1) {
5130 switch (ring
->pipe
) {
5132 cp_m1p0
|= TIME_STAMP_INT_ENABLE
;
5135 cp_m1p1
|= TIME_STAMP_INT_ENABLE
;
5138 cp_m1p2
|= TIME_STAMP_INT_ENABLE
;
5141 cp_m1p2
|= TIME_STAMP_INT_ENABLE
;
5144 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring
->pipe
);
5147 } else if (ring
->me
== 2) {
5148 switch (ring
->pipe
) {
5150 cp_m2p0
|= TIME_STAMP_INT_ENABLE
;
5153 cp_m2p1
|= TIME_STAMP_INT_ENABLE
;
5156 cp_m2p2
|= TIME_STAMP_INT_ENABLE
;
5159 cp_m2p2
|= TIME_STAMP_INT_ENABLE
;
5162 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring
->pipe
);
5166 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring
->me
);
5169 if (atomic_read(&rdev
->irq
.ring_int
[CAYMAN_RING_TYPE_CP2_INDEX
])) {
5170 struct radeon_ring
*ring
= &rdev
->ring
[CAYMAN_RING_TYPE_CP2_INDEX
];
5171 DRM_DEBUG("si_irq_set: sw int cp2\n");
5172 if (ring
->me
== 1) {
5173 switch (ring
->pipe
) {
5175 cp_m1p0
|= TIME_STAMP_INT_ENABLE
;
5178 cp_m1p1
|= TIME_STAMP_INT_ENABLE
;
5181 cp_m1p2
|= TIME_STAMP_INT_ENABLE
;
5184 cp_m1p2
|= TIME_STAMP_INT_ENABLE
;
5187 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring
->pipe
);
5190 } else if (ring
->me
== 2) {
5191 switch (ring
->pipe
) {
5193 cp_m2p0
|= TIME_STAMP_INT_ENABLE
;
5196 cp_m2p1
|= TIME_STAMP_INT_ENABLE
;
5199 cp_m2p2
|= TIME_STAMP_INT_ENABLE
;
5202 cp_m2p2
|= TIME_STAMP_INT_ENABLE
;
5205 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring
->pipe
);
5209 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring
->me
);
5213 if (atomic_read(&rdev
->irq
.ring_int
[R600_RING_TYPE_DMA_INDEX
])) {
5214 DRM_DEBUG("cik_irq_set: sw int dma\n");
5215 dma_cntl
|= TRAP_ENABLE
;
5218 if (atomic_read(&rdev
->irq
.ring_int
[CAYMAN_RING_TYPE_DMA1_INDEX
])) {
5219 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5220 dma_cntl1
|= TRAP_ENABLE
;
5223 if (rdev
->irq
.crtc_vblank_int
[0] ||
5224 atomic_read(&rdev
->irq
.pflip
[0])) {
5225 DRM_DEBUG("cik_irq_set: vblank 0\n");
5226 crtc1
|= VBLANK_INTERRUPT_MASK
;
5228 if (rdev
->irq
.crtc_vblank_int
[1] ||
5229 atomic_read(&rdev
->irq
.pflip
[1])) {
5230 DRM_DEBUG("cik_irq_set: vblank 1\n");
5231 crtc2
|= VBLANK_INTERRUPT_MASK
;
5233 if (rdev
->irq
.crtc_vblank_int
[2] ||
5234 atomic_read(&rdev
->irq
.pflip
[2])) {
5235 DRM_DEBUG("cik_irq_set: vblank 2\n");
5236 crtc3
|= VBLANK_INTERRUPT_MASK
;
5238 if (rdev
->irq
.crtc_vblank_int
[3] ||
5239 atomic_read(&rdev
->irq
.pflip
[3])) {
5240 DRM_DEBUG("cik_irq_set: vblank 3\n");
5241 crtc4
|= VBLANK_INTERRUPT_MASK
;
5243 if (rdev
->irq
.crtc_vblank_int
[4] ||
5244 atomic_read(&rdev
->irq
.pflip
[4])) {
5245 DRM_DEBUG("cik_irq_set: vblank 4\n");
5246 crtc5
|= VBLANK_INTERRUPT_MASK
;
5248 if (rdev
->irq
.crtc_vblank_int
[5] ||
5249 atomic_read(&rdev
->irq
.pflip
[5])) {
5250 DRM_DEBUG("cik_irq_set: vblank 5\n");
5251 crtc6
|= VBLANK_INTERRUPT_MASK
;
5253 if (rdev
->irq
.hpd
[0]) {
5254 DRM_DEBUG("cik_irq_set: hpd 1\n");
5255 hpd1
|= DC_HPDx_INT_EN
;
5257 if (rdev
->irq
.hpd
[1]) {
5258 DRM_DEBUG("cik_irq_set: hpd 2\n");
5259 hpd2
|= DC_HPDx_INT_EN
;
5261 if (rdev
->irq
.hpd
[2]) {
5262 DRM_DEBUG("cik_irq_set: hpd 3\n");
5263 hpd3
|= DC_HPDx_INT_EN
;
5265 if (rdev
->irq
.hpd
[3]) {
5266 DRM_DEBUG("cik_irq_set: hpd 4\n");
5267 hpd4
|= DC_HPDx_INT_EN
;
5269 if (rdev
->irq
.hpd
[4]) {
5270 DRM_DEBUG("cik_irq_set: hpd 5\n");
5271 hpd5
|= DC_HPDx_INT_EN
;
5273 if (rdev
->irq
.hpd
[5]) {
5274 DRM_DEBUG("cik_irq_set: hpd 6\n");
5275 hpd6
|= DC_HPDx_INT_EN
;
5278 WREG32(CP_INT_CNTL_RING0
, cp_int_cntl
);
5280 WREG32(SDMA0_CNTL
+ SDMA0_REGISTER_OFFSET
, dma_cntl
);
5281 WREG32(SDMA0_CNTL
+ SDMA1_REGISTER_OFFSET
, dma_cntl1
);
5283 WREG32(CP_ME1_PIPE0_INT_CNTL
, cp_m1p0
);
5284 WREG32(CP_ME1_PIPE1_INT_CNTL
, cp_m1p1
);
5285 WREG32(CP_ME1_PIPE2_INT_CNTL
, cp_m1p2
);
5286 WREG32(CP_ME1_PIPE3_INT_CNTL
, cp_m1p3
);
5287 WREG32(CP_ME2_PIPE0_INT_CNTL
, cp_m2p0
);
5288 WREG32(CP_ME2_PIPE1_INT_CNTL
, cp_m2p1
);
5289 WREG32(CP_ME2_PIPE2_INT_CNTL
, cp_m2p2
);
5290 WREG32(CP_ME2_PIPE3_INT_CNTL
, cp_m2p3
);
5292 WREG32(GRBM_INT_CNTL
, grbm_int_cntl
);
5294 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC0_REGISTER_OFFSET
, crtc1
);
5295 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC1_REGISTER_OFFSET
, crtc2
);
5296 if (rdev
->num_crtc
>= 4) {
5297 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC2_REGISTER_OFFSET
, crtc3
);
5298 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC3_REGISTER_OFFSET
, crtc4
);
5300 if (rdev
->num_crtc
>= 6) {
5301 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC4_REGISTER_OFFSET
, crtc5
);
5302 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC5_REGISTER_OFFSET
, crtc6
);
5305 WREG32(DC_HPD1_INT_CONTROL
, hpd1
);
5306 WREG32(DC_HPD2_INT_CONTROL
, hpd2
);
5307 WREG32(DC_HPD3_INT_CONTROL
, hpd3
);
5308 WREG32(DC_HPD4_INT_CONTROL
, hpd4
);
5309 WREG32(DC_HPD5_INT_CONTROL
, hpd5
);
5310 WREG32(DC_HPD6_INT_CONTROL
, hpd6
);
5316 * cik_irq_ack - ack interrupt sources
5318 * @rdev: radeon_device pointer
5320 * Ack interrupt sources on the GPU (vblanks, hpd,
5321 * etc.) (CIK). Certain interrupts sources are sw
5322 * generated and do not require an explicit ack.
5324 static inline void cik_irq_ack(struct radeon_device
*rdev
)
5328 rdev
->irq
.stat_regs
.cik
.disp_int
= RREG32(DISP_INTERRUPT_STATUS
);
5329 rdev
->irq
.stat_regs
.cik
.disp_int_cont
= RREG32(DISP_INTERRUPT_STATUS_CONTINUE
);
5330 rdev
->irq
.stat_regs
.cik
.disp_int_cont2
= RREG32(DISP_INTERRUPT_STATUS_CONTINUE2
);
5331 rdev
->irq
.stat_regs
.cik
.disp_int_cont3
= RREG32(DISP_INTERRUPT_STATUS_CONTINUE3
);
5332 rdev
->irq
.stat_regs
.cik
.disp_int_cont4
= RREG32(DISP_INTERRUPT_STATUS_CONTINUE4
);
5333 rdev
->irq
.stat_regs
.cik
.disp_int_cont5
= RREG32(DISP_INTERRUPT_STATUS_CONTINUE5
);
5334 rdev
->irq
.stat_regs
.cik
.disp_int_cont6
= RREG32(DISP_INTERRUPT_STATUS_CONTINUE6
);
5336 if (rdev
->irq
.stat_regs
.cik
.disp_int
& LB_D1_VBLANK_INTERRUPT
)
5337 WREG32(LB_VBLANK_STATUS
+ EVERGREEN_CRTC0_REGISTER_OFFSET
, VBLANK_ACK
);
5338 if (rdev
->irq
.stat_regs
.cik
.disp_int
& LB_D1_VLINE_INTERRUPT
)
5339 WREG32(LB_VLINE_STATUS
+ EVERGREEN_CRTC0_REGISTER_OFFSET
, VLINE_ACK
);
5340 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont
& LB_D2_VBLANK_INTERRUPT
)
5341 WREG32(LB_VBLANK_STATUS
+ EVERGREEN_CRTC1_REGISTER_OFFSET
, VBLANK_ACK
);
5342 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont
& LB_D2_VLINE_INTERRUPT
)
5343 WREG32(LB_VLINE_STATUS
+ EVERGREEN_CRTC1_REGISTER_OFFSET
, VLINE_ACK
);
5345 if (rdev
->num_crtc
>= 4) {
5346 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont2
& LB_D3_VBLANK_INTERRUPT
)
5347 WREG32(LB_VBLANK_STATUS
+ EVERGREEN_CRTC2_REGISTER_OFFSET
, VBLANK_ACK
);
5348 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont2
& LB_D3_VLINE_INTERRUPT
)
5349 WREG32(LB_VLINE_STATUS
+ EVERGREEN_CRTC2_REGISTER_OFFSET
, VLINE_ACK
);
5350 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont3
& LB_D4_VBLANK_INTERRUPT
)
5351 WREG32(LB_VBLANK_STATUS
+ EVERGREEN_CRTC3_REGISTER_OFFSET
, VBLANK_ACK
);
5352 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont3
& LB_D4_VLINE_INTERRUPT
)
5353 WREG32(LB_VLINE_STATUS
+ EVERGREEN_CRTC3_REGISTER_OFFSET
, VLINE_ACK
);
5356 if (rdev
->num_crtc
>= 6) {
5357 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont4
& LB_D5_VBLANK_INTERRUPT
)
5358 WREG32(LB_VBLANK_STATUS
+ EVERGREEN_CRTC4_REGISTER_OFFSET
, VBLANK_ACK
);
5359 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont4
& LB_D5_VLINE_INTERRUPT
)
5360 WREG32(LB_VLINE_STATUS
+ EVERGREEN_CRTC4_REGISTER_OFFSET
, VLINE_ACK
);
5361 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont5
& LB_D6_VBLANK_INTERRUPT
)
5362 WREG32(LB_VBLANK_STATUS
+ EVERGREEN_CRTC5_REGISTER_OFFSET
, VBLANK_ACK
);
5363 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont5
& LB_D6_VLINE_INTERRUPT
)
5364 WREG32(LB_VLINE_STATUS
+ EVERGREEN_CRTC5_REGISTER_OFFSET
, VLINE_ACK
);
5367 if (rdev
->irq
.stat_regs
.cik
.disp_int
& DC_HPD1_INTERRUPT
) {
5368 tmp
= RREG32(DC_HPD1_INT_CONTROL
);
5369 tmp
|= DC_HPDx_INT_ACK
;
5370 WREG32(DC_HPD1_INT_CONTROL
, tmp
);
5372 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont
& DC_HPD2_INTERRUPT
) {
5373 tmp
= RREG32(DC_HPD2_INT_CONTROL
);
5374 tmp
|= DC_HPDx_INT_ACK
;
5375 WREG32(DC_HPD2_INT_CONTROL
, tmp
);
5377 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont2
& DC_HPD3_INTERRUPT
) {
5378 tmp
= RREG32(DC_HPD3_INT_CONTROL
);
5379 tmp
|= DC_HPDx_INT_ACK
;
5380 WREG32(DC_HPD3_INT_CONTROL
, tmp
);
5382 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont3
& DC_HPD4_INTERRUPT
) {
5383 tmp
= RREG32(DC_HPD4_INT_CONTROL
);
5384 tmp
|= DC_HPDx_INT_ACK
;
5385 WREG32(DC_HPD4_INT_CONTROL
, tmp
);
5387 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont4
& DC_HPD5_INTERRUPT
) {
5388 tmp
= RREG32(DC_HPD5_INT_CONTROL
);
5389 tmp
|= DC_HPDx_INT_ACK
;
5390 WREG32(DC_HPD5_INT_CONTROL
, tmp
);
5392 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont5
& DC_HPD6_INTERRUPT
) {
5393 tmp
= RREG32(DC_HPD5_INT_CONTROL
);
5394 tmp
|= DC_HPDx_INT_ACK
;
5395 WREG32(DC_HPD6_INT_CONTROL
, tmp
);
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK): turn off the IH ring, wait for and
 * acknowledge anything still pending, then clear every source enable.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);

	/*
	 * Wait and acknowledge irq.
	 * NOTE(review): the two statements below were reconstructed from
	 * this comment; confirm the delay value against the pristine file.
	 */
	mdelay(1);
	cik_irq_ack(rdev);

	cik_disable_interrupt_state(rdev);
}
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	/* NOTE(review): RLC stop reconstructed from the description above. */
	cik_rlc_stop(rdev);
}
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK): runs the suspend path, then r600_ih_ring_fini().
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
5445 * cik_get_ih_wptr - get the IH ring buffer wptr
5447 * @rdev: radeon_device pointer
5449 * Get the IH ring buffer wptr from either the register
5450 * or the writeback memory buffer (CIK). Also check for
5451 * ring buffer overflow and deal with it.
5452 * Used by cik_irq_process().
5453 * Returns the value of the wptr.
5455 static inline u32
cik_get_ih_wptr(struct radeon_device
*rdev
)
5459 if (rdev
->wb
.enabled
)
5460 wptr
= le32_to_cpu(rdev
->wb
.wb
[R600_WB_IH_WPTR_OFFSET
/4]);
5462 wptr
= RREG32(IH_RB_WPTR
);
5464 if (wptr
& RB_OVERFLOW
) {
5465 /* When a ring buffer overflow happen start parsing interrupt
5466 * from the last not overwritten vector (wptr + 16). Hopefully
5467 * this should allow us to catchup.
5469 dev_warn(rdev
->dev
, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5470 wptr
, rdev
->ih
.rptr
, (wptr
+ 16) + rdev
->ih
.ptr_mask
);
5471 rdev
->ih
.rptr
= (wptr
+ 16) & rdev
->ih
.ptr_mask
;
5472 tmp
= RREG32(IH_RB_CNTL
);
5473 tmp
|= IH_WPTR_OVERFLOW_CLEAR
;
5474 WREG32(IH_RB_CNTL
, tmp
);
5476 return (wptr
& rdev
->ih
.ptr_mask
);
5480 * Each IV ring entry is 128 bits:
5481 * [7:0] - interrupt source id
5483 * [59:32] - interrupt source data
5484 * [63:60] - reserved
5487 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
5488 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5489 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5490 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5491 * PIPE_ID - ME0 0=3D
5492 * - ME1&2 compute dispatcher (4 pipes each)
5494 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
5495 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
5496 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
5499 * [127:96] - reserved
5502 * cik_irq_process - interrupt handler
5504 * @rdev: radeon_device pointer
5506 * Interrupt handler (CIK). Walk the IH ring,
5507 * ack interrupts and schedule work to handle
5509 * Returns irq process return code.
5511 int cik_irq_process(struct radeon_device
*rdev
)
5513 struct radeon_ring
*cp1_ring
= &rdev
->ring
[CAYMAN_RING_TYPE_CP1_INDEX
];
5514 struct radeon_ring
*cp2_ring
= &rdev
->ring
[CAYMAN_RING_TYPE_CP2_INDEX
];
5517 u32 src_id
, src_data
, ring_id
;
5518 u8 me_id
, pipe_id
, queue_id
;
5520 bool queue_hotplug
= false;
5521 bool queue_reset
= false;
5522 u32 addr
, status
, mc_client
;
5524 if (!rdev
->ih
.enabled
|| rdev
->shutdown
)
5527 wptr
= cik_get_ih_wptr(rdev
);
5530 /* is somebody else already processing irqs? */
5531 if (atomic_xchg(&rdev
->ih
.lock
, 1))
5534 rptr
= rdev
->ih
.rptr
;
5535 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr
, wptr
);
5537 /* Order reading of wptr vs. reading of IH ring data */
5540 /* display interrupts */
5543 while (rptr
!= wptr
) {
5544 /* wptr/rptr are in bytes! */
5545 ring_index
= rptr
/ 4;
5546 src_id
= le32_to_cpu(rdev
->ih
.ring
[ring_index
]) & 0xff;
5547 src_data
= le32_to_cpu(rdev
->ih
.ring
[ring_index
+ 1]) & 0xfffffff;
5548 ring_id
= le32_to_cpu(rdev
->ih
.ring
[ring_index
+ 2]) & 0xff;
5551 case 1: /* D1 vblank/vline */
5553 case 0: /* D1 vblank */
5554 if (rdev
->irq
.stat_regs
.cik
.disp_int
& LB_D1_VBLANK_INTERRUPT
) {
5555 if (rdev
->irq
.crtc_vblank_int
[0]) {
5556 drm_handle_vblank(rdev
->ddev
, 0);
5557 rdev
->pm
.vblank_sync
= true;
5558 wake_up(&rdev
->irq
.vblank_queue
);
5560 if (atomic_read(&rdev
->irq
.pflip
[0]))
5561 radeon_crtc_handle_flip(rdev
, 0);
5562 rdev
->irq
.stat_regs
.cik
.disp_int
&= ~LB_D1_VBLANK_INTERRUPT
;
5563 DRM_DEBUG("IH: D1 vblank\n");
5566 case 1: /* D1 vline */
5567 if (rdev
->irq
.stat_regs
.cik
.disp_int
& LB_D1_VLINE_INTERRUPT
) {
5568 rdev
->irq
.stat_regs
.cik
.disp_int
&= ~LB_D1_VLINE_INTERRUPT
;
5569 DRM_DEBUG("IH: D1 vline\n");
5573 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id
, src_data
);
5577 case 2: /* D2 vblank/vline */
5579 case 0: /* D2 vblank */
5580 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont
& LB_D2_VBLANK_INTERRUPT
) {
5581 if (rdev
->irq
.crtc_vblank_int
[1]) {
5582 drm_handle_vblank(rdev
->ddev
, 1);
5583 rdev
->pm
.vblank_sync
= true;
5584 wake_up(&rdev
->irq
.vblank_queue
);
5586 if (atomic_read(&rdev
->irq
.pflip
[1]))
5587 radeon_crtc_handle_flip(rdev
, 1);
5588 rdev
->irq
.stat_regs
.cik
.disp_int_cont
&= ~LB_D2_VBLANK_INTERRUPT
;
5589 DRM_DEBUG("IH: D2 vblank\n");
5592 case 1: /* D2 vline */
5593 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont
& LB_D2_VLINE_INTERRUPT
) {
5594 rdev
->irq
.stat_regs
.cik
.disp_int_cont
&= ~LB_D2_VLINE_INTERRUPT
;
5595 DRM_DEBUG("IH: D2 vline\n");
5599 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id
, src_data
);
5603 case 3: /* D3 vblank/vline */
5605 case 0: /* D3 vblank */
5606 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont2
& LB_D3_VBLANK_INTERRUPT
) {
5607 if (rdev
->irq
.crtc_vblank_int
[2]) {
5608 drm_handle_vblank(rdev
->ddev
, 2);
5609 rdev
->pm
.vblank_sync
= true;
5610 wake_up(&rdev
->irq
.vblank_queue
);
5612 if (atomic_read(&rdev
->irq
.pflip
[2]))
5613 radeon_crtc_handle_flip(rdev
, 2);
5614 rdev
->irq
.stat_regs
.cik
.disp_int_cont2
&= ~LB_D3_VBLANK_INTERRUPT
;
5615 DRM_DEBUG("IH: D3 vblank\n");
5618 case 1: /* D3 vline */
5619 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont2
& LB_D3_VLINE_INTERRUPT
) {
5620 rdev
->irq
.stat_regs
.cik
.disp_int_cont2
&= ~LB_D3_VLINE_INTERRUPT
;
5621 DRM_DEBUG("IH: D3 vline\n");
5625 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id
, src_data
);
5629 case 4: /* D4 vblank/vline */
5631 case 0: /* D4 vblank */
5632 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont3
& LB_D4_VBLANK_INTERRUPT
) {
5633 if (rdev
->irq
.crtc_vblank_int
[3]) {
5634 drm_handle_vblank(rdev
->ddev
, 3);
5635 rdev
->pm
.vblank_sync
= true;
5636 wake_up(&rdev
->irq
.vblank_queue
);
5638 if (atomic_read(&rdev
->irq
.pflip
[3]))
5639 radeon_crtc_handle_flip(rdev
, 3);
5640 rdev
->irq
.stat_regs
.cik
.disp_int_cont3
&= ~LB_D4_VBLANK_INTERRUPT
;
5641 DRM_DEBUG("IH: D4 vblank\n");
5644 case 1: /* D4 vline */
5645 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont3
& LB_D4_VLINE_INTERRUPT
) {
5646 rdev
->irq
.stat_regs
.cik
.disp_int_cont3
&= ~LB_D4_VLINE_INTERRUPT
;
5647 DRM_DEBUG("IH: D4 vline\n");
5651 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id
, src_data
);
5655 case 5: /* D5 vblank/vline */
5657 case 0: /* D5 vblank */
5658 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont4
& LB_D5_VBLANK_INTERRUPT
) {
5659 if (rdev
->irq
.crtc_vblank_int
[4]) {
5660 drm_handle_vblank(rdev
->ddev
, 4);
5661 rdev
->pm
.vblank_sync
= true;
5662 wake_up(&rdev
->irq
.vblank_queue
);
5664 if (atomic_read(&rdev
->irq
.pflip
[4]))
5665 radeon_crtc_handle_flip(rdev
, 4);
5666 rdev
->irq
.stat_regs
.cik
.disp_int_cont4
&= ~LB_D5_VBLANK_INTERRUPT
;
5667 DRM_DEBUG("IH: D5 vblank\n");
5670 case 1: /* D5 vline */
5671 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont4
& LB_D5_VLINE_INTERRUPT
) {
5672 rdev
->irq
.stat_regs
.cik
.disp_int_cont4
&= ~LB_D5_VLINE_INTERRUPT
;
5673 DRM_DEBUG("IH: D5 vline\n");
5677 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id
, src_data
);
5681 case 6: /* D6 vblank/vline */
5683 case 0: /* D6 vblank */
5684 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont5
& LB_D6_VBLANK_INTERRUPT
) {
5685 if (rdev
->irq
.crtc_vblank_int
[5]) {
5686 drm_handle_vblank(rdev
->ddev
, 5);
5687 rdev
->pm
.vblank_sync
= true;
5688 wake_up(&rdev
->irq
.vblank_queue
);
5690 if (atomic_read(&rdev
->irq
.pflip
[5]))
5691 radeon_crtc_handle_flip(rdev
, 5);
5692 rdev
->irq
.stat_regs
.cik
.disp_int_cont5
&= ~LB_D6_VBLANK_INTERRUPT
;
5693 DRM_DEBUG("IH: D6 vblank\n");
5696 case 1: /* D6 vline */
5697 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont5
& LB_D6_VLINE_INTERRUPT
) {
5698 rdev
->irq
.stat_regs
.cik
.disp_int_cont5
&= ~LB_D6_VLINE_INTERRUPT
;
5699 DRM_DEBUG("IH: D6 vline\n");
5703 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id
, src_data
);
5707 case 42: /* HPD hotplug */
5710 if (rdev
->irq
.stat_regs
.cik
.disp_int
& DC_HPD1_INTERRUPT
) {
5711 rdev
->irq
.stat_regs
.cik
.disp_int
&= ~DC_HPD1_INTERRUPT
;
5712 queue_hotplug
= true;
5713 DRM_DEBUG("IH: HPD1\n");
5717 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont
& DC_HPD2_INTERRUPT
) {
5718 rdev
->irq
.stat_regs
.cik
.disp_int_cont
&= ~DC_HPD2_INTERRUPT
;
5719 queue_hotplug
= true;
5720 DRM_DEBUG("IH: HPD2\n");
5724 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont2
& DC_HPD3_INTERRUPT
) {
5725 rdev
->irq
.stat_regs
.cik
.disp_int_cont2
&= ~DC_HPD3_INTERRUPT
;
5726 queue_hotplug
= true;
5727 DRM_DEBUG("IH: HPD3\n");
5731 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont3
& DC_HPD4_INTERRUPT
) {
5732 rdev
->irq
.stat_regs
.cik
.disp_int_cont3
&= ~DC_HPD4_INTERRUPT
;
5733 queue_hotplug
= true;
5734 DRM_DEBUG("IH: HPD4\n");
5738 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont4
& DC_HPD5_INTERRUPT
) {
5739 rdev
->irq
.stat_regs
.cik
.disp_int_cont4
&= ~DC_HPD5_INTERRUPT
;
5740 queue_hotplug
= true;
5741 DRM_DEBUG("IH: HPD5\n");
5745 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont5
& DC_HPD6_INTERRUPT
) {
5746 rdev
->irq
.stat_regs
.cik
.disp_int_cont5
&= ~DC_HPD6_INTERRUPT
;
5747 queue_hotplug
= true;
5748 DRM_DEBUG("IH: HPD6\n");
5752 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id
, src_data
);
5758 addr
= RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR
);
5759 status
= RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS
);
5760 mc_client
= RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT
);
5761 dev_err(rdev
->dev
, "GPU fault detected: %d 0x%08x\n", src_id
, src_data
);
5762 dev_err(rdev
->dev
, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
5764 dev_err(rdev
->dev
, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5766 cik_vm_decode_fault(rdev
, status
, addr
, mc_client
);
5767 /* reset addr and status */
5768 WREG32_P(VM_CONTEXT1_CNTL2
, 1, ~1);
5770 case 176: /* GFX RB CP_INT */
5771 case 177: /* GFX IB CP_INT */
5772 radeon_fence_process(rdev
, RADEON_RING_TYPE_GFX_INDEX
);
5774 case 181: /* CP EOP event */
5775 DRM_DEBUG("IH: CP EOP\n");
5776 /* XXX check the bitfield order! */
5777 me_id
= (ring_id
& 0x60) >> 5;
5778 pipe_id
= (ring_id
& 0x18) >> 3;
5779 queue_id
= (ring_id
& 0x7) >> 0;
5782 radeon_fence_process(rdev
, RADEON_RING_TYPE_GFX_INDEX
);
5786 if ((cp1_ring
->me
== me_id
) & (cp1_ring
->pipe
== pipe_id
))
5787 radeon_fence_process(rdev
, CAYMAN_RING_TYPE_CP1_INDEX
);
5788 if ((cp2_ring
->me
== me_id
) & (cp2_ring
->pipe
== pipe_id
))
5789 radeon_fence_process(rdev
, CAYMAN_RING_TYPE_CP2_INDEX
);
5793 case 184: /* CP Privileged reg access */
5794 DRM_ERROR("Illegal register access in command stream\n");
5795 /* XXX check the bitfield order! */
5796 me_id
= (ring_id
& 0x60) >> 5;
5797 pipe_id
= (ring_id
& 0x18) >> 3;
5798 queue_id
= (ring_id
& 0x7) >> 0;
5801 /* This results in a full GPU reset, but all we need to do is soft
5802 * reset the CP for gfx
5816 case 185: /* CP Privileged inst */
5817 DRM_ERROR("Illegal instruction in command stream\n");
5818 /* XXX check the bitfield order! */
5819 me_id
= (ring_id
& 0x60) >> 5;
5820 pipe_id
= (ring_id
& 0x18) >> 3;
5821 queue_id
= (ring_id
& 0x7) >> 0;
5824 /* This results in a full GPU reset, but all we need to do is soft
5825 * reset the CP for gfx
5839 case 224: /* SDMA trap event */
5840 /* XXX check the bitfield order! */
5841 me_id
= (ring_id
& 0x3) >> 0;
5842 queue_id
= (ring_id
& 0xc) >> 2;
5843 DRM_DEBUG("IH: SDMA trap\n");
5848 radeon_fence_process(rdev
, R600_RING_TYPE_DMA_INDEX
);
5861 radeon_fence_process(rdev
, CAYMAN_RING_TYPE_DMA1_INDEX
);
5873 case 241: /* SDMA Privileged inst */
5874 case 247: /* SDMA Privileged inst */
5875 DRM_ERROR("Illegal instruction in SDMA command stream\n");
5876 /* XXX check the bitfield order! */
5877 me_id
= (ring_id
& 0x3) >> 0;
5878 queue_id
= (ring_id
& 0xc) >> 2;
5912 case 233: /* GUI IDLE */
5913 DRM_DEBUG("IH: GUI idle\n");
5916 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id
, src_data
);
5920 /* wptr/rptr are in bytes! */
5922 rptr
&= rdev
->ih
.ptr_mask
;
5925 schedule_work(&rdev
->hotplug_work
);
5927 schedule_work(&rdev
->reset_work
);
5928 rdev
->ih
.rptr
= rptr
;
5929 WREG32(IH_RB_RPTR
, rdev
->ih
.rptr
);
5930 atomic_set(&rdev
->ih
.lock
, 0);
5932 /* make sure wptr hasn't changed while processing */
5933 wptr
= cik_get_ih_wptr(rdev
);
5941 * startup/shutdown callbacks
5944 * cik_startup - program the asic to a functional state
5946 * @rdev: radeon_device pointer
5948 * Programs the asic to a functional state (CIK).
5949 * Called by cik_init() and cik_resume().
5950 * Returns 0 for success, error for failure.
5952 static int cik_startup(struct radeon_device
*rdev
)
5954 struct radeon_ring
*ring
;
5957 if (rdev
->flags
& RADEON_IS_IGP
) {
5958 if (!rdev
->me_fw
|| !rdev
->pfp_fw
|| !rdev
->ce_fw
||
5959 !rdev
->mec_fw
|| !rdev
->sdma_fw
|| !rdev
->rlc_fw
) {
5960 r
= cik_init_microcode(rdev
);
5962 DRM_ERROR("Failed to load firmware!\n");
5967 if (!rdev
->me_fw
|| !rdev
->pfp_fw
|| !rdev
->ce_fw
||
5968 !rdev
->mec_fw
|| !rdev
->sdma_fw
|| !rdev
->rlc_fw
||
5970 r
= cik_init_microcode(rdev
);
5972 DRM_ERROR("Failed to load firmware!\n");
5977 r
= ci_mc_load_microcode(rdev
);
5979 DRM_ERROR("Failed to load MC firmware!\n");
5984 r
= r600_vram_scratch_init(rdev
);
5988 cik_mc_program(rdev
);
5989 r
= cik_pcie_gart_enable(rdev
);
5994 /* allocate rlc buffers */
5995 r
= si_rlc_init(rdev
);
5997 DRM_ERROR("Failed to init rlc BOs!\n");
6001 /* allocate wb buffer */
6002 r
= radeon_wb_init(rdev
);
6006 /* allocate mec buffers */
6007 r
= cik_mec_init(rdev
);
6009 DRM_ERROR("Failed to init MEC BOs!\n");
6013 r
= radeon_fence_driver_start_ring(rdev
, RADEON_RING_TYPE_GFX_INDEX
);
6015 dev_err(rdev
->dev
, "failed initializing CP fences (%d).\n", r
);
6019 r
= radeon_fence_driver_start_ring(rdev
, CAYMAN_RING_TYPE_CP1_INDEX
);
6021 dev_err(rdev
->dev
, "failed initializing CP fences (%d).\n", r
);
6025 r
= radeon_fence_driver_start_ring(rdev
, CAYMAN_RING_TYPE_CP2_INDEX
);
6027 dev_err(rdev
->dev
, "failed initializing CP fences (%d).\n", r
);
6031 r
= radeon_fence_driver_start_ring(rdev
, R600_RING_TYPE_DMA_INDEX
);
6033 dev_err(rdev
->dev
, "failed initializing DMA fences (%d).\n", r
);
6037 r
= radeon_fence_driver_start_ring(rdev
, CAYMAN_RING_TYPE_DMA1_INDEX
);
6039 dev_err(rdev
->dev
, "failed initializing DMA fences (%d).\n", r
);
6043 r
= cik_uvd_resume(rdev
);
6045 r
= radeon_fence_driver_start_ring(rdev
,
6046 R600_RING_TYPE_UVD_INDEX
);
6048 dev_err(rdev
->dev
, "UVD fences init error (%d).\n", r
);
6051 rdev
->ring
[R600_RING_TYPE_UVD_INDEX
].ring_size
= 0;
6054 if (!rdev
->irq
.installed
) {
6055 r
= radeon_irq_kms_init(rdev
);
6060 r
= cik_irq_init(rdev
);
6062 DRM_ERROR("radeon: IH init failed (%d).\n", r
);
6063 radeon_irq_kms_fini(rdev
);
6068 ring
= &rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
];
6069 r
= radeon_ring_init(rdev
, ring
, ring
->ring_size
, RADEON_WB_CP_RPTR_OFFSET
,
6070 CP_RB0_RPTR
, CP_RB0_WPTR
,
6071 0, 0xfffff, RADEON_CP_PACKET2
);
6075 /* set up the compute queues */
6076 /* type-2 packets are deprecated on MEC, use type-3 instead */
6077 ring
= &rdev
->ring
[CAYMAN_RING_TYPE_CP1_INDEX
];
6078 r
= radeon_ring_init(rdev
, ring
, ring
->ring_size
, RADEON_WB_CP1_RPTR_OFFSET
,
6079 CP_HQD_PQ_RPTR
, CP_HQD_PQ_WPTR
,
6080 0, 0xfffff, PACKET3(PACKET3_NOP
, 0x3FFF));
6083 ring
->me
= 1; /* first MEC */
6084 ring
->pipe
= 0; /* first pipe */
6085 ring
->queue
= 0; /* first queue */
6086 ring
->wptr_offs
= CIK_WB_CP1_WPTR_OFFSET
;
6088 /* type-2 packets are deprecated on MEC, use type-3 instead */
6089 ring
= &rdev
->ring
[CAYMAN_RING_TYPE_CP2_INDEX
];
6090 r
= radeon_ring_init(rdev
, ring
, ring
->ring_size
, RADEON_WB_CP2_RPTR_OFFSET
,
6091 CP_HQD_PQ_RPTR
, CP_HQD_PQ_WPTR
,
6092 0, 0xffffffff, PACKET3(PACKET3_NOP
, 0x3FFF));
6095 /* dGPU only have 1 MEC */
6096 ring
->me
= 1; /* first MEC */
6097 ring
->pipe
= 0; /* first pipe */
6098 ring
->queue
= 1; /* second queue */
6099 ring
->wptr_offs
= CIK_WB_CP2_WPTR_OFFSET
;
6101 ring
= &rdev
->ring
[R600_RING_TYPE_DMA_INDEX
];
6102 r
= radeon_ring_init(rdev
, ring
, ring
->ring_size
, R600_WB_DMA_RPTR_OFFSET
,
6103 SDMA0_GFX_RB_RPTR
+ SDMA0_REGISTER_OFFSET
,
6104 SDMA0_GFX_RB_WPTR
+ SDMA0_REGISTER_OFFSET
,
6105 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP
, 0, 0));
6109 ring
= &rdev
->ring
[CAYMAN_RING_TYPE_DMA1_INDEX
];
6110 r
= radeon_ring_init(rdev
, ring
, ring
->ring_size
, CAYMAN_WB_DMA1_RPTR_OFFSET
,
6111 SDMA0_GFX_RB_RPTR
+ SDMA1_REGISTER_OFFSET
,
6112 SDMA0_GFX_RB_WPTR
+ SDMA1_REGISTER_OFFSET
,
6113 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP
, 0, 0));
6117 r
= cik_cp_resume(rdev
);
6121 r
= cik_sdma_resume(rdev
);
6125 ring
= &rdev
->ring
[R600_RING_TYPE_UVD_INDEX
];
6126 if (ring
->ring_size
) {
6127 r
= radeon_ring_init(rdev
, ring
, ring
->ring_size
,
6128 R600_WB_UVD_RPTR_OFFSET
,
6129 UVD_RBC_RB_RPTR
, UVD_RBC_RB_WPTR
,
6130 0, 0xfffff, RADEON_CP_PACKET2
);
6132 r
= r600_uvd_init(rdev
);
6134 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r
);
6137 r
= radeon_ib_pool_init(rdev
);
6139 dev_err(rdev
->dev
, "IB initialization failed (%d).\n", r
);
6143 r
= radeon_vm_manager_init(rdev
);
6145 dev_err(rdev
->dev
, "vm manager initialization failed (%d).\n", r
);
6153 * cik_resume - resume the asic to a functional state
6155 * @rdev: radeon_device pointer
6157 * Programs the asic to a functional state (CIK).
6159 * Returns 0 for success, error for failure.
6161 int cik_resume(struct radeon_device
*rdev
)
6166 atom_asic_init(rdev
->mode_info
.atom_context
);
6168 /* init golden registers */
6169 cik_init_golden_registers(rdev
);
6171 rdev
->accel_working
= true;
6172 r
= cik_startup(rdev
);
6174 DRM_ERROR("cik startup failed on resume\n");
6175 rdev
->accel_working
= false;
6184 * cik_suspend - suspend the asic
6186 * @rdev: radeon_device pointer
6188 * Bring the chip into a state suitable for suspend (CIK).
6189 * Called at suspend.
6190 * Returns 0 for success.
6192 int cik_suspend(struct radeon_device
*rdev
)
6194 radeon_vm_manager_fini(rdev
);
6195 cik_cp_enable(rdev
, false);
6196 cik_sdma_enable(rdev
, false);
6197 r600_uvd_rbc_stop(rdev
);
6198 radeon_uvd_suspend(rdev
);
6199 cik_irq_suspend(rdev
);
6200 radeon_wb_disable(rdev
);
6201 cik_pcie_gart_disable(rdev
);
6205 /* Plan is to move initialization in that function and use
6206 * helper function so that radeon_device_init pretty much
6207 * do nothing more than calling asic specific function. This
6208 * should also allow to remove a bunch of callback function
6212 * cik_init - asic specific driver and hw init
6214 * @rdev: radeon_device pointer
6216 * Setup asic specific driver variables and program the hw
6217 * to a functional state (CIK).
6218 * Called at driver startup.
6219 * Returns 0 for success, errors for failure.
6221 int cik_init(struct radeon_device
*rdev
)
6223 struct radeon_ring
*ring
;
6227 if (!radeon_get_bios(rdev
)) {
6228 if (ASIC_IS_AVIVO(rdev
))
6231 /* Must be an ATOMBIOS */
6232 if (!rdev
->is_atom_bios
) {
6233 dev_err(rdev
->dev
, "Expecting atombios for cayman GPU\n");
6236 r
= radeon_atombios_init(rdev
);
6240 /* Post card if necessary */
6241 if (!radeon_card_posted(rdev
)) {
6243 dev_err(rdev
->dev
, "Card not posted and no BIOS - ignoring\n");
6246 DRM_INFO("GPU not posted. posting now...\n");
6247 atom_asic_init(rdev
->mode_info
.atom_context
);
6249 /* init golden registers */
6250 cik_init_golden_registers(rdev
);
6251 /* Initialize scratch registers */
6252 cik_scratch_init(rdev
);
6253 /* Initialize surface registers */
6254 radeon_surface_init(rdev
);
6255 /* Initialize clocks */
6256 radeon_get_clock_info(rdev
->ddev
);
6259 r
= radeon_fence_driver_init(rdev
);
6263 /* initialize memory controller */
6264 r
= cik_mc_init(rdev
);
6267 /* Memory manager */
6268 r
= radeon_bo_init(rdev
);
6272 ring
= &rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
];
6273 ring
->ring_obj
= NULL
;
6274 r600_ring_init(rdev
, ring
, 1024 * 1024);
6276 ring
= &rdev
->ring
[CAYMAN_RING_TYPE_CP1_INDEX
];
6277 ring
->ring_obj
= NULL
;
6278 r600_ring_init(rdev
, ring
, 1024 * 1024);
6279 r
= radeon_doorbell_get(rdev
, &ring
->doorbell_page_num
);
6283 ring
= &rdev
->ring
[CAYMAN_RING_TYPE_CP2_INDEX
];
6284 ring
->ring_obj
= NULL
;
6285 r600_ring_init(rdev
, ring
, 1024 * 1024);
6286 r
= radeon_doorbell_get(rdev
, &ring
->doorbell_page_num
);
6290 ring
= &rdev
->ring
[R600_RING_TYPE_DMA_INDEX
];
6291 ring
->ring_obj
= NULL
;
6292 r600_ring_init(rdev
, ring
, 256 * 1024);
6294 ring
= &rdev
->ring
[CAYMAN_RING_TYPE_DMA1_INDEX
];
6295 ring
->ring_obj
= NULL
;
6296 r600_ring_init(rdev
, ring
, 256 * 1024);
6298 r
= radeon_uvd_init(rdev
);
6300 ring
= &rdev
->ring
[R600_RING_TYPE_UVD_INDEX
];
6301 ring
->ring_obj
= NULL
;
6302 r600_ring_init(rdev
, ring
, 4096);
6305 rdev
->ih
.ring_obj
= NULL
;
6306 r600_ih_ring_init(rdev
, 64 * 1024);
6308 r
= r600_pcie_gart_init(rdev
);
6312 rdev
->accel_working
= true;
6313 r
= cik_startup(rdev
);
6315 dev_err(rdev
->dev
, "disabling GPU acceleration\n");
6317 cik_sdma_fini(rdev
);
6321 radeon_wb_fini(rdev
);
6322 radeon_ib_pool_fini(rdev
);
6323 radeon_vm_manager_fini(rdev
);
6324 radeon_irq_kms_fini(rdev
);
6325 cik_pcie_gart_fini(rdev
);
6326 rdev
->accel_working
= false;
6329 /* Don't start up if the MC ucode is missing.
6330 * The default clocks and voltages before the MC ucode
6331 * is loaded are not suffient for advanced operations.
6333 if (!rdev
->mc_fw
&& !(rdev
->flags
& RADEON_IS_IGP
)) {
6334 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6342 * cik_fini - asic specific driver and hw fini
6344 * @rdev: radeon_device pointer
6346 * Tear down the asic specific driver variables and program the hw
6347 * to an idle state (CIK).
6348 * Called at driver unload.
6350 void cik_fini(struct radeon_device
*rdev
)
6353 cik_sdma_fini(rdev
);
6357 radeon_wb_fini(rdev
);
6358 radeon_vm_manager_fini(rdev
);
6359 radeon_ib_pool_fini(rdev
);
6360 radeon_irq_kms_fini(rdev
);
6361 radeon_uvd_fini(rdev
);
6362 cik_pcie_gart_fini(rdev
);
6363 r600_vram_scratch_fini(rdev
);
6364 radeon_gem_fini(rdev
);
6365 radeon_fence_driver_fini(rdev
);
6366 radeon_bo_fini(rdev
);
6367 radeon_atombios_fini(rdev
);
6372 /* display watermark setup */
6374 * dce8_line_buffer_adjust - Set up the line buffer
6376 * @rdev: radeon_device pointer
6377 * @radeon_crtc: the selected display controller
6378 * @mode: the current display mode on the selected display
6381 * Setup up the line buffer allocation for
6382 * the selected display controller (CIK).
6383 * Returns the line buffer size in pixels.
6385 static u32
dce8_line_buffer_adjust(struct radeon_device
*rdev
,
6386 struct radeon_crtc
*radeon_crtc
,
6387 struct drm_display_mode
*mode
)
6393 * There are 6 line buffers, one for each display controllers.
6394 * There are 3 partitions per LB. Select the number of partitions
6395 * to enable based on the display width. For display widths larger
6396 * than 4096, you need use to use 2 display controllers and combine
6397 * them using the stereo blender.
6399 if (radeon_crtc
->base
.enabled
&& mode
) {
6400 if (mode
->crtc_hdisplay
< 1920)
6402 else if (mode
->crtc_hdisplay
< 2560)
6404 else if (mode
->crtc_hdisplay
< 4096)
6407 DRM_DEBUG_KMS("Mode too big for LB!\n");
6413 WREG32(LB_MEMORY_CTRL
+ radeon_crtc
->crtc_offset
,
6414 LB_MEMORY_CONFIG(tmp
) | LB_MEMORY_SIZE(0x6B0));
6416 if (radeon_crtc
->base
.enabled
&& mode
) {
6428 /* controller not enabled, so no lb used */
6433 * cik_get_number_of_dram_channels - get the number of dram channels
6435 * @rdev: radeon_device pointer
6437 * Look up the number of video ram channels (CIK).
6438 * Used for display watermark bandwidth calculations
6439 * Returns the number of dram channels
6441 static u32
cik_get_number_of_dram_channels(struct radeon_device
*rdev
)
6443 u32 tmp
= RREG32(MC_SHARED_CHMAP
);
6445 switch ((tmp
& NOOFCHAN_MASK
) >> NOOFCHAN_SHIFT
) {
6468 struct dce8_wm_params
{
6469 u32 dram_channels
; /* number of dram channels */
6470 u32 yclk
; /* bandwidth per dram data pin in kHz */
6471 u32 sclk
; /* engine clock in kHz */
6472 u32 disp_clk
; /* display clock in kHz */
6473 u32 src_width
; /* viewport width */
6474 u32 active_time
; /* active display time in ns */
6475 u32 blank_time
; /* blank time in ns */
6476 bool interlaced
; /* mode is interlaced */
6477 fixed20_12 vsc
; /* vertical scale ratio */
6478 u32 num_heads
; /* number of active crtcs */
6479 u32 bytes_per_pixel
; /* bytes per pixel display + overlay */
6480 u32 lb_size
; /* line buffer allocated to pipe */
6481 u32 vtaps
; /* vertical scaler taps */
6485 * dce8_dram_bandwidth - get the dram bandwidth
6487 * @wm: watermark calculation data
6489 * Calculate the raw dram bandwidth (CIK).
6490 * Used for display watermark bandwidth calculations
6491 * Returns the dram bandwidth in MBytes/s
6493 static u32
dce8_dram_bandwidth(struct dce8_wm_params
*wm
)
6495 /* Calculate raw DRAM Bandwidth */
6496 fixed20_12 dram_efficiency
; /* 0.7 */
6497 fixed20_12 yclk
, dram_channels
, bandwidth
;
6500 a
.full
= dfixed_const(1000);
6501 yclk
.full
= dfixed_const(wm
->yclk
);
6502 yclk
.full
= dfixed_div(yclk
, a
);
6503 dram_channels
.full
= dfixed_const(wm
->dram_channels
* 4);
6504 a
.full
= dfixed_const(10);
6505 dram_efficiency
.full
= dfixed_const(7);
6506 dram_efficiency
.full
= dfixed_div(dram_efficiency
, a
);
6507 bandwidth
.full
= dfixed_mul(dram_channels
, yclk
);
6508 bandwidth
.full
= dfixed_mul(bandwidth
, dram_efficiency
);
6510 return dfixed_trunc(bandwidth
);
6514 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6516 * @wm: watermark calculation data
6518 * Calculate the dram bandwidth used for display (CIK).
6519 * Used for display watermark bandwidth calculations
6520 * Returns the dram bandwidth for display in MBytes/s
6522 static u32
dce8_dram_bandwidth_for_display(struct dce8_wm_params
*wm
)
6524 /* Calculate DRAM Bandwidth and the part allocated to display. */
6525 fixed20_12 disp_dram_allocation
; /* 0.3 to 0.7 */
6526 fixed20_12 yclk
, dram_channels
, bandwidth
;
6529 a
.full
= dfixed_const(1000);
6530 yclk
.full
= dfixed_const(wm
->yclk
);
6531 yclk
.full
= dfixed_div(yclk
, a
);
6532 dram_channels
.full
= dfixed_const(wm
->dram_channels
* 4);
6533 a
.full
= dfixed_const(10);
6534 disp_dram_allocation
.full
= dfixed_const(3); /* XXX worse case value 0.3 */
6535 disp_dram_allocation
.full
= dfixed_div(disp_dram_allocation
, a
);
6536 bandwidth
.full
= dfixed_mul(dram_channels
, yclk
);
6537 bandwidth
.full
= dfixed_mul(bandwidth
, disp_dram_allocation
);
6539 return dfixed_trunc(bandwidth
);
6543 * dce8_data_return_bandwidth - get the data return bandwidth
6545 * @wm: watermark calculation data
6547 * Calculate the data return bandwidth used for display (CIK).
6548 * Used for display watermark bandwidth calculations
6549 * Returns the data return bandwidth in MBytes/s
6551 static u32
dce8_data_return_bandwidth(struct dce8_wm_params
*wm
)
6553 /* Calculate the display Data return Bandwidth */
6554 fixed20_12 return_efficiency
; /* 0.8 */
6555 fixed20_12 sclk
, bandwidth
;
6558 a
.full
= dfixed_const(1000);
6559 sclk
.full
= dfixed_const(wm
->sclk
);
6560 sclk
.full
= dfixed_div(sclk
, a
);
6561 a
.full
= dfixed_const(10);
6562 return_efficiency
.full
= dfixed_const(8);
6563 return_efficiency
.full
= dfixed_div(return_efficiency
, a
);
6564 a
.full
= dfixed_const(32);
6565 bandwidth
.full
= dfixed_mul(a
, sclk
);
6566 bandwidth
.full
= dfixed_mul(bandwidth
, return_efficiency
);
6568 return dfixed_trunc(bandwidth
);
6572 * dce8_dmif_request_bandwidth - get the dmif bandwidth
6574 * @wm: watermark calculation data
6576 * Calculate the dmif bandwidth used for display (CIK).
6577 * Used for display watermark bandwidth calculations
6578 * Returns the dmif bandwidth in MBytes/s
6580 static u32
dce8_dmif_request_bandwidth(struct dce8_wm_params
*wm
)
6582 /* Calculate the DMIF Request Bandwidth */
6583 fixed20_12 disp_clk_request_efficiency
; /* 0.8 */
6584 fixed20_12 disp_clk
, bandwidth
;
6587 a
.full
= dfixed_const(1000);
6588 disp_clk
.full
= dfixed_const(wm
->disp_clk
);
6589 disp_clk
.full
= dfixed_div(disp_clk
, a
);
6590 a
.full
= dfixed_const(32);
6591 b
.full
= dfixed_mul(a
, disp_clk
);
6593 a
.full
= dfixed_const(10);
6594 disp_clk_request_efficiency
.full
= dfixed_const(8);
6595 disp_clk_request_efficiency
.full
= dfixed_div(disp_clk_request_efficiency
, a
);
6597 bandwidth
.full
= dfixed_mul(b
, disp_clk_request_efficiency
);
6599 return dfixed_trunc(bandwidth
);
6603 * dce8_available_bandwidth - get the min available bandwidth
6605 * @wm: watermark calculation data
6607 * Calculate the min available bandwidth used for display (CIK).
6608 * Used for display watermark bandwidth calculations
6609 * Returns the min available bandwidth in MBytes/s
6611 static u32
dce8_available_bandwidth(struct dce8_wm_params
*wm
)
6613 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
6614 u32 dram_bandwidth
= dce8_dram_bandwidth(wm
);
6615 u32 data_return_bandwidth
= dce8_data_return_bandwidth(wm
);
6616 u32 dmif_req_bandwidth
= dce8_dmif_request_bandwidth(wm
);
6618 return min(dram_bandwidth
, min(data_return_bandwidth
, dmif_req_bandwidth
));
6622 * dce8_average_bandwidth - get the average available bandwidth
6624 * @wm: watermark calculation data
6626 * Calculate the average available bandwidth used for display (CIK).
6627 * Used for display watermark bandwidth calculations
6628 * Returns the average available bandwidth in MBytes/s
6630 static u32
dce8_average_bandwidth(struct dce8_wm_params
*wm
)
6632 /* Calculate the display mode Average Bandwidth
6633 * DisplayMode should contain the source and destination dimensions,
6637 fixed20_12 line_time
;
6638 fixed20_12 src_width
;
6639 fixed20_12 bandwidth
;
6642 a
.full
= dfixed_const(1000);
6643 line_time
.full
= dfixed_const(wm
->active_time
+ wm
->blank_time
);
6644 line_time
.full
= dfixed_div(line_time
, a
);
6645 bpp
.full
= dfixed_const(wm
->bytes_per_pixel
);
6646 src_width
.full
= dfixed_const(wm
->src_width
);
6647 bandwidth
.full
= dfixed_mul(src_width
, bpp
);
6648 bandwidth
.full
= dfixed_mul(bandwidth
, wm
->vsc
);
6649 bandwidth
.full
= dfixed_div(bandwidth
, line_time
);
6651 return dfixed_trunc(bandwidth
);
6655 * dce8_latency_watermark - get the latency watermark
6657 * @wm: watermark calculation data
6659 * Calculate the latency watermark (CIK).
6660 * Used for display watermark bandwidth calculations
6661 * Returns the latency watermark in ns
6663 static u32
dce8_latency_watermark(struct dce8_wm_params
*wm
)
6665 /* First calculate the latency in ns */
6666 u32 mc_latency
= 2000; /* 2000 ns. */
6667 u32 available_bandwidth
= dce8_available_bandwidth(wm
);
6668 u32 worst_chunk_return_time
= (512 * 8 * 1000) / available_bandwidth
;
6669 u32 cursor_line_pair_return_time
= (128 * 4 * 1000) / available_bandwidth
;
6670 u32 dc_latency
= 40000000 / wm
->disp_clk
; /* dc pipe latency */
6671 u32 other_heads_data_return_time
= ((wm
->num_heads
+ 1) * worst_chunk_return_time
) +
6672 (wm
->num_heads
* cursor_line_pair_return_time
);
6673 u32 latency
= mc_latency
+ other_heads_data_return_time
+ dc_latency
;
6674 u32 max_src_lines_per_dst_line
, lb_fill_bw
, line_fill_time
;
6675 u32 tmp
, dmif_size
= 12288;
6678 if (wm
->num_heads
== 0)
6681 a
.full
= dfixed_const(2);
6682 b
.full
= dfixed_const(1);
6683 if ((wm
->vsc
.full
> a
.full
) ||
6684 ((wm
->vsc
.full
> b
.full
) && (wm
->vtaps
>= 3)) ||
6686 ((wm
->vsc
.full
>= a
.full
) && wm
->interlaced
))
6687 max_src_lines_per_dst_line
= 4;
6689 max_src_lines_per_dst_line
= 2;
6691 a
.full
= dfixed_const(available_bandwidth
);
6692 b
.full
= dfixed_const(wm
->num_heads
);
6693 a
.full
= dfixed_div(a
, b
);
6695 b
.full
= dfixed_const(mc_latency
+ 512);
6696 c
.full
= dfixed_const(wm
->disp_clk
);
6697 b
.full
= dfixed_div(b
, c
);
6699 c
.full
= dfixed_const(dmif_size
);
6700 b
.full
= dfixed_div(c
, b
);
6702 tmp
= min(dfixed_trunc(a
), dfixed_trunc(b
));
6704 b
.full
= dfixed_const(1000);
6705 c
.full
= dfixed_const(wm
->disp_clk
);
6706 b
.full
= dfixed_div(c
, b
);
6707 c
.full
= dfixed_const(wm
->bytes_per_pixel
);
6708 b
.full
= dfixed_mul(b
, c
);
6710 lb_fill_bw
= min(tmp
, dfixed_trunc(b
));
6712 a
.full
= dfixed_const(max_src_lines_per_dst_line
* wm
->src_width
* wm
->bytes_per_pixel
);
6713 b
.full
= dfixed_const(1000);
6714 c
.full
= dfixed_const(lb_fill_bw
);
6715 b
.full
= dfixed_div(c
, b
);
6716 a
.full
= dfixed_div(a
, b
);
6717 line_fill_time
= dfixed_trunc(a
);
6719 if (line_fill_time
< wm
->active_time
)
6722 return latency
+ (line_fill_time
- wm
->active_time
);
6727 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6728 * average and available dram bandwidth
6730 * @wm: watermark calculation data
6732 * Check if the display average bandwidth fits in the display
6733 * dram bandwidth (CIK).
6734 * Used for display watermark bandwidth calculations
6735 * Returns true if the display fits, false if not.
6737 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params
*wm
)
6739 if (dce8_average_bandwidth(wm
) <=
6740 (dce8_dram_bandwidth_for_display(wm
) / wm
->num_heads
))
6747 * dce8_average_bandwidth_vs_available_bandwidth - check
6748 * average and available bandwidth
6750 * @wm: watermark calculation data
6752 * Check if the display average bandwidth fits in the display
6753 * available bandwidth (CIK).
6754 * Used for display watermark bandwidth calculations
6755 * Returns true if the display fits, false if not.
6757 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params
*wm
)
6759 if (dce8_average_bandwidth(wm
) <=
6760 (dce8_available_bandwidth(wm
) / wm
->num_heads
))
6767 * dce8_check_latency_hiding - check latency hiding
6769 * @wm: watermark calculation data
6771 * Check latency hiding (CIK).
6772 * Used for display watermark bandwidth calculations
6773 * Returns true if the display fits, false if not.
6775 static bool dce8_check_latency_hiding(struct dce8_wm_params
*wm
)
6777 u32 lb_partitions
= wm
->lb_size
/ wm
->src_width
;
6778 u32 line_time
= wm
->active_time
+ wm
->blank_time
;
6779 u32 latency_tolerant_lines
;
6783 a
.full
= dfixed_const(1);
6784 if (wm
->vsc
.full
> a
.full
)
6785 latency_tolerant_lines
= 1;
6787 if (lb_partitions
<= (wm
->vtaps
+ 1))
6788 latency_tolerant_lines
= 1;
6790 latency_tolerant_lines
= 2;
6793 latency_hiding
= (latency_tolerant_lines
* line_time
+ wm
->blank_time
);
6795 if (dce8_latency_watermark(wm
) <= latency_hiding
)
6802 * dce8_program_watermarks - program display watermarks
6804 * @rdev: radeon_device pointer
6805 * @radeon_crtc: the selected display controller
6806 * @lb_size: line buffer size
6807 * @num_heads: number of display controllers in use
6809 * Calculate and program the display watermarks for the
6810 * selected display controller (CIK).
6812 static void dce8_program_watermarks(struct radeon_device
*rdev
,
6813 struct radeon_crtc
*radeon_crtc
,
6814 u32 lb_size
, u32 num_heads
)
6816 struct drm_display_mode
*mode
= &radeon_crtc
->base
.mode
;
6817 struct dce8_wm_params wm
;
6820 u32 latency_watermark_a
= 0, latency_watermark_b
= 0;
6823 if (radeon_crtc
->base
.enabled
&& num_heads
&& mode
) {
6824 pixel_period
= 1000000 / (u32
)mode
->clock
;
6825 line_time
= min((u32
)mode
->crtc_htotal
* pixel_period
, (u32
)65535);
6827 wm
.yclk
= rdev
->pm
.current_mclk
* 10;
6828 wm
.sclk
= rdev
->pm
.current_sclk
* 10;
6829 wm
.disp_clk
= mode
->clock
;
6830 wm
.src_width
= mode
->crtc_hdisplay
;
6831 wm
.active_time
= mode
->crtc_hdisplay
* pixel_period
;
6832 wm
.blank_time
= line_time
- wm
.active_time
;
6833 wm
.interlaced
= false;
6834 if (mode
->flags
& DRM_MODE_FLAG_INTERLACE
)
6835 wm
.interlaced
= true;
6836 wm
.vsc
= radeon_crtc
->vsc
;
6838 if (radeon_crtc
->rmx_type
!= RMX_OFF
)
6840 wm
.bytes_per_pixel
= 4; /* XXX: get this from fb config */
6841 wm
.lb_size
= lb_size
;
6842 wm
.dram_channels
= cik_get_number_of_dram_channels(rdev
);
6843 wm
.num_heads
= num_heads
;
6845 /* set for high clocks */
6846 latency_watermark_a
= min(dce8_latency_watermark(&wm
), (u32
)65535);
6847 /* set for low clocks */
6848 /* wm.yclk = low clk; wm.sclk = low clk */
6849 latency_watermark_b
= min(dce8_latency_watermark(&wm
), (u32
)65535);
6851 /* possibly force display priority to high */
6852 /* should really do this at mode validation time... */
6853 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm
) ||
6854 !dce8_average_bandwidth_vs_available_bandwidth(&wm
) ||
6855 !dce8_check_latency_hiding(&wm
) ||
6856 (rdev
->disp_priority
== 2)) {
6857 DRM_DEBUG_KMS("force priority to high\n");
6862 wm_mask
= RREG32(DPG_WATERMARK_MASK_CONTROL
+ radeon_crtc
->crtc_offset
);
6864 tmp
&= ~LATENCY_WATERMARK_MASK(3);
6865 tmp
|= LATENCY_WATERMARK_MASK(1);
6866 WREG32(DPG_WATERMARK_MASK_CONTROL
+ radeon_crtc
->crtc_offset
, tmp
);
6867 WREG32(DPG_PIPE_LATENCY_CONTROL
+ radeon_crtc
->crtc_offset
,
6868 (LATENCY_LOW_WATERMARK(latency_watermark_a
) |
6869 LATENCY_HIGH_WATERMARK(line_time
)));
6871 tmp
= RREG32(DPG_WATERMARK_MASK_CONTROL
+ radeon_crtc
->crtc_offset
);
6872 tmp
&= ~LATENCY_WATERMARK_MASK(3);
6873 tmp
|= LATENCY_WATERMARK_MASK(2);
6874 WREG32(DPG_WATERMARK_MASK_CONTROL
+ radeon_crtc
->crtc_offset
, tmp
);
6875 WREG32(DPG_PIPE_LATENCY_CONTROL
+ radeon_crtc
->crtc_offset
,
6876 (LATENCY_LOW_WATERMARK(latency_watermark_b
) |
6877 LATENCY_HIGH_WATERMARK(line_time
)));
6878 /* restore original selection */
6879 WREG32(DPG_WATERMARK_MASK_CONTROL
+ radeon_crtc
->crtc_offset
, wm_mask
);
6883 * dce8_bandwidth_update - program display watermarks
6885 * @rdev: radeon_device pointer
6887 * Calculate and program the display watermarks and line
6888 * buffer allocation (CIK).
6890 void dce8_bandwidth_update(struct radeon_device
*rdev
)
6892 struct drm_display_mode
*mode
= NULL
;
6893 u32 num_heads
= 0, lb_size
;
6896 radeon_update_display_priority(rdev
);
6898 for (i
= 0; i
< rdev
->num_crtc
; i
++) {
6899 if (rdev
->mode_info
.crtcs
[i
]->base
.enabled
)
6902 for (i
= 0; i
< rdev
->num_crtc
; i
++) {
6903 mode
= &rdev
->mode_info
.crtcs
[i
]->base
.mode
;
6904 lb_size
= dce8_line_buffer_adjust(rdev
, rdev
->mode_info
.crtcs
[i
], mode
);
6905 dce8_program_watermarks(rdev
, rdev
->mode_info
.crtcs
[i
], lb_size
, num_heads
);
6910 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6912 * @rdev: radeon_device pointer
6914 * Fetches a GPU clock counter snapshot (SI).
6915 * Returns the 64 bit clock counter snapshot.
6917 uint64_t cik_get_gpu_clock_counter(struct radeon_device
*rdev
)
6921 mutex_lock(&rdev
->gpu_clock_mutex
);
6922 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT
, 1);
6923 clock
= (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB
) |
6924 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB
) << 32ULL);
6925 mutex_unlock(&rdev
->gpu_clock_mutex
);
6929 static int cik_set_uvd_clock(struct radeon_device
*rdev
, u32 clock
,
6930 u32 cntl_reg
, u32 status_reg
)
6933 struct atom_clock_dividers dividers
;
6936 r
= radeon_atom_get_clock_dividers(rdev
, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK
,
6937 clock
, false, ÷rs
);
6941 tmp
= RREG32_SMC(cntl_reg
);
6942 tmp
&= ~(DCLK_DIR_CNTL_EN
|DCLK_DIVIDER_MASK
);
6943 tmp
|= dividers
.post_divider
;
6944 WREG32_SMC(cntl_reg
, tmp
);
6946 for (i
= 0; i
< 100; i
++) {
6947 if (RREG32_SMC(status_reg
) & DCLK_STATUS
)
6957 int cik_set_uvd_clocks(struct radeon_device
*rdev
, u32 vclk
, u32 dclk
)
6961 r
= cik_set_uvd_clock(rdev
, vclk
, CG_VCLK_CNTL
, CG_VCLK_STATUS
);
6965 r
= cik_set_uvd_clock(rdev
, dclk
, CG_DCLK_CNTL
, CG_DCLK_STATUS
);
6969 int cik_uvd_resume(struct radeon_device
*rdev
)
6975 r
= radeon_uvd_resume(rdev
);
6979 /* program the VCPU memory controller bits 0-27 */
6980 addr
= rdev
->uvd
.gpu_addr
>> 3;
6981 size
= RADEON_GPU_PAGE_ALIGN(rdev
->uvd
.fw_size
+ 4) >> 3;
6982 WREG32(UVD_VCPU_CACHE_OFFSET0
, addr
);
6983 WREG32(UVD_VCPU_CACHE_SIZE0
, size
);
6986 size
= RADEON_UVD_STACK_SIZE
>> 3;
6987 WREG32(UVD_VCPU_CACHE_OFFSET1
, addr
);
6988 WREG32(UVD_VCPU_CACHE_SIZE1
, size
);
6991 size
= RADEON_UVD_HEAP_SIZE
>> 3;
6992 WREG32(UVD_VCPU_CACHE_OFFSET2
, addr
);
6993 WREG32(UVD_VCPU_CACHE_SIZE2
, size
);
6996 addr
= (rdev
->uvd
.gpu_addr
>> 28) & 0xF;
6997 WREG32(UVD_LMI_ADDR_EXT
, (addr
<< 12) | (addr
<< 0));
7000 addr
= (rdev
->uvd
.gpu_addr
>> 32) & 0xFF;
7001 WREG32(UVD_LMI_EXT40_ADDR
, addr
| (0x9 << 16) | (0x1 << 31));