[mirror_ubuntu-jammy-kernel.git] drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drm/amdgpu: Enable gfxoff quirk on MacBook Pro
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39
40 #include "vega10_enum.h"
41
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47
48 #include "amdgpu_ras.h"
49
50 #include "gfx_v9_4.h"
51 #include "gfx_v9_0.h"
52 #include "gfx_v9_4_2.h"
53
54 #include "asic_reg/pwr/pwr_10_0_offset.h"
55 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
56 #include "asic_reg/gc/gc_9_0_default.h"
57
58 #define GFX9_NUM_GFX_RINGS 1
59 #define GFX9_MEC_HPD_SIZE 4096
60 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
61 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
62
63 #define mmGCEA_PROBE_MAP 0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX 0
65
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
114 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
118
119 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
120 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
121 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
125
126 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
127 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
129
130 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03
131 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
132 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04
133 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0
134 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09
135 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0
136 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a
137 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0
138 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b
139 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0
140 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
141 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0
142
143 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir 0x0025
144 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX 1
145 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026
146 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1
147
148 enum ta_ras_gfx_subblock {
149 /*CPC*/
150 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
151 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
152 TA_RAS_BLOCK__GFX_CPC_UCODE,
153 TA_RAS_BLOCK__GFX_DC_STATE_ME1,
154 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
155 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
156 TA_RAS_BLOCK__GFX_DC_STATE_ME2,
157 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
158 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
159 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
160 /* CPF*/
161 TA_RAS_BLOCK__GFX_CPF_INDEX_START,
162 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
163 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
164 TA_RAS_BLOCK__GFX_CPF_TAG,
165 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
166 /* CPG*/
167 TA_RAS_BLOCK__GFX_CPG_INDEX_START,
168 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
169 TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
170 TA_RAS_BLOCK__GFX_CPG_TAG,
171 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
172 /* GDS*/
173 TA_RAS_BLOCK__GFX_GDS_INDEX_START,
174 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
175 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
176 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
177 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
178 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
179 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
180 /* SPI*/
181 TA_RAS_BLOCK__GFX_SPI_SR_MEM,
182 /* SQ*/
183 TA_RAS_BLOCK__GFX_SQ_INDEX_START,
184 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
185 TA_RAS_BLOCK__GFX_SQ_LDS_D,
186 TA_RAS_BLOCK__GFX_SQ_LDS_I,
187 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
188 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
189 /* SQC (3 ranges)*/
190 TA_RAS_BLOCK__GFX_SQC_INDEX_START,
191 /* SQC range 0*/
192 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
193 TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
194 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
195 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
196 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
197 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
198 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
199 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
200 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
201 TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
202 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
203 /* SQC range 1*/
204 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
205 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
206 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
207 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
208 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
209 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
210 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
211 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
212 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
213 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
214 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
215 TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
216 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
217 /* SQC range 2*/
218 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
219 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
220 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
221 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
222 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
223 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
224 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
225 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
226 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
227 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
228 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
229 TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
230 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
231 TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
232 /* TA*/
233 TA_RAS_BLOCK__GFX_TA_INDEX_START,
234 TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
235 TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
236 TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
237 TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
238 TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
239 TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
240 /* TCA*/
241 TA_RAS_BLOCK__GFX_TCA_INDEX_START,
242 TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
243 TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
244 TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
245 /* TCC (5 sub-ranges)*/
246 TA_RAS_BLOCK__GFX_TCC_INDEX_START,
247 /* TCC range 0*/
248 TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
249 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
250 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
251 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
252 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
253 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
254 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
255 TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
256 TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
257 TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
258 /* TCC range 1*/
259 TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
260 TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
261 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
262 TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
263 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
264 /* TCC range 2*/
265 TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
266 TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
267 TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
268 TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
269 TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
270 TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
271 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
272 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
273 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
274 TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
275 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
276 /* TCC range 3*/
277 TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
278 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
279 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
280 TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
281 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
282 /* TCC range 4*/
283 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
284 TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
285 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
286 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
287 TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
288 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
289 TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
290 /* TCI*/
291 TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
292 /* TCP*/
293 TA_RAS_BLOCK__GFX_TCP_INDEX_START,
294 TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
295 TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
296 TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
297 TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
298 TA_RAS_BLOCK__GFX_TCP_DB_RAM,
299 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
300 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
301 TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
302 /* TD*/
303 TA_RAS_BLOCK__GFX_TD_INDEX_START,
304 TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
305 TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
306 TA_RAS_BLOCK__GFX_TD_CS_FIFO,
307 TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
308 /* EA (3 sub-ranges)*/
309 TA_RAS_BLOCK__GFX_EA_INDEX_START,
310 /* EA range 0*/
311 TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
312 TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
313 TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
314 TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
315 TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
316 TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
317 TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
318 TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
319 TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
320 TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
321 /* EA range 1*/
322 TA_RAS_BLOCK__GFX_EA_INDEX1_START,
323 TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
324 TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
325 TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
326 TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
327 TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
328 TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
329 TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
330 TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
331 /* EA range 2*/
332 TA_RAS_BLOCK__GFX_EA_INDEX2_START,
333 TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
334 TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
335 TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
336 TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
337 TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
338 TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
339 /* UTC VM L2 bank*/
340 TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
341 /* UTC VM walker*/
342 TA_RAS_BLOCK__UTC_VML2_WALKER,
343 /* UTC ATC L2 2MB cache*/
344 TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
345 /* UTC ATC L2 4KB cache*/
346 TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
347 TA_RAS_BLOCK__GFX_MAX
348 };
349
350 struct ras_gfx_subblock {
351 unsigned char *name;
352 int ta_subblock;
353 int hw_supported_error_type;
354 int sw_supported_error_type;
355 };
356
357 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
358 [AMDGPU_RAS_BLOCK__##subblock] = { \
359 #subblock, \
360 TA_RAS_BLOCK__##subblock, \
361 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
362 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
363 }
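/*
 * The AMDGPU_RAS_SUB_BLOCK() macro above builds one entry of the
 * ras_gfx_subblocks[] table below: the entry lands at index
 * AMDGPU_RAS_BLOCK__<subblock>, carries the stringified subblock name and the
 * matching TA_RAS_BLOCK__<subblock> value, and packs arguments a..d into bits
 * 0..3 of hw_supported_error_type and g, e, h, f into bits 0..3 of
 * sw_supported_error_type, per the shifts in the macro.
 */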
364
365 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
366 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
367 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
368 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
369 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
370 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
371 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
372 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
373 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
374 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
375 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
376 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
377 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
378 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
379 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
380 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
381 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
382 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
383 0),
384 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
385 0),
386 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
387 AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
388 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
389 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
390 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
391 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
392 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
393 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
394 0, 0),
395 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
396 0),
397 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
398 0, 0),
399 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
400 0),
401 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
402 0, 0),
403 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
404 0),
405 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
406 1),
407 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
408 0, 0, 0),
409 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
410 0),
411 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
412 0),
413 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
414 0),
415 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
416 0),
417 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
418 0),
419 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
420 0, 0),
421 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
422 0),
423 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
424 0),
425 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
426 0, 0, 0),
427 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
428 0),
429 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
430 0),
431 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
432 0),
433 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
434 0),
435 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
436 0),
437 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
438 0, 0),
439 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
440 0),
441 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
442 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
443 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
444 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
445 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
446 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
447 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
448 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
449 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
450 1),
451 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
452 1),
453 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
454 1),
455 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
456 0),
457 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
458 0),
459 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
460 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
461 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
462 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
463 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
464 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
465 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
466 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
467 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
468 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
469 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
470 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
471 0),
472 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
473 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
474 0),
475 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
476 0, 0),
477 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
478 0),
479 AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
480 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
481 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
482 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
483 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
484 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
485 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
486 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
487 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
488 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
489 AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
490 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
491 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
492 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
493 AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
494 AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
495 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
496 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
497 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
498 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
499 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
500 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
501 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
502 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
505 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
506 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
507 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
508 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
509 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
510 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
511 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
512 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
513 };
514
515 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
516 {
517 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
518 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
519 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
520 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
521 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
522 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
523 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
524 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
525 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
526 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
527 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
528 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
529 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
530 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
531 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
532 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
533 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
534 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
535 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
536 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
537 };
538
539 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
540 {
541 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
542 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
543 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
544 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
545 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
546 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
547 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
548 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
549 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
550 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
551 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
552 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
553 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
554 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
555 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
556 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
557 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
558 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
559 };
560
561 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
562 {
563 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
564 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
565 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
566 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
567 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
568 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
569 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
570 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
571 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
572 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
573 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
574 };
575
576 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
577 {
578 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
579 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
580 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
581 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
582 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
583 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
584 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
585 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
586 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
587 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
588 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
589 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
590 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
591 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
592 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
593 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
594 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
595 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
596 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
597 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
598 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
599 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
600 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
601 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
602 };
603
604 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
605 {
606 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
607 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
608 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
609 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
610 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
611 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
612 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
613 };
614
615 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
616 {
617 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
618 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
619 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
620 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
621 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
622 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
623 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
624 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
625 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
626 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
627 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
628 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
629 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
630 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
631 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
632 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
633 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
634 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
635 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
636 };
637
638 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
639 {
640 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
641 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
642 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
643 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
644 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
645 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
646 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
647 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
648 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
649 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
650 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
651 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
652 };
653
654 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
655 {
656 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
657 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
658 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
659 };
660
661 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
662 {
663 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
664 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
665 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
666 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
667 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
668 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
669 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
670 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
671 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
672 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
673 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
674 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
675 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
676 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
677 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
678 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
679 };
680
681 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
682 {
683 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
684 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
685 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
686 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
687 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
688 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
689 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
690 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
691 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
692 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
693 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
694 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
695 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
696 };
697
698 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
699 {
700 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
701 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
702 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
703 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
704 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
705 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
706 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
707 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
708 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
709 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
710 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
711 };
712
713 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
714 {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
715 {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
716 };
717
718 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
719 {
720 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
721 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
722 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
723 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
724 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
725 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
726 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
727 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
728 };
729
730 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
731 {
732 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
733 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
734 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
735 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
736 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
737 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
738 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
739 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
740 };
741
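/*
 * Indirect register write through the RLC, used when the guest must not touch
 * RLC-protected registers directly under SR-IOV.  GRBM_GFX_CNTL and
 * GRBM_GFX_INDEX are mirrored into SCRATCH_REG2/3 and then written as usual;
 * any other offset is handed to the RLC by placing the value in SCRATCH_REG0,
 * the offset (with bit 31 set as the request flag) in SCRATCH_REG1, ringing
 * RLC_SPARE_INT, and polling until the RLC clears the request flag or the
 * retry budget runs out.
 */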
742 static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
743 {
744 static void *scratch_reg0;
745 static void *scratch_reg1;
746 static void *scratch_reg2;
747 static void *scratch_reg3;
748 static void *spare_int;
749 static uint32_t grbm_cntl;
750 static uint32_t grbm_idx;
751
752 scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
753 scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
754 scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
755 scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
756 spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
757
758 grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
759 grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
760
761 if (amdgpu_sriov_runtime(adev)) {
762 pr_err("shouldn't call rlcg write register during runtime\n");
763 return;
764 }
765
766 if (offset == grbm_cntl || offset == grbm_idx) {
767 if (offset == grbm_cntl)
768 writel(v, scratch_reg2);
769 else if (offset == grbm_idx)
770 writel(v, scratch_reg3);
771
772 writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
773 } else {
774 uint32_t i = 0;
775 uint32_t retries = 50000;
776
777 writel(v, scratch_reg0);
778 writel(offset | 0x80000000, scratch_reg1);
779 writel(1, spare_int);
780 for (i = 0; i < retries; i++) {
781 u32 tmp;
782
783 tmp = readl(scratch_reg1);
784 if (!(tmp & 0x80000000))
785 break;
786
787 udelay(10);
788 }
789 if (i >= retries)
790 pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
791 }
792
793 }
794
795 static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset,
796 u32 v, u32 acc_flags, u32 hwip)
797 {
798 if ((acc_flags & AMDGPU_REGS_RLC) &&
799 amdgpu_sriov_fullaccess(adev)) {
800 gfx_v9_0_rlcg_w(adev, offset, v, acc_flags);
801
802 return;
803 }
804
805 if (acc_flags & AMDGPU_REGS_NO_KIQ)
806 WREG32_NO_KIQ(offset, v);
807 else
808 WREG32(offset, v);
809 }
810
811 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
812 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
813 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
814 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
815
816 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
817 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
818 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
819 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
820 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
821 struct amdgpu_cu_info *cu_info);
822 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
823 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
824 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
825 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
826 void *ras_error_status);
827 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
828 void *inject_if);
829 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
830
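/*
 * KIQ (kernel interface queue) PM4 packet builders.  The helpers below only
 * emit the SET_RESOURCES, MAP_QUEUES, UNMAP_QUEUES, QUERY_STATUS and
 * INVALIDATE_TLBS packets onto the KIQ ring; allocating ring space and
 * committing the submission is left to the callers.
 */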
831 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
832 uint64_t queue_mask)
833 {
834 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
835 amdgpu_ring_write(kiq_ring,
836 PACKET3_SET_RESOURCES_VMID_MASK(0) |
 837                 /* vmid_mask:0, queue_type:0 (KIQ) */
838 PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
839 amdgpu_ring_write(kiq_ring,
840 lower_32_bits(queue_mask)); /* queue mask lo */
841 amdgpu_ring_write(kiq_ring,
842 upper_32_bits(queue_mask)); /* queue mask hi */
843 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
844 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
845 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
846 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
847 }
848
849 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
850 struct amdgpu_ring *ring)
851 {
852 struct amdgpu_device *adev = kiq_ring->adev;
853 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
854 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
855 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
856
857 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
858 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
859 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
860 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
861 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
862 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
863 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
864 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
865 /*queue_type: normal compute queue */
866 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
867 /* alloc format: all_on_one_pipe */
868 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
869 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
870 /* num_queues: must be 1 */
871 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
872 amdgpu_ring_write(kiq_ring,
873 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
874 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
875 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
876 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
877 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
878 }
879
880 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
881 struct amdgpu_ring *ring,
882 enum amdgpu_unmap_queues_action action,
883 u64 gpu_addr, u64 seq)
884 {
885 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
886
887 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
888 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
889 PACKET3_UNMAP_QUEUES_ACTION(action) |
890 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
891 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
892 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
893 amdgpu_ring_write(kiq_ring,
894 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
895
896 if (action == PREEMPT_QUEUES_NO_UNMAP) {
897 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
898 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
899 amdgpu_ring_write(kiq_ring, seq);
900 } else {
901 amdgpu_ring_write(kiq_ring, 0);
902 amdgpu_ring_write(kiq_ring, 0);
903 amdgpu_ring_write(kiq_ring, 0);
904 }
905 }
906
907 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
908 struct amdgpu_ring *ring,
909 u64 addr,
910 u64 seq)
911 {
912 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
913
914 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
915 amdgpu_ring_write(kiq_ring,
916 PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
917 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
918 PACKET3_QUERY_STATUS_COMMAND(2));
919 /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
920 amdgpu_ring_write(kiq_ring,
921 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
922 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
923 amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
924 amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
925 amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
926 amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
927 }
928
929 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
930 uint16_t pasid, uint32_t flush_type,
931 bool all_hub)
932 {
933 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
934 amdgpu_ring_write(kiq_ring,
935 PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
936 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
937 PACKET3_INVALIDATE_TLBS_PASID(pasid) |
938 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
939 }
940
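/*
 * The *_size fields in the kiq_pm4_funcs table below are the total dword
 * counts (PACKET3 header included) that the corresponding helpers above write
 * to the KIQ ring.
 */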
941 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
942 .kiq_set_resources = gfx_v9_0_kiq_set_resources,
943 .kiq_map_queues = gfx_v9_0_kiq_map_queues,
944 .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
945 .kiq_query_status = gfx_v9_0_kiq_query_status,
946 .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
947 .set_resources_size = 8,
948 .map_queues_size = 7,
949 .unmap_queues_size = 6,
950 .query_status_size = 7,
951 .invalidate_tlbs_size = 2,
952 };
953
954 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
955 {
956 adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
957 }
958
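/*
 * Apply the per-ASIC "golden" register settings defined in the tables above.
 * Each entry names a register together with a mask and value consumed by
 * soc15_program_register_sequence(); most ASICs additionally receive
 * golden_settings_gc_9_x_common, with Renoir, Arcturus and Aldebaran handled
 * as exceptions below.
 */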
959 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
960 {
961 switch (adev->asic_type) {
962 case CHIP_VEGA10:
963 soc15_program_register_sequence(adev,
964 golden_settings_gc_9_0,
965 ARRAY_SIZE(golden_settings_gc_9_0));
966 soc15_program_register_sequence(adev,
967 golden_settings_gc_9_0_vg10,
968 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
969 break;
970 case CHIP_VEGA12:
971 soc15_program_register_sequence(adev,
972 golden_settings_gc_9_2_1,
973 ARRAY_SIZE(golden_settings_gc_9_2_1));
974 soc15_program_register_sequence(adev,
975 golden_settings_gc_9_2_1_vg12,
976 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
977 break;
978 case CHIP_VEGA20:
979 soc15_program_register_sequence(adev,
980 golden_settings_gc_9_0,
981 ARRAY_SIZE(golden_settings_gc_9_0));
982 soc15_program_register_sequence(adev,
983 golden_settings_gc_9_0_vg20,
984 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
985 break;
986 case CHIP_ARCTURUS:
987 soc15_program_register_sequence(adev,
988 golden_settings_gc_9_4_1_arct,
989 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
990 break;
991 case CHIP_RAVEN:
992 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
993 ARRAY_SIZE(golden_settings_gc_9_1));
994 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
995 soc15_program_register_sequence(adev,
996 golden_settings_gc_9_1_rv2,
997 ARRAY_SIZE(golden_settings_gc_9_1_rv2));
998 else
999 soc15_program_register_sequence(adev,
1000 golden_settings_gc_9_1_rv1,
1001 ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1002 break;
1003 case CHIP_RENOIR:
1004 soc15_program_register_sequence(adev,
1005 golden_settings_gc_9_1_rn,
1006 ARRAY_SIZE(golden_settings_gc_9_1_rn));
1007                 return; /* for renoir, don't need the common golden settings */
1008 case CHIP_ALDEBARAN:
1009 gfx_v9_4_2_init_golden_registers(adev,
1010 adev->smuio.funcs->get_die_id(adev));
1011 break;
1012 default:
1013 break;
1014 }
1015
1016 if ((adev->asic_type != CHIP_ARCTURUS) &&
1017 (adev->asic_type != CHIP_ALDEBARAN))
1018 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1019 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1020 }
1021
1022 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
1023 {
1024 adev->gfx.scratch.num_reg = 8;
1025 adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1026 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
1027 }
1028
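/*
 * Small PM4 helpers: gfx_v9_0_write_data_to_reg() emits a WRITE_DATA packet
 * targeting a register (optionally with write confirmation), and
 * gfx_v9_0_wait_reg_mem() emits a WAIT_REG_MEM packet that polls a register
 * or memory location until (value & mask) == ref, using the "equal" compare
 * function encoded below.
 */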
1029 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1030 bool wc, uint32_t reg, uint32_t val)
1031 {
1032 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1033 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1034 WRITE_DATA_DST_SEL(0) |
1035 (wc ? WR_CONFIRM : 0));
1036 amdgpu_ring_write(ring, reg);
1037 amdgpu_ring_write(ring, 0);
1038 amdgpu_ring_write(ring, val);
1039 }
1040
1041 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1042 int mem_space, int opt, uint32_t addr0,
1043 uint32_t addr1, uint32_t ref, uint32_t mask,
1044 uint32_t inv)
1045 {
1046 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1047 amdgpu_ring_write(ring,
1048 /* memory (1) or register (0) */
1049 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1050 WAIT_REG_MEM_OPERATION(opt) | /* wait */
1051 WAIT_REG_MEM_FUNCTION(3) | /* equal */
1052 WAIT_REG_MEM_ENGINE(eng_sel)));
1053
1054 if (mem_space)
1055 BUG_ON(addr0 & 0x3); /* Dword align */
1056 amdgpu_ring_write(ring, addr0);
1057 amdgpu_ring_write(ring, addr1);
1058 amdgpu_ring_write(ring, ref);
1059 amdgpu_ring_write(ring, mask);
1060 amdgpu_ring_write(ring, inv); /* poll interval */
1061 }
1062
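/*
 * Basic ring sanity test: seed a scratch register with 0xCAFEDEAD, ask the CP
 * to write 0xDEADBEEF to it through a SET_UCONFIG_REG packet, and poll (up to
 * adev->usec_timeout microseconds) until the new value shows up.  Returns 0
 * on success or -ETIMEDOUT if the CP never processed the packet.
 */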
1063 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1064 {
1065 struct amdgpu_device *adev = ring->adev;
1066 uint32_t scratch;
1067 uint32_t tmp = 0;
1068 unsigned i;
1069 int r;
1070
1071 r = amdgpu_gfx_scratch_get(adev, &scratch);
1072 if (r)
1073 return r;
1074
1075 WREG32(scratch, 0xCAFEDEAD);
1076 r = amdgpu_ring_alloc(ring, 3);
1077 if (r)
1078 goto error_free_scratch;
1079
1080 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1081 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1082 amdgpu_ring_write(ring, 0xDEADBEEF);
1083 amdgpu_ring_commit(ring);
1084
1085 for (i = 0; i < adev->usec_timeout; i++) {
1086 tmp = RREG32(scratch);
1087 if (tmp == 0xDEADBEEF)
1088 break;
1089 udelay(1);
1090 }
1091
1092 if (i >= adev->usec_timeout)
1093 r = -ETIMEDOUT;
1094
1095 error_free_scratch:
1096 amdgpu_gfx_scratch_free(adev, scratch);
1097 return r;
1098 }
1099
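/*
 * Indirect-buffer sanity test: build a five-dword IB that WRITE_DATAs
 * 0xDEADBEEF into a writeback slot pre-filled with 0xCAFEDEAD, schedule it on
 * the ring, wait for its fence and check the slot.  Returns 0 on success,
 * -ETIMEDOUT if the fence never signals, or -EINVAL if the value is wrong.
 */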
1100 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1101 {
1102 struct amdgpu_device *adev = ring->adev;
1103 struct amdgpu_ib ib;
1104 struct dma_fence *f = NULL;
1105
1106 unsigned index;
1107 uint64_t gpu_addr;
1108 uint32_t tmp;
1109 long r;
1110
1111 r = amdgpu_device_wb_get(adev, &index);
1112 if (r)
1113 return r;
1114
1115 gpu_addr = adev->wb.gpu_addr + (index * 4);
1116 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1117 memset(&ib, 0, sizeof(ib));
1118 r = amdgpu_ib_get(adev, NULL, 16,
1119 AMDGPU_IB_POOL_DIRECT, &ib);
1120 if (r)
1121 goto err1;
1122
1123 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1124 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1125 ib.ptr[2] = lower_32_bits(gpu_addr);
1126 ib.ptr[3] = upper_32_bits(gpu_addr);
1127 ib.ptr[4] = 0xDEADBEEF;
1128 ib.length_dw = 5;
1129
1130 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1131 if (r)
1132 goto err2;
1133
1134 r = dma_fence_wait_timeout(f, false, timeout);
1135 if (r == 0) {
1136 r = -ETIMEDOUT;
1137 goto err2;
1138 } else if (r < 0) {
1139 goto err2;
1140 }
1141
1142 tmp = adev->wb.wb[index];
1143 if (tmp == 0xDEADBEEF)
1144 r = 0;
1145 else
1146 r = -EINVAL;
1147
1148 err2:
1149 amdgpu_ib_free(adev, &ib, NULL);
1150 dma_fence_put(f);
1151 err1:
1152 amdgpu_device_wb_free(adev, index);
1153 return r;
1154 }
1155
1156
1157 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1158 {
1159 release_firmware(adev->gfx.pfp_fw);
1160 adev->gfx.pfp_fw = NULL;
1161 release_firmware(adev->gfx.me_fw);
1162 adev->gfx.me_fw = NULL;
1163 release_firmware(adev->gfx.ce_fw);
1164 adev->gfx.ce_fw = NULL;
1165 release_firmware(adev->gfx.rlc_fw);
1166 adev->gfx.rlc_fw = NULL;
1167 release_firmware(adev->gfx.mec_fw);
1168 adev->gfx.mec_fw = NULL;
1169 release_firmware(adev->gfx.mec2_fw);
1170 adev->gfx.mec2_fw = NULL;
1171
1172 kfree(adev->gfx.rlc.register_list_format);
1173 }
1174
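/*
 * Parse the v2.1 RLC firmware header: record the ucode/feature versions and
 * the offsets and sizes of the save-restore list control, GPM and SRM blobs
 * so they can be uploaded later.
 */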
1175 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1176 {
1177 const struct rlc_firmware_header_v2_1 *rlc_hdr;
1178
1179 rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1180 adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1181 adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1182 adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1183 adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1184 adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1185 adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1186 adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1187 adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1188 adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1189 adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1190 adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1191 adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1192 adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1193 le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1194 }
1195
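/*
 * Set adev->gfx.me_fw_write_wait / mec_fw_write_wait once the ME, PFP and MEC
 * firmware on the given ASIC meets the per-chip minimum versions below, and
 * warn once if the CP firmware is older than the driver expects.
 */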
1196 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1197 {
1198 adev->gfx.me_fw_write_wait = false;
1199 adev->gfx.mec_fw_write_wait = false;
1200
1201 if ((adev->asic_type != CHIP_ARCTURUS) &&
1202 ((adev->gfx.mec_fw_version < 0x000001a5) ||
1203 (adev->gfx.mec_feature_version < 46) ||
1204 (adev->gfx.pfp_fw_version < 0x000000b7) ||
1205 (adev->gfx.pfp_feature_version < 46)))
1206 DRM_WARN_ONCE("CP firmware version too old, please update!");
1207
1208 switch (adev->asic_type) {
1209 case CHIP_VEGA10:
1210 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1211 (adev->gfx.me_feature_version >= 42) &&
1212 (adev->gfx.pfp_fw_version >= 0x000000b1) &&
1213 (adev->gfx.pfp_feature_version >= 42))
1214 adev->gfx.me_fw_write_wait = true;
1215
1216 if ((adev->gfx.mec_fw_version >= 0x00000193) &&
1217 (adev->gfx.mec_feature_version >= 42))
1218 adev->gfx.mec_fw_write_wait = true;
1219 break;
1220 case CHIP_VEGA12:
1221 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1222 (adev->gfx.me_feature_version >= 44) &&
1223 (adev->gfx.pfp_fw_version >= 0x000000b2) &&
1224 (adev->gfx.pfp_feature_version >= 44))
1225 adev->gfx.me_fw_write_wait = true;
1226
1227 if ((adev->gfx.mec_fw_version >= 0x00000196) &&
1228 (adev->gfx.mec_feature_version >= 44))
1229 adev->gfx.mec_fw_write_wait = true;
1230 break;
1231 case CHIP_VEGA20:
1232 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1233 (adev->gfx.me_feature_version >= 44) &&
1234 (adev->gfx.pfp_fw_version >= 0x000000b2) &&
1235 (adev->gfx.pfp_feature_version >= 44))
1236 adev->gfx.me_fw_write_wait = true;
1237
1238 if ((adev->gfx.mec_fw_version >= 0x00000197) &&
1239 (adev->gfx.mec_feature_version >= 44))
1240 adev->gfx.mec_fw_write_wait = true;
1241 break;
1242 case CHIP_RAVEN:
1243 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1244 (adev->gfx.me_feature_version >= 42) &&
1245 (adev->gfx.pfp_fw_version >= 0x000000b1) &&
1246 (adev->gfx.pfp_feature_version >= 42))
1247 adev->gfx.me_fw_write_wait = true;
1248
1249 if ((adev->gfx.mec_fw_version >= 0x00000192) &&
1250 (adev->gfx.mec_feature_version >= 42))
1251 adev->gfx.mec_fw_write_wait = true;
1252 break;
1253 default:
1254 adev->gfx.me_fw_write_wait = true;
1255 adev->gfx.mec_fw_write_wait = true;
1256 break;
1257 }
1258 }
1259
1260 struct amdgpu_gfxoff_quirk {
1261 u16 chip_vendor;
1262 u16 chip_device;
1263 u16 subsys_vendor;
1264 u16 subsys_device;
1265 u8 revision;
1266 };
1267
1268 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1269 /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1270 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1271 /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1272 { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1273 /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1274 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1275 /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1276 { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1277 { 0, 0, 0, 0, 0 },
1278 };
1279
1280 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1281 {
1282 const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1283
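/* the quirk list is terminated by an all-zero sentinel entry (chip_device == 0) */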
1284 while (p && p->chip_device != 0) {
1285 if (pdev->vendor == p->chip_vendor &&
1286 pdev->device == p->chip_device &&
1287 pdev->subsystem_vendor == p->subsys_vendor &&
1288 pdev->subsystem_device == p->subsys_device &&
1289 pdev->revision == p->revision) {
1290 return true;
1291 }
1292 ++p;
1293 }
1294 return false;
1295 }
1296
1297 static bool is_raven_kicker(struct amdgpu_device *adev)
1298 {
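/* "kicker" Raven parts ship SMU firmware 0x41e2b or newer and use the
* raven_kicker_rlc.bin RLC image (see gfx_v9_0_init_rlc_microcode()) */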
1299 if (adev->pm.fw_version >= 0x41e2b)
1300 return true;
1301 else
1302 return false;
1303 }
1304
1305 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1306 {
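/* newer Renoir CP ME firmware (>= 0xa5, feature version 52) expects an enlarged gfx doorbell range */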
1307 if ((adev->asic_type == CHIP_RENOIR) &&
1308 (adev->gfx.me_fw_version >= 0x000000a5) &&
1309 (adev->gfx.me_feature_version >= 52))
1310 return true;
1311 else
1312 return false;
1313 }
1314
1315 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1316 {
1317 if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1318 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1319
1320 switch (adev->asic_type) {
1321 case CHIP_VEGA10:
1322 case CHIP_VEGA12:
1323 case CHIP_VEGA20:
1324 break;
1325 case CHIP_RAVEN:
1326 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1327 (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1328 ((!is_raven_kicker(adev) &&
1329 adev->gfx.rlc_fw_version < 531) ||
1330 (adev->gfx.rlc_feature_version < 1) ||
1331 !adev->gfx.rlc.is_rlc_v2_1))
1332 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1333
1334 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1335 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1336 AMD_PG_SUPPORT_CP |
1337 AMD_PG_SUPPORT_RLC_SMU_HS;
1338 break;
1339 case CHIP_RENOIR:
1340 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1341 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1342 AMD_PG_SUPPORT_CP |
1343 AMD_PG_SUPPORT_RLC_SMU_HS;
1344 break;
1345 default:
1346 break;
1347 }
1348 }
1349
1350 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1351 const char *chip_name)
1352 {
1353 char fw_name[30];
1354 int err;
1355 struct amdgpu_firmware_info *info = NULL;
1356 const struct common_firmware_header *header = NULL;
1357 const struct gfx_firmware_header_v1_0 *cp_hdr;
1358
1359 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1360 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1361 if (err)
1362 goto out;
1363 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1364 if (err)
1365 goto out;
1366 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1367 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1368 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1369
1370 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1371 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1372 if (err)
1373 goto out;
1374 err = amdgpu_ucode_validate(adev->gfx.me_fw);
1375 if (err)
1376 goto out;
1377 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1378 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1379 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1380
1381 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1382 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1383 if (err)
1384 goto out;
1385 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1386 if (err)
1387 goto out;
1388 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1389 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1390 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1391
1392 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1393 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1394 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1395 info->fw = adev->gfx.pfp_fw;
1396 header = (const struct common_firmware_header *)info->fw->data;
1397 adev->firmware.fw_size +=
1398 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1399
1400 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1401 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1402 info->fw = adev->gfx.me_fw;
1403 header = (const struct common_firmware_header *)info->fw->data;
1404 adev->firmware.fw_size +=
1405 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1406
1407 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1408 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1409 info->fw = adev->gfx.ce_fw;
1410 header = (const struct common_firmware_header *)info->fw->data;
1411 adev->firmware.fw_size +=
1412 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1413 }
1414
1415 out:
1416 if (err) {
1417 dev_err(adev->dev,
1418 "gfx9: Failed to load firmware \"%s\"\n",
1419 fw_name);
1420 release_firmware(adev->gfx.pfp_fw);
1421 adev->gfx.pfp_fw = NULL;
1422 release_firmware(adev->gfx.me_fw);
1423 adev->gfx.me_fw = NULL;
1424 release_firmware(adev->gfx.ce_fw);
1425 adev->gfx.ce_fw = NULL;
1426 }
1427 return err;
1428 }
1429
1430 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1431 const char *chip_name)
1432 {
1433 char fw_name[30];
1434 int err;
1435 struct amdgpu_firmware_info *info = NULL;
1436 const struct common_firmware_header *header = NULL;
1437 const struct rlc_firmware_header_v2_0 *rlc_hdr;
1438 unsigned int *tmp = NULL;
1439 unsigned int i = 0;
1440 uint16_t version_major;
1441 uint16_t version_minor;
1442 uint32_t smu_version;
1443
1444 /*
1445 * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1446 * instead of picasso_rlc.bin.
1447 * Detection:
1448 * PCO AM4: revision >= 0xC8 && revision <= 0xCF,
1449 * or revision >= 0xD8 && revision <= 0xDF;
1450 * otherwise the part is PCO FP5.
1451 */
1452 if (!strcmp(chip_name, "picasso") &&
1453 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1454 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1455 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1456 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1457 (smu_version >= 0x41e2b))
1458 /*
1459 * SMC is loaded by SBIOS on APUs, so the SMU version can be queried directly.
1460 */
1461 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1462 else
1463 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1464 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1465 if (err)
1466 goto out;
1467 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1468 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1469
1470 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1471 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1472 if (version_major == 2 && version_minor == 1)
1473 adev->gfx.rlc.is_rlc_v2_1 = true;
1474
1475 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1476 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1477 adev->gfx.rlc.save_and_restore_offset =
1478 le32_to_cpu(rlc_hdr->save_and_restore_offset);
1479 adev->gfx.rlc.clear_state_descriptor_offset =
1480 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1481 adev->gfx.rlc.avail_scratch_ram_locations =
1482 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1483 adev->gfx.rlc.reg_restore_list_size =
1484 le32_to_cpu(rlc_hdr->reg_restore_list_size);
1485 adev->gfx.rlc.reg_list_format_start =
1486 le32_to_cpu(rlc_hdr->reg_list_format_start);
1487 adev->gfx.rlc.reg_list_format_separate_start =
1488 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1489 adev->gfx.rlc.starting_offsets_start =
1490 le32_to_cpu(rlc_hdr->starting_offsets_start);
1491 adev->gfx.rlc.reg_list_format_size_bytes =
1492 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1493 adev->gfx.rlc.reg_list_size_bytes =
1494 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1495 adev->gfx.rlc.register_list_format =
1496 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1497 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1498 if (!adev->gfx.rlc.register_list_format) {
1499 err = -ENOMEM;
1500 goto out;
1501 }
1502
1503 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1504 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1505 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1506 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1507
1508 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1509
1510 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1511 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1512 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1513 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1514
1515 if (adev->gfx.rlc.is_rlc_v2_1)
1516 gfx_v9_0_init_rlc_ext_microcode(adev);
1517
1518 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1519 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1520 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1521 info->fw = adev->gfx.rlc_fw;
1522 header = (const struct common_firmware_header *)info->fw->data;
1523 adev->firmware.fw_size +=
1524 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1525
1526 if (adev->gfx.rlc.is_rlc_v2_1 &&
1527 adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1528 adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1529 adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1530 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1531 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1532 info->fw = adev->gfx.rlc_fw;
1533 adev->firmware.fw_size +=
1534 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1535
1536 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1537 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1538 info->fw = adev->gfx.rlc_fw;
1539 adev->firmware.fw_size +=
1540 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1541
1542 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1543 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1544 info->fw = adev->gfx.rlc_fw;
1545 adev->firmware.fw_size +=
1546 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1547 }
1548 }
1549
1550 out:
1551 if (err) {
1552 dev_err(adev->dev,
1553 "gfx9: Failed to load firmware \"%s\"\n",
1554 fw_name);
1555 release_firmware(adev->gfx.rlc_fw);
1556 adev->gfx.rlc_fw = NULL;
1557 }
1558 return err;
1559 }
1560
1561 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1562 {
1563 if (adev->asic_type == CHIP_ALDEBARAN ||
1564 adev->asic_type == CHIP_ARCTURUS ||
1565 adev->asic_type == CHIP_RENOIR)
1566 return false;
1567
1568 return true;
1569 }
1570
1571 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1572 const char *chip_name)
1573 {
1574 char fw_name[30];
1575 int err;
1576 struct amdgpu_firmware_info *info = NULL;
1577 const struct common_firmware_header *header = NULL;
1578 const struct gfx_firmware_header_v1_0 *cp_hdr;
1579
1580 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1581 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1582 if (err)
1583 goto out;
1584 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1585 if (err)
1586 goto out;
1587 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1588 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1589 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1590
1591
1592 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1593 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1594 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1595 if (!err) {
1596 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1597 if (err)
1598 goto out;
1599 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1600 adev->gfx.mec2_fw->data;
1601 adev->gfx.mec2_fw_version =
1602 le32_to_cpu(cp_hdr->header.ucode_version);
1603 adev->gfx.mec2_feature_version =
1604 le32_to_cpu(cp_hdr->ucode_feature_version);
1605 } else {
1606 err = 0;
1607 adev->gfx.mec2_fw = NULL;
1608 }
1609 } else {
1610 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1611 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1612 }
1613
1614 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1615 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1616 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1617 info->fw = adev->gfx.mec_fw;
1618 header = (const struct common_firmware_header *)info->fw->data;
1619 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1620 adev->firmware.fw_size +=
1621 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1622
1623 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1624 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1625 info->fw = adev->gfx.mec_fw;
1626 adev->firmware.fw_size +=
1627 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1628
1629 if (adev->gfx.mec2_fw) {
1630 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1631 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1632 info->fw = adev->gfx.mec2_fw;
1633 header = (const struct common_firmware_header *)info->fw->data;
1634 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1635 adev->firmware.fw_size +=
1636 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1637
1638 /* TODO: Determine if MEC2 JT FW loading can be removed
1639 * for all GFX v9 ASICs and newer */
1640 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1641 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1642 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1643 info->fw = adev->gfx.mec2_fw;
1644 adev->firmware.fw_size +=
1645 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1646 PAGE_SIZE);
1647 }
1648 }
1649 }
1650
1651 out:
1652 gfx_v9_0_check_if_need_gfxoff(adev);
1653 gfx_v9_0_check_fw_write_wait(adev);
1654 if (err) {
1655 dev_err(adev->dev,
1656 "gfx9: Failed to load firmware \"%s\"\n",
1657 fw_name);
1658 release_firmware(adev->gfx.mec_fw);
1659 adev->gfx.mec_fw = NULL;
1660 release_firmware(adev->gfx.mec2_fw);
1661 adev->gfx.mec2_fw = NULL;
1662 }
1663 return err;
1664 }
1665
1666 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1667 {
1668 const char *chip_name;
1669 int r;
1670
1671 DRM_DEBUG("\n");
1672
1673 switch (adev->asic_type) {
1674 case CHIP_VEGA10:
1675 chip_name = "vega10";
1676 break;
1677 case CHIP_VEGA12:
1678 chip_name = "vega12";
1679 break;
1680 case CHIP_VEGA20:
1681 chip_name = "vega20";
1682 break;
1683 case CHIP_RAVEN:
1684 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1685 chip_name = "raven2";
1686 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1687 chip_name = "picasso";
1688 else
1689 chip_name = "raven";
1690 break;
1691 case CHIP_ARCTURUS:
1692 chip_name = "arcturus";
1693 break;
1694 case CHIP_RENOIR:
1695 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1696 chip_name = "renoir";
1697 else
1698 chip_name = "green_sardine";
1699 break;
1700 case CHIP_ALDEBARAN:
1701 chip_name = "aldebaran";
1702 break;
1703 default:
1704 BUG();
1705 }
1706
1707 /* No CPG in Arcturus */
1708 if (adev->gfx.num_gfx_rings) {
1709 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1710 if (r)
1711 return r;
1712 }
1713
1714 r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1715 if (r)
1716 return r;
1717
1718 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1719 if (r)
1720 return r;
1721
1722 return r;
1723 }
1724
1725 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1726 {
1727 u32 count = 0;
1728 const struct cs_section_def *sect = NULL;
1729 const struct cs_extent_def *ext = NULL;
1730
1731 /* begin clear state */
1732 count += 2;
1733 /* context control state */
1734 count += 3;
1735
1736 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1737 for (ext = sect->section; ext->extent != NULL; ++ext) {
1738 if (sect->id == SECT_CONTEXT)
1739 count += 2 + ext->reg_count;
1740 else
1741 return 0;
1742 }
1743 }
1744
1745 /* end clear state */
1746 count += 2;
1747 /* clear state */
1748 count += 2;
1749
1750 return count;
1751 }
1752
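/* Build the clear-state indirect buffer: PREAMBLE begin, CONTEXT_CONTROL,
* the SECT_CONTEXT register extents from the cs_data table, then
* PREAMBLE end followed by a CLEAR_STATE packet */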
1753 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1754 volatile u32 *buffer)
1755 {
1756 u32 count = 0, i;
1757 const struct cs_section_def *sect = NULL;
1758 const struct cs_extent_def *ext = NULL;
1759
1760 if (adev->gfx.rlc.cs_data == NULL)
1761 return;
1762 if (buffer == NULL)
1763 return;
1764
1765 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1766 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1767
1768 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1769 buffer[count++] = cpu_to_le32(0x80000000);
1770 buffer[count++] = cpu_to_le32(0x80000000);
1771
1772 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1773 for (ext = sect->section; ext->extent != NULL; ++ext) {
1774 if (sect->id == SECT_CONTEXT) {
1775 buffer[count++] =
1776 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1777 buffer[count++] = cpu_to_le32(ext->reg_index -
1778 PACKET3_SET_CONTEXT_REG_START);
1779 for (i = 0; i < ext->reg_count; i++)
1780 buffer[count++] = cpu_to_le32(ext->extent[i]);
1781 } else {
1782 return;
1783 }
1784 }
1785 }
1786
1787 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1788 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1789
1790 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1791 buffer[count++] = cpu_to_le32(0);
1792 }
1793
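/* Build per-SE/SH bitmaps of the first always_on_cu_num CUs, which stay
* active for load balancing; the first pg_always_on_cu_num of them are also
* written to RLC_PG_ALWAYS_ON_CU_MASK so they are never power gated */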
1794 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1795 {
1796 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1797 uint32_t pg_always_on_cu_num = 2;
1798 uint32_t always_on_cu_num;
1799 uint32_t i, j, k;
1800 uint32_t mask, cu_bitmap, counter;
1801
1802 if (adev->flags & AMD_IS_APU)
1803 always_on_cu_num = 4;
1804 else if (adev->asic_type == CHIP_VEGA12)
1805 always_on_cu_num = 8;
1806 else
1807 always_on_cu_num = 12;
1808
1809 mutex_lock(&adev->grbm_idx_mutex);
1810 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1811 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1812 mask = 1;
1813 cu_bitmap = 0;
1814 counter = 0;
1815 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1816
1817 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1818 if (cu_info->bitmap[i][j] & mask) {
1819 if (counter == pg_always_on_cu_num)
1820 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1821 if (counter < always_on_cu_num)
1822 cu_bitmap |= mask;
1823 else
1824 break;
1825 counter++;
1826 }
1827 mask <<= 1;
1828 }
1829
1830 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1831 cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1832 }
1833 }
1834 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1835 mutex_unlock(&adev->grbm_idx_mutex);
1836 }
1837
1838 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1839 {
1840 uint32_t data;
1841
1842 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1843 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1844 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1845 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1846 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1847
1848 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1849 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1850
1851 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1852 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1853
1854 mutex_lock(&adev->grbm_idx_mutex);
1855 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1856 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1857 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1858
1859 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1860 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1861 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1862 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1863 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1864
1865 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1866 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1867 data &= 0x0000FFFF;
1868 data |= 0x00C00000;
1869 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1870
1871 /*
1872 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1873 * programmed in gfx_v9_0_init_always_on_cu_mask()
1874 */
1875
1876 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1877 * but is used here for RLC_LB_CNTL configuration */
1878 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1879 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1880 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1881 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1882 mutex_unlock(&adev->grbm_idx_mutex);
1883
1884 gfx_v9_0_init_always_on_cu_mask(adev);
1885 }
1886
1887 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1888 {
1889 uint32_t data;
1890
1891 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1892 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1893 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1894 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1895 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1896
1897 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1898 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1899
1900 /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1901 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1902
1903 mutex_lock(&adev->grbm_idx_mutex);
1904 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1905 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1906 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1907
1908 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1909 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1910 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1911 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1912 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1913
1914 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1915 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1916 data &= 0x0000FFFF;
1917 data |= 0x00C00000;
1918 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1919
1920 /*
1921 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1922 * programmed in gfx_v9_0_init_always_on_cu_mask()
1923 */
1924
1925 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1926 * but is used here for RLC_LB_CNTL configuration */
1927 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1928 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1929 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1930 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1931 mutex_unlock(&adev->grbm_idx_mutex);
1932
1933 gfx_v9_0_init_always_on_cu_mask(adev);
1934 }
1935
1936 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1937 {
1938 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1939 }
1940
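/* number of CP jump tables kept in the RLC cp_table; MEC2 only needs its
* own entry when a separate mec2 firmware binary is loaded */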
1941 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1942 {
1943 if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1944 return 5;
1945 else
1946 return 4;
1947 }
1948
1949 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1950 {
1951 const struct cs_section_def *cs_data;
1952 int r;
1953
1954 adev->gfx.rlc.cs_data = gfx9_cs_data;
1955
1956 cs_data = adev->gfx.rlc.cs_data;
1957
1958 if (cs_data) {
1959 /* init clear state block */
1960 r = amdgpu_gfx_rlc_init_csb(adev);
1961 if (r)
1962 return r;
1963 }
1964
1965 if (adev->flags & AMD_IS_APU) {
1966 /* TODO: double check the cp_table_size for RV */
1967 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1968 r = amdgpu_gfx_rlc_init_cpt(adev);
1969 if (r)
1970 return r;
1971 }
1972
1973 switch (adev->asic_type) {
1974 case CHIP_RAVEN:
1975 gfx_v9_0_init_lbpw(adev);
1976 break;
1977 case CHIP_VEGA20:
1978 gfx_v9_4_init_lbpw(adev);
1979 break;
1980 default:
1981 break;
1982 }
1983
1984 /* init spm vmid with 0xf */
1985 if (adev->gfx.rlc.funcs->update_spm_vmid)
1986 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1987
1988 return 0;
1989 }
1990
1991 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1992 {
1993 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1994 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1995 }
1996
1997 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1998 {
1999 int r;
2000 u32 *hpd;
2001 const __le32 *fw_data;
2002 unsigned fw_size;
2003 u32 *fw;
2004 size_t mec_hpd_size;
2005
2006 const struct gfx_firmware_header_v1_0 *mec_hdr;
2007
2008 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2009
2010 /* take ownership of the relevant compute queues */
2011 amdgpu_gfx_compute_queue_acquire(adev);
2012 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
2013 if (mec_hpd_size) {
2014 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
2015 AMDGPU_GEM_DOMAIN_VRAM,
2016 &adev->gfx.mec.hpd_eop_obj,
2017 &adev->gfx.mec.hpd_eop_gpu_addr,
2018 (void **)&hpd);
2019 if (r) {
2020 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
2021 gfx_v9_0_mec_fini(adev);
2022 return r;
2023 }
2024
2025 memset(hpd, 0, mec_hpd_size);
2026
2027 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
2028 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
2029 }
2030
2031 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2032
2033 fw_data = (const __le32 *)
2034 (adev->gfx.mec_fw->data +
2035 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2036 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
2037
2038 r = amdgpu_bo_create_reserved(adev, fw_size,
2039 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2040 &adev->gfx.mec.mec_fw_obj,
2041 &adev->gfx.mec.mec_fw_gpu_addr,
2042 (void **)&fw);
2043 if (r) {
2044 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
2045 gfx_v9_0_mec_fini(adev);
2046 return r;
2047 }
2048
2049 memcpy(fw, fw_data, fw_size);
2050
2051 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2052 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
2053
2054 return 0;
2055 }
2056
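/* Read wave state through the SQ indirect register interface: program
* SQ_IND_INDEX with the SIMD/wave and register index, then read the
* value(s) back from SQ_IND_DATA */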
2057 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
2058 {
2059 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2060 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2061 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2062 (address << SQ_IND_INDEX__INDEX__SHIFT) |
2063 (SQ_IND_INDEX__FORCE_READ_MASK));
2064 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2065 }
2066
2067 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2068 uint32_t wave, uint32_t thread,
2069 uint32_t regno, uint32_t num, uint32_t *out)
2070 {
2071 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2072 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2073 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2074 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
2075 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2076 (SQ_IND_INDEX__FORCE_READ_MASK) |
2077 (SQ_IND_INDEX__AUTO_INCR_MASK));
2078 while (num--)
2079 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2080 }
2081
2082 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2083 {
2084 /* type 1 wave data */
2085 dst[(*no_fields)++] = 1;
2086 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2087 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2088 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2089 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2090 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2091 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2092 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2093 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2094 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2095 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2096 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2097 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2098 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2099 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2100 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
2101 }
2102
2103 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2104 uint32_t wave, uint32_t start,
2105 uint32_t size, uint32_t *dst)
2106 {
2107 wave_read_regs(
2108 adev, simd, wave, 0,
2109 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2110 }
2111
2112 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2113 uint32_t wave, uint32_t thread,
2114 uint32_t start, uint32_t size,
2115 uint32_t *dst)
2116 {
2117 wave_read_regs(
2118 adev, simd, wave, thread,
2119 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2120 }
2121
2122 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2123 u32 me, u32 pipe, u32 q, u32 vm)
2124 {
2125 soc15_grbm_select(adev, me, pipe, q, vm);
2126 }
2127
2128 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2129 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2130 .select_se_sh = &gfx_v9_0_select_se_sh,
2131 .read_wave_data = &gfx_v9_0_read_wave_data,
2132 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2133 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2134 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2135 };
2136
2137 static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = {
2138 .ras_late_init = amdgpu_gfx_ras_late_init,
2139 .ras_fini = amdgpu_gfx_ras_fini,
2140 .ras_error_inject = &gfx_v9_0_ras_error_inject,
2141 .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2142 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2143 };
2144
2145 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2146 {
2147 u32 gb_addr_config;
2148 int err;
2149
2150 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2151
2152 switch (adev->asic_type) {
2153 case CHIP_VEGA10:
2154 adev->gfx.config.max_hw_contexts = 8;
2155 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2156 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2157 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2158 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2159 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2160 break;
2161 case CHIP_VEGA12:
2162 adev->gfx.config.max_hw_contexts = 8;
2163 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2164 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2165 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2166 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2167 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2168 DRM_INFO("fix gfx.config for vega12\n");
2169 break;
2170 case CHIP_VEGA20:
2171 adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs;
2172 adev->gfx.config.max_hw_contexts = 8;
2173 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2174 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2175 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2176 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2177 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2178 gb_addr_config &= ~0xf3e777ff;
2179 gb_addr_config |= 0x22014042;
2180 /* check vbios table if gpu info is not available */
2181 err = amdgpu_atomfirmware_get_gfx_info(adev);
2182 if (err)
2183 return err;
2184 break;
2185 case CHIP_RAVEN:
2186 adev->gfx.config.max_hw_contexts = 8;
2187 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2188 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2189 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2190 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2191 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2192 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2193 else
2194 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2195 break;
2196 case CHIP_ARCTURUS:
2197 adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs;
2198 adev->gfx.config.max_hw_contexts = 8;
2199 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2200 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2201 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2202 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2203 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2204 gb_addr_config &= ~0xf3e777ff;
2205 gb_addr_config |= 0x22014042;
2206 break;
2207 case CHIP_RENOIR:
2208 adev->gfx.config.max_hw_contexts = 8;
2209 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2210 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2211 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2212 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2213 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2214 gb_addr_config &= ~0xf3e777ff;
2215 gb_addr_config |= 0x22010042;
2216 break;
2217 case CHIP_ALDEBARAN:
2218 adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs;
2219 adev->gfx.config.max_hw_contexts = 8;
2220 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2221 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2222 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2223 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2224 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2225 gb_addr_config &= ~0xf3e777ff;
2226 gb_addr_config |= 0x22014042;
2227 /* check vbios table if gpu info is not available */
2228 err = amdgpu_atomfirmware_get_gfx_info(adev);
2229 if (err)
2230 return err;
2231 break;
2232 default:
2233 BUG();
2234 break;
2235 }
2236
2237 adev->gfx.config.gb_addr_config = gb_addr_config;
2238
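/* GB_ADDR_CONFIG encodes these sizes as log2 values; decode them into counts */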
2239 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2240 REG_GET_FIELD(
2241 adev->gfx.config.gb_addr_config,
2242 GB_ADDR_CONFIG,
2243 NUM_PIPES);
2244
2245 adev->gfx.config.max_tile_pipes =
2246 adev->gfx.config.gb_addr_config_fields.num_pipes;
2247
2248 adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2249 REG_GET_FIELD(
2250 adev->gfx.config.gb_addr_config,
2251 GB_ADDR_CONFIG,
2252 NUM_BANKS);
2253 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2254 REG_GET_FIELD(
2255 adev->gfx.config.gb_addr_config,
2256 GB_ADDR_CONFIG,
2257 MAX_COMPRESSED_FRAGS);
2258 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2259 REG_GET_FIELD(
2260 adev->gfx.config.gb_addr_config,
2261 GB_ADDR_CONFIG,
2262 NUM_RB_PER_SE);
2263 adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2264 REG_GET_FIELD(
2265 adev->gfx.config.gb_addr_config,
2266 GB_ADDR_CONFIG,
2267 NUM_SHADER_ENGINES);
2268 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2269 REG_GET_FIELD(
2270 adev->gfx.config.gb_addr_config,
2271 GB_ADDR_CONFIG,
2272 PIPE_INTERLEAVE_SIZE));
2273
2274 return 0;
2275 }
2276
2277 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2278 int mec, int pipe, int queue)
2279 {
2280 unsigned irq_type;
2281 struct amdgpu_ring *ring;
2282 unsigned int hw_prio;
2283
2284 ring = &adev->gfx.compute_ring[ring_id];
2285
2286 /* mec0 is me1 */
2287 ring->me = mec + 1;
2288 ring->pipe = pipe;
2289 ring->queue = queue;
2290
2291 ring->ring_obj = NULL;
2292 ring->use_doorbell = true;
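/* doorbell_index entries are 64-bit doorbell slots; ring offsets are in 32-bit dwords, hence the shift */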
2293 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2294 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2295 + (ring_id * GFX9_MEC_HPD_SIZE);
2296 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2297
2298 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2299 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2300 + ring->pipe;
2301 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2302 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2303 /* type-2 packets are deprecated on MEC, use type-3 instead */
2304 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2305 hw_prio, NULL);
2306 }
2307
2308 static int gfx_v9_0_sw_init(void *handle)
2309 {
2310 int i, j, k, r, ring_id;
2311 struct amdgpu_ring *ring;
2312 struct amdgpu_kiq *kiq;
2313 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2314
2315 switch (adev->asic_type) {
2316 case CHIP_VEGA10:
2317 case CHIP_VEGA12:
2318 case CHIP_VEGA20:
2319 case CHIP_RAVEN:
2320 case CHIP_ARCTURUS:
2321 case CHIP_RENOIR:
2322 case CHIP_ALDEBARAN:
2323 adev->gfx.mec.num_mec = 2;
2324 break;
2325 default:
2326 adev->gfx.mec.num_mec = 1;
2327 break;
2328 }
2329
2330 adev->gfx.mec.num_pipe_per_mec = 4;
2331 adev->gfx.mec.num_queue_per_pipe = 8;
2332
2333 /* EOP Event */
2334 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2335 if (r)
2336 return r;
2337
2338 /* Privileged reg */
2339 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2340 &adev->gfx.priv_reg_irq);
2341 if (r)
2342 return r;
2343
2344 /* Privileged inst */
2345 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2346 &adev->gfx.priv_inst_irq);
2347 if (r)
2348 return r;
2349
2350 /* ECC error */
2351 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2352 &adev->gfx.cp_ecc_error_irq);
2353 if (r)
2354 return r;
2355
2356 /* FUE error */
2357 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2358 &adev->gfx.cp_ecc_error_irq);
2359 if (r)
2360 return r;
2361
2362 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2363
2364 gfx_v9_0_scratch_init(adev);
2365
2366 r = gfx_v9_0_init_microcode(adev);
2367 if (r) {
2368 DRM_ERROR("Failed to load gfx firmware!\n");
2369 return r;
2370 }
2371
2372 r = adev->gfx.rlc.funcs->init(adev);
2373 if (r) {
2374 DRM_ERROR("Failed to init rlc BOs!\n");
2375 return r;
2376 }
2377
2378 r = gfx_v9_0_mec_init(adev);
2379 if (r) {
2380 DRM_ERROR("Failed to init MEC BOs!\n");
2381 return r;
2382 }
2383
2384 /* set up the gfx ring */
2385 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2386 ring = &adev->gfx.gfx_ring[i];
2387 ring->ring_obj = NULL;
2388 if (!i)
2389 sprintf(ring->name, "gfx");
2390 else
2391 sprintf(ring->name, "gfx_%d", i);
2392 ring->use_doorbell = true;
2393 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2394 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2395 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2396 AMDGPU_RING_PRIO_DEFAULT, NULL);
2397 if (r)
2398 return r;
2399 }
2400
2401 /* set up the compute queues - allocate horizontally across pipes */
2402 ring_id = 0;
2403 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2404 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2405 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2406 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2407 continue;
2408
2409 r = gfx_v9_0_compute_ring_init(adev,
2410 ring_id,
2411 i, k, j);
2412 if (r)
2413 return r;
2414
2415 ring_id++;
2416 }
2417 }
2418 }
2419
2420 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2421 if (r) {
2422 DRM_ERROR("Failed to init KIQ BOs!\n");
2423 return r;
2424 }
2425
2426 kiq = &adev->gfx.kiq;
2427 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2428 if (r)
2429 return r;
2430
2431 /* create MQDs for all compute queues as well as the KIQ for the SR-IOV case */
2432 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2433 if (r)
2434 return r;
2435
2436 adev->gfx.ce_ram_size = 0x8000;
2437
2438 r = gfx_v9_0_gpu_early_init(adev);
2439 if (r)
2440 return r;
2441
2442 return 0;
2443 }
2444
2445
2446 static int gfx_v9_0_sw_fini(void *handle)
2447 {
2448 int i;
2449 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2450
2451 if (adev->gfx.ras_funcs &&
2452 adev->gfx.ras_funcs->ras_fini)
2453 adev->gfx.ras_funcs->ras_fini(adev);
2454
2455 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2456 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2457 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2458 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2459
2460 amdgpu_gfx_mqd_sw_fini(adev);
2461 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2462 amdgpu_gfx_kiq_fini(adev);
2463
2464 gfx_v9_0_mec_fini(adev);
2465 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2466 if (adev->flags & AMD_IS_APU) {
2467 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2468 &adev->gfx.rlc.cp_table_gpu_addr,
2469 (void **)&adev->gfx.rlc.cp_table_ptr);
2470 }
2471 gfx_v9_0_free_microcode(adev);
2472
2473 return 0;
2474 }
2475
2476
2477 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2478 {
2479 /* TODO */
2480 }
2481
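/* Select which SE/SH/instance subsequent GRBM-indexed register accesses
* target; 0xffffffff broadcasts to all of them */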
2482 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2483 u32 instance)
2484 {
2485 u32 data;
2486
2487 if (instance == 0xffffffff)
2488 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2489 else
2490 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2491
2492 if (se_num == 0xffffffff)
2493 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2494 else
2495 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2496
2497 if (sh_num == 0xffffffff)
2498 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2499 else
2500 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2501
2502 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2503 }
2504
2505 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2506 {
2507 u32 data, mask;
2508
2509 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2510 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2511
2512 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2513 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2514
2515 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2516 adev->gfx.config.max_sh_per_se);
2517
2518 return (~data) & mask;
2519 }
2520
2521 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2522 {
2523 int i, j;
2524 u32 data;
2525 u32 active_rbs = 0;
2526 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2527 adev->gfx.config.max_sh_per_se;
2528
2529 mutex_lock(&adev->grbm_idx_mutex);
2530 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2531 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2532 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2533 data = gfx_v9_0_get_rb_active_bitmap(adev);
2534 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2535 rb_bitmap_width_per_sh);
2536 }
2537 }
2538 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2539 mutex_unlock(&adev->grbm_idx_mutex);
2540
2541 adev->gfx.config.backend_enable_mask = active_rbs;
2542 adev->gfx.config.num_rbs = hweight32(active_rbs);
2543 }
2544
2545 #define DEFAULT_SH_MEM_BASES (0x6000)
2546 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2547 {
2548 int i;
2549 uint32_t sh_mem_config;
2550 uint32_t sh_mem_bases;
2551
2552 /*
2553 * Configure apertures:
2554 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
2555 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
2556 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
2557 */
2558 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2559
2560 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2561 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2562 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2563
2564 mutex_lock(&adev->srbm_mutex);
2565 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2566 soc15_grbm_select(adev, 0, 0, 0, i);
2567 /* CP and shaders */
2568 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2569 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2570 }
2571 soc15_grbm_select(adev, 0, 0, 0, 0);
2572 mutex_unlock(&adev->srbm_mutex);
2573
2574 /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2575 * access. These should be enabled by FW for target VMIDs. */
2576 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2577 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2578 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2579 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2580 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2581 }
2582 }
2583
2584 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2585 {
2586 int vmid;
2587
2588 /*
2589 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2590 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2591 * the driver can enable them for graphics. VMID0 should maintain
2592 * access so that HWS firmware can save/restore entries.
2593 */
2594 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2595 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2596 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2597 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2598 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2599 }
2600 }
2601
2602 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2603 {
2604 uint32_t tmp;
2605
2606 switch (adev->asic_type) {
2607 case CHIP_ARCTURUS:
2608 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2609 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2610 DISABLE_BARRIER_WAITCNT, 1);
2611 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2612 break;
2613 default:
2614 break;
2615 }
2616 }
2617
2618 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2619 {
2620 u32 tmp;
2621 int i;
2622
2623 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2624
2625 gfx_v9_0_tiling_mode_table_init(adev);
2626
2627 gfx_v9_0_setup_rb(adev);
2628 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2629 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2630
2631 /* XXX SH_MEM regs */
2632 /* where to put LDS, scratch, GPUVM in FSA64 space */
2633 mutex_lock(&adev->srbm_mutex);
2634 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2635 soc15_grbm_select(adev, 0, 0, 0, i);
2636 /* CP and shaders */
2637 if (i == 0) {
2638 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2639 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2640 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2641 !!adev->gmc.noretry);
2642 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2643 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2644 } else {
2645 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2646 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2647 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2648 !!adev->gmc.noretry);
2649 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2650 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2651 (adev->gmc.private_aperture_start >> 48));
2652 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2653 (adev->gmc.shared_aperture_start >> 48));
2654 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2655 }
2656 }
2657 soc15_grbm_select(adev, 0, 0, 0, 0);
2658
2659 mutex_unlock(&adev->srbm_mutex);
2660
2661 gfx_v9_0_init_compute_vmid(adev);
2662 gfx_v9_0_init_gds_vmid(adev);
2663 gfx_v9_0_init_sq_config(adev);
2664 }
2665
2666 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2667 {
2668 u32 i, j, k;
2669 u32 mask;
2670
2671 mutex_lock(&adev->grbm_idx_mutex);
2672 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2673 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2674 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2675 for (k = 0; k < adev->usec_timeout; k++) {
2676 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2677 break;
2678 udelay(1);
2679 }
2680 if (k == adev->usec_timeout) {
2681 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2682 0xffffffff, 0xffffffff);
2683 mutex_unlock(&adev->grbm_idx_mutex);
2684 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2685 i, j);
2686 return;
2687 }
2688 }
2689 }
2690 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2691 mutex_unlock(&adev->grbm_idx_mutex);
2692
2693 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2694 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2695 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2696 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2697 for (k = 0; k < adev->usec_timeout; k++) {
2698 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2699 break;
2700 udelay(1);
2701 }
2702 }
2703
2704 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2705 bool enable)
2706 {
2707 u32 tmp;
2708
2709 /* These interrupts should be enabled to drive the DS clock */
2710
2711 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2712
2713 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2714 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2715 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2716 if (adev->gfx.num_gfx_rings)
2717 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2718
2719 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2720 }
2721
2722 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2723 {
2724 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2725 /* csib */
2726 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2727 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2728 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2729 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2730 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2731 adev->gfx.rlc.clear_state_size);
2732 }
2733
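/* Walk the RLC register-list-format blob starting at indirect_offset: each
* indirect block is terminated by 0xFFFFFFFF; record where every block
* starts and collect the unique indirect register offsets it references */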
2734 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2735 int indirect_offset,
2736 int list_size,
2737 int *unique_indirect_regs,
2738 int unique_indirect_reg_count,
2739 int *indirect_start_offsets,
2740 int *indirect_start_offsets_count,
2741 int max_start_offsets_count)
2742 {
2743 int idx;
2744
2745 for (; indirect_offset < list_size; indirect_offset++) {
2746 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2747 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2748 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2749
2750 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2751 indirect_offset += 2;
2752
2753 /* look for the matching index */
2754 for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2755 if (unique_indirect_regs[idx] ==
2756 register_list_format[indirect_offset] ||
2757 !unique_indirect_regs[idx])
2758 break;
2759 }
2760
2761 BUG_ON(idx >= unique_indirect_reg_count);
2762
2763 if (!unique_indirect_regs[idx])
2764 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2765
2766 indirect_offset++;
2767 }
2768 }
2769 }
2770
2771 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2772 {
2773 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2774 int unique_indirect_reg_count = 0;
2775
2776 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2777 int indirect_start_offsets_count = 0;
2778
2779 int list_size = 0;
2780 int i = 0, j = 0;
2781 u32 tmp = 0;
2782
2783 u32 *register_list_format =
2784 kmemdup(adev->gfx.rlc.register_list_format,
2785 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2786 if (!register_list_format)
2787 return -ENOMEM;
2788
2789 /* setup unique_indirect_regs array and indirect_start_offsets array */
2790 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2791 gfx_v9_1_parse_ind_reg_list(register_list_format,
2792 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2793 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2794 unique_indirect_regs,
2795 unique_indirect_reg_count,
2796 indirect_start_offsets,
2797 &indirect_start_offsets_count,
2798 ARRAY_SIZE(indirect_start_offsets));
2799
2800 /* enable auto inc in case it is disabled */
2801 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2802 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2803 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2804
2805 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2806 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2807 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2808 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2809 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2810 adev->gfx.rlc.register_restore[i]);
2811
2812 /* load indirect register */
2813 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2814 adev->gfx.rlc.reg_list_format_start);
2815
2816 /* direct register portion */
2817 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2818 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2819 register_list_format[i]);
2820
2821 /* indirect register portion */
2822 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2823 if (register_list_format[i] == 0xFFFFFFFF) {
2824 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2825 continue;
2826 }
2827
2828 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2829 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2830
2831 for (j = 0; j < unique_indirect_reg_count; j++) {
2832 if (register_list_format[i] == unique_indirect_regs[j]) {
2833 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2834 break;
2835 }
2836 }
2837
2838 BUG_ON(j >= unique_indirect_reg_count);
2839
2840 i++;
2841 }
2842
2843 /* set save/restore list size */
2844 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
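/* the restore list is presumably stored as (offset, value) pairs, hence half the dword count */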
2845 list_size = list_size >> 1;
2846 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2847 adev->gfx.rlc.reg_restore_list_size);
2848 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2849
2850 /* write the starting offsets to RLC scratch ram */
2851 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2852 adev->gfx.rlc.starting_offsets_start);
2853 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2854 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2855 indirect_start_offsets[i]);
2856
2857 /* load unique indirect regs */
2858 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2859 if (unique_indirect_regs[i] != 0) {
2860 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2861 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2862 unique_indirect_regs[i] & 0x3FFFF);
2863
2864 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2865 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2866 unique_indirect_regs[i] >> 20);
2867 }
2868 }
2869
2870 kfree(register_list_format);
2871 return 0;
2872 }
2873
2874 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2875 {
2876 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2877 }
2878
2879 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2880 bool enable)
2881 {
2882 uint32_t data = 0;
2883 uint32_t default_data = 0;
2884
2885 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2886 if (enable) {
2887 /* enable GFXIP control over CGPG */
2888 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2889 if (default_data != data)
2890 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2891
2892 /* update status */
2893 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2894 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2895 if (default_data != data)
2896 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2897 } else {
2898 /* restore GFXIP control over CGPG */
2899 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2900 if (default_data != data)
2901 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2902 }
2903 }
2904
2905 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2906 {
2907 uint32_t data = 0;
2908
2909 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2910 AMD_PG_SUPPORT_GFX_SMG |
2911 AMD_PG_SUPPORT_GFX_DMG)) {
2912 /* init IDLE_POLL_COUNT = 0x60 */
2913 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2914 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2915 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2916 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2917
2918 /* init RLC PG Delay */
2919 data = 0;
2920 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2921 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2922 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2923 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2924 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2925
2926 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2927 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2928 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2929 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2930
2931 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2932 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2933 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2934 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2935
2936 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2937 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2938
2939 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2940 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2941 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2942 if (adev->asic_type != CHIP_RENOIR)
2943 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2944 }
2945 }
2946
2947 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2948 bool enable)
2949 {
2950 uint32_t data = 0;
2951 uint32_t default_data = 0;
2952
2953 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2954 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2955 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2956 enable ? 1 : 0);
2957 if (default_data != data)
2958 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2959 }
2960
2961 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2962 bool enable)
2963 {
2964 uint32_t data = 0;
2965 uint32_t default_data = 0;
2966
2967 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2968 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2969 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2970 enable ? 1 : 0);
2971 if (default_data != data)
2972 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2973 }
2974
2975 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2976 bool enable)
2977 {
2978 uint32_t data = 0;
2979 uint32_t default_data = 0;
2980
2981 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2982 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2983 CP_PG_DISABLE,
2984 enable ? 0 : 1);
2985 if (default_data != data)
2986 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2987 }
2988
2989 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2990 bool enable)
2991 {
2992 uint32_t data, default_data;
2993
2994 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2995 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2996 GFX_POWER_GATING_ENABLE,
2997 enable ? 1 : 0);
2998 if (default_data != data)
2999 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3000 }
3001
3002 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
3003 bool enable)
3004 {
3005 uint32_t data, default_data;
3006
3007 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3008 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3009 GFX_PIPELINE_PG_ENABLE,
3010 enable ? 1 : 0);
3011 if (default_data != data)
3012 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3013
3014 if (!enable)
3015 /* read any GFX register to wake up GFX */
3016 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3017 }
3018
3019 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3020 bool enable)
3021 {
3022 uint32_t data, default_data;
3023
3024 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3025 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3026 STATIC_PER_CU_PG_ENABLE,
3027 enable ? 1 : 0);
3028 if (default_data != data)
3029 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3030 }
3031
3032 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3033 bool enable)
3034 {
3035 uint32_t data, default_data;
3036
3037 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3038 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3039 DYN_PER_CU_PG_ENABLE,
3040 enable ? 1 : 0);
3041 if (default_data != data)
3042 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3043 }
3044
3045 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3046 {
3047 gfx_v9_0_init_csb(adev);
3048
3049 /*
3050 * The RLC save/restore list has been usable since RLC v2_1,
3051 * and it is needed by the gfxoff feature.
3052 */
3053 if (adev->gfx.rlc.is_rlc_v2_1) {
3054 if (adev->asic_type == CHIP_VEGA12 ||
3055 (adev->apu_flags & AMD_APU_IS_RAVEN2))
3056 gfx_v9_1_init_rlc_save_restore_list(adev);
3057 gfx_v9_0_enable_save_restore_machine(adev);
3058 }
3059
3060 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3061 AMD_PG_SUPPORT_GFX_SMG |
3062 AMD_PG_SUPPORT_GFX_DMG |
3063 AMD_PG_SUPPORT_CP |
3064 AMD_PG_SUPPORT_GDS |
3065 AMD_PG_SUPPORT_RLC_SMU_HS)) {
3066 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3067 adev->gfx.rlc.cp_table_gpu_addr >> 8);
3068 gfx_v9_0_init_gfx_power_gating(adev);
3069 }
3070 }
3071
3072 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3073 {
3074 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3075 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3076 gfx_v9_0_wait_for_rlc_serdes(adev);
3077 }
3078
3079 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3080 {
3081 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3082 udelay(50);
3083 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3084 udelay(50);
3085 }
3086
3087 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3088 {
3089 #ifdef AMDGPU_RLC_DEBUG_RETRY
3090 u32 rlc_ucode_ver;
3091 #endif
3092
3093 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3094 udelay(50);
3095
3096 /* on APUs (e.g. carrizo) the CP interrupt is enabled after the CP is initialized, so skip it here */
3097 if (!(adev->flags & AMD_IS_APU)) {
3098 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3099 udelay(50);
3100 }
3101
3102 #ifdef AMDGPU_RLC_DEBUG_RETRY
3103 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3104 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3105 if (rlc_ucode_ver == 0x108) {
3106 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3107 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3108 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3109 * default is 0x9C4 to create a 100us interval */
3110 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3111 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3112 * to disable the page fault retry interrupts, default is
3113 * 0x100 (256) */
3114 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3115 }
3116 #endif
3117 }
3118
3119 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3120 {
3121 const struct rlc_firmware_header_v2_0 *hdr;
3122 const __le32 *fw_data;
3123 unsigned i, fw_size;
3124
3125 if (!adev->gfx.rlc_fw)
3126 return -EINVAL;
3127
3128 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3129 amdgpu_ucode_print_rlc_hdr(&hdr->header);
3130
3131 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3132 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3133 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3134
3135 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3136 RLCG_UCODE_LOADING_START_ADDRESS);
3137 for (i = 0; i < fw_size; i++)
3138 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3139 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
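	/*
	 * The GPM ucode is streamed through an address/data register pair:
	 * UCODE_ADDR is pointed at the load start, each dword written to
	 * UCODE_DATA is consumed in order (the load address appears to advance
	 * automatically, which is why the loop only touches the data register),
	 * and the firmware version written back to UCODE_ADDR at the end
	 * apparently doubles as an end-of-load marker.  This description is
	 * inferred from the register usage here rather than from a hardware spec.
	 */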
3140
3141 return 0;
3142 }
3143
3144 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3145 {
3146 int r;
3147
3148 if (amdgpu_sriov_vf(adev)) {
3149 gfx_v9_0_init_csb(adev);
3150 return 0;
3151 }
3152
3153 adev->gfx.rlc.funcs->stop(adev);
3154
3155 /* disable CG */
3156 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3157
3158 gfx_v9_0_init_pg(adev);
3159
3160 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3161 /* legacy rlc firmware loading */
3162 r = gfx_v9_0_rlc_load_microcode(adev);
3163 if (r)
3164 return r;
3165 }
3166
3167 switch (adev->asic_type) {
3168 case CHIP_RAVEN:
3169 if (amdgpu_lbpw == 0)
3170 gfx_v9_0_enable_lbpw(adev, false);
3171 else
3172 gfx_v9_0_enable_lbpw(adev, true);
3173 break;
3174 case CHIP_VEGA20:
3175 if (amdgpu_lbpw > 0)
3176 gfx_v9_0_enable_lbpw(adev, true);
3177 else
3178 gfx_v9_0_enable_lbpw(adev, false);
3179 break;
3180 default:
3181 break;
3182 }
3183
3184 adev->gfx.rlc.funcs->start(adev);
3185
3186 return 0;
3187 }
3188
3189 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3190 {
3191 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3192
3193 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3194 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3195 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3196 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3197 udelay(50);
3198 }
3199
3200 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3201 {
3202 const struct gfx_firmware_header_v1_0 *pfp_hdr;
3203 const struct gfx_firmware_header_v1_0 *ce_hdr;
3204 const struct gfx_firmware_header_v1_0 *me_hdr;
3205 const __le32 *fw_data;
3206 unsigned i, fw_size;
3207
3208 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3209 return -EINVAL;
3210
3211 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3212 adev->gfx.pfp_fw->data;
3213 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3214 adev->gfx.ce_fw->data;
3215 me_hdr = (const struct gfx_firmware_header_v1_0 *)
3216 adev->gfx.me_fw->data;
3217
3218 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3219 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3220 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3221
3222 gfx_v9_0_cp_gfx_enable(adev, false);
3223
3224 /* PFP */
3225 fw_data = (const __le32 *)
3226 (adev->gfx.pfp_fw->data +
3227 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3228 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3229 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3230 for (i = 0; i < fw_size; i++)
3231 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3232 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3233
3234 /* CE */
3235 fw_data = (const __le32 *)
3236 (adev->gfx.ce_fw->data +
3237 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3238 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3239 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3240 for (i = 0; i < fw_size; i++)
3241 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3242 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3243
3244 /* ME */
3245 fw_data = (const __le32 *)
3246 (adev->gfx.me_fw->data +
3247 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3248 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3249 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3250 for (i = 0; i < fw_size; i++)
3251 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3252 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3253
3254 return 0;
3255 }
3256
3257 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3258 {
3259 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3260 const struct cs_section_def *sect = NULL;
3261 const struct cs_extent_def *ext = NULL;
3262 int r, i, tmp;
3263
3264 /* init the CP */
3265 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3266 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3267
3268 gfx_v9_0_cp_gfx_enable(adev, true);
3269
3270 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3271 if (r) {
3272 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3273 return r;
3274 }
3275
3276 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3277 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3278
3279 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3280 amdgpu_ring_write(ring, 0x80000000);
3281 amdgpu_ring_write(ring, 0x80000000);
3282
3283 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3284 for (ext = sect->section; ext->extent != NULL; ++ext) {
3285 if (sect->id == SECT_CONTEXT) {
3286 amdgpu_ring_write(ring,
3287 PACKET3(PACKET3_SET_CONTEXT_REG,
3288 ext->reg_count));
3289 amdgpu_ring_write(ring,
3290 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3291 for (i = 0; i < ext->reg_count; i++)
3292 amdgpu_ring_write(ring, ext->extent[i]);
3293 }
3294 }
3295 }
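	/*
	 * The nested loops above stream the gfx9 clear-state "golden" context
	 * (gfx9_cs_data, presumably from clearstate_gfx9.h) into the ring:
	 * every SECT_CONTEXT extent becomes one SET_CONTEXT_REG packet whose
	 * payload is the extent's register values, bracketed by the PREAMBLE
	 * begin/end packets.
	 */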
3296
3297 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3298 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3299
3300 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3301 amdgpu_ring_write(ring, 0);
3302
3303 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3304 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3305 amdgpu_ring_write(ring, 0x8000);
3306 amdgpu_ring_write(ring, 0x8000);
3307
3308 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3309 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3310 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3311 amdgpu_ring_write(ring, tmp);
3312 amdgpu_ring_write(ring, 0);
3313
3314 amdgpu_ring_commit(ring);
3315
3316 return 0;
3317 }
3318
3319 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3320 {
3321 struct amdgpu_ring *ring;
3322 u32 tmp;
3323 u32 rb_bufsz;
3324 u64 rb_addr, rptr_addr, wptr_gpu_addr;
3325
3326 /* Set the write pointer delay */
3327 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3328
3329 /* set the RB to use vmid 0 */
3330 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3331
3332 /* Set ring buffer size */
3333 ring = &adev->gfx.gfx_ring[0];
3334 rb_bufsz = order_base_2(ring->ring_size / 8);
3335 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3336 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3337 #ifdef __BIG_ENDIAN
3338 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3339 #endif
3340 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
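	/*
	 * Illustrative arithmetic for the CNTL value (the actual ring size is
	 * chosen elsewhere): for a hypothetical 64 KiB gfx ring,
	 * ring_size / 8 = 8192, so RB_BUFSZ = order_base_2(8192) = 13 and
	 * RB_BLKSZ = 11.
	 */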
3341
3342 /* Initialize the ring buffer's write pointers */
3343 ring->wptr = 0;
3344 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3345 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3346
3347 /* set the wb address whether it's enabled or not */
3348 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3349 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3350 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3351
3352 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3353 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3354 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3355
3356 mdelay(1);
3357 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3358
3359 rb_addr = ring->gpu_addr >> 8;
3360 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3361 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3362
3363 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3364 if (ring->use_doorbell) {
3365 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3366 DOORBELL_OFFSET, ring->doorbell_index);
3367 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3368 DOORBELL_EN, 1);
3369 } else {
3370 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3371 }
3372 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3373
3374 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3375 DOORBELL_RANGE_LOWER, ring->doorbell_index);
3376 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3377
3378 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3379 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3380
3381
3382 /* start the ring */
3383 gfx_v9_0_cp_gfx_start(adev);
3384 ring->sched.ready = true;
3385
3386 return 0;
3387 }
3388
3389 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3390 {
3391 if (enable) {
3392 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3393 } else {
3394 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3395 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3396 adev->gfx.kiq.ring.sched.ready = false;
3397 }
3398 udelay(50);
3399 }
3400
3401 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3402 {
3403 const struct gfx_firmware_header_v1_0 *mec_hdr;
3404 const __le32 *fw_data;
3405 unsigned i;
3406 u32 tmp;
3407
3408 if (!adev->gfx.mec_fw)
3409 return -EINVAL;
3410
3411 gfx_v9_0_cp_compute_enable(adev, false);
3412
3413 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3414 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3415
3416 fw_data = (const __le32 *)
3417 (adev->gfx.mec_fw->data +
3418 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3419 tmp = 0;
3420 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3421 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3422 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3423
3424 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3425 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3426 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3427 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3428
3429 /* MEC1 */
3430 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3431 mec_hdr->jt_offset);
3432 for (i = 0; i < mec_hdr->jt_size; i++)
3433 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3434 le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3435
3436 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3437 adev->gfx.mec_fw_version);
3438 /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3439
3440 return 0;
3441 }
3442
3443 /* KIQ functions */
3444 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3445 {
3446 uint32_t tmp;
3447 struct amdgpu_device *adev = ring->adev;
3448
3449 /* tell the RLC which queue is the KIQ */
3450 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3451 tmp &= 0xffffff00;
3452 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3453 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3454 tmp |= 0x80;
3455 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
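	/*
	 * The low byte of RLC_CP_SCHEDULERS encodes the KIQ location as
	 * (me << 5) | (pipe << 3) | queue; the second write additionally sets
	 * bit 7, which appears to act as a "valid" flag for the entry.  For
	 * example, MEC1 pipe 0 queue 0 (me = 1) would program 0x20 and then 0xA0.
	 */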
3456 }
3457
3458 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3459 {
3460 struct amdgpu_device *adev = ring->adev;
3461
3462 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3463 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3464 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3465 mqd->cp_hqd_queue_priority =
3466 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3467 }
3468 }
3469 }
3470
3471 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3472 {
3473 struct amdgpu_device *adev = ring->adev;
3474 struct v9_mqd *mqd = ring->mqd_ptr;
3475 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3476 uint32_t tmp;
3477
3478 mqd->header = 0xC0310800;
3479 mqd->compute_pipelinestat_enable = 0x00000001;
3480 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3481 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3482 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3483 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3484 mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3485 mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3486 mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3487 mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3488 mqd->compute_misc_reserved = 0x00000003;
3489
3490 mqd->dynamic_cu_mask_addr_lo =
3491 lower_32_bits(ring->mqd_gpu_addr
3492 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3493 mqd->dynamic_cu_mask_addr_hi =
3494 upper_32_bits(ring->mqd_gpu_addr
3495 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3496
3497 eop_base_addr = ring->eop_gpu_addr >> 8;
3498 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3499 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3500
3501 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3502 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3503 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3504 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3505
3506 mqd->cp_hqd_eop_control = tmp;
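	/*
	 * Worked example with GFX9_MEC_HPD_SIZE = 4096: 4096 / 4 = 1024 dwords,
	 * so EOP_SIZE = order_base_2(1024) - 1 = 9 and the hardware decodes it
	 * as 2^(9+1) = 1024 dwords, i.e. the whole EOP buffer.
	 */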
3507
3508 /* enable doorbell? */
3509 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3510
3511 if (ring->use_doorbell) {
3512 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3513 DOORBELL_OFFSET, ring->doorbell_index);
3514 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3515 DOORBELL_EN, 1);
3516 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3517 DOORBELL_SOURCE, 0);
3518 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3519 DOORBELL_HIT, 0);
3520 } else {
3521 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3522 DOORBELL_EN, 0);
3523 }
3524
3525 mqd->cp_hqd_pq_doorbell_control = tmp;
3526
3527 /* disable the queue if it's active */
3528 ring->wptr = 0;
3529 mqd->cp_hqd_dequeue_request = 0;
3530 mqd->cp_hqd_pq_rptr = 0;
3531 mqd->cp_hqd_pq_wptr_lo = 0;
3532 mqd->cp_hqd_pq_wptr_hi = 0;
3533
3534 /* set the pointer to the MQD */
3535 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3536 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3537
3538 /* set MQD vmid to 0 */
3539 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3540 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3541 mqd->cp_mqd_control = tmp;
3542
3543 /* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
3544 hqd_gpu_addr = ring->gpu_addr >> 8;
3545 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3546 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3547
3548 /* set up the HQD, this is similar to CP_RB0_CNTL */
3549 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3550 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3551 (order_base_2(ring->ring_size / 4) - 1));
3552 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3553 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3554 #ifdef __BIG_ENDIAN
3555 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3556 #endif
3557 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3558 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3559 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3560 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3561 mqd->cp_hqd_pq_control = tmp;
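	/*
	 * Illustrative numbers for the fields above: with a hypothetical 256 KiB
	 * compute ring, ring_size / 4 = 65536 dwords and QUEUE_SIZE =
	 * order_base_2(65536) - 1 = 15 (the same 2^(n+1)-dwords encoding as
	 * EOP_SIZE).  Assuming the usual 4 KiB GPU page size, the RPTR_BLOCK_SIZE
	 * argument works out to (10 - 1) << 8 = 0x900.
	 */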
3562
3563 /* set the wb address whether it's enabled or not */
3564 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3565 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3566 mqd->cp_hqd_pq_rptr_report_addr_hi =
3567 upper_32_bits(wb_gpu_addr) & 0xffff;
3568
3569 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3570 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3571 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3572 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3573
3574 tmp = 0;
3575 /* enable the doorbell if requested */
3576 if (ring->use_doorbell) {
3577 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3578 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3579 DOORBELL_OFFSET, ring->doorbell_index);
3580
3581 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3582 DOORBELL_EN, 1);
3583 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3584 DOORBELL_SOURCE, 0);
3585 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3586 DOORBELL_HIT, 0);
3587 }
3588
3589 mqd->cp_hqd_pq_doorbell_control = tmp;
3590
3591 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3592 ring->wptr = 0;
3593 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3594
3595 /* set the vmid for the queue */
3596 mqd->cp_hqd_vmid = 0;
3597
3598 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3599 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3600 mqd->cp_hqd_persistent_state = tmp;
3601
3602 /* set MIN_IB_AVAIL_SIZE */
3603 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3604 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3605 mqd->cp_hqd_ib_control = tmp;
3606
3607 /* set static priority for a queue/ring */
3608 gfx_v9_0_mqd_set_priority(ring, mqd);
3609 mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3610
3611 /* the map_queues packet doesn't need to activate the queue,
3612 * so only the KIQ needs to set this field.
3613 */
3614 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3615 mqd->cp_hqd_active = 1;
3616
3617 return 0;
3618 }
3619
3620 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3621 {
3622 struct amdgpu_device *adev = ring->adev;
3623 struct v9_mqd *mqd = ring->mqd_ptr;
3624 int j;
3625
3626 /* disable wptr polling */
3627 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3628
3629 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3630 mqd->cp_hqd_eop_base_addr_lo);
3631 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3632 mqd->cp_hqd_eop_base_addr_hi);
3633
3634 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3635 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3636 mqd->cp_hqd_eop_control);
3637
3638 /* enable doorbell? */
3639 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3640 mqd->cp_hqd_pq_doorbell_control);
3641
3642 /* disable the queue if it's active */
3643 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3644 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3645 for (j = 0; j < adev->usec_timeout; j++) {
3646 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3647 break;
3648 udelay(1);
3649 }
3650 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3651 mqd->cp_hqd_dequeue_request);
3652 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3653 mqd->cp_hqd_pq_rptr);
3654 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3655 mqd->cp_hqd_pq_wptr_lo);
3656 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3657 mqd->cp_hqd_pq_wptr_hi);
3658 }
3659
3660 /* set the pointer to the MQD */
3661 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3662 mqd->cp_mqd_base_addr_lo);
3663 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3664 mqd->cp_mqd_base_addr_hi);
3665
3666 /* set MQD vmid to 0 */
3667 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3668 mqd->cp_mqd_control);
3669
3670 /* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
3671 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3672 mqd->cp_hqd_pq_base_lo);
3673 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3674 mqd->cp_hqd_pq_base_hi);
3675
3676 /* set up the HQD, this is similar to CP_RB0_CNTL */
3677 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3678 mqd->cp_hqd_pq_control);
3679
3680 /* set the wb address whether it's enabled or not */
3681 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3682 mqd->cp_hqd_pq_rptr_report_addr_lo);
3683 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3684 mqd->cp_hqd_pq_rptr_report_addr_hi);
3685
3686 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3687 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3688 mqd->cp_hqd_pq_wptr_poll_addr_lo);
3689 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3690 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3691
3692 /* enable the doorbell if requested */
3693 if (ring->use_doorbell) {
3694 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3695 (adev->doorbell_index.kiq * 2) << 2);
3696 /* If GC has entered CGPG, ringing a doorbell beyond the first page
3697 * doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3698 * work around this issue. This change has to stay aligned with the
3699 * corresponding firmware update.
3700 */
3701 if (check_if_enlarge_doorbell_range(adev))
3702 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3703 (adev->doorbell.size - 4));
3704 else
3705 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3706 (adev->doorbell_index.userqueue_end * 2) << 2);
3707 }
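	/*
	 * The doorbell range registers take byte offsets into the doorbell BAR.
	 * adev->doorbell_index values appear to be in 64-bit doorbell units, so
	 * index * 2 gives the dword index and the << 2 converts dwords to bytes.
	 * The upper bound is either the end of the user-queue range or, with the
	 * CGPG workaround above, the last dword of the doorbell aperture.
	 */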
3708
3709 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3710 mqd->cp_hqd_pq_doorbell_control);
3711
3712 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3713 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3714 mqd->cp_hqd_pq_wptr_lo);
3715 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3716 mqd->cp_hqd_pq_wptr_hi);
3717
3718 /* set the vmid for the queue */
3719 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3720
3721 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3722 mqd->cp_hqd_persistent_state);
3723
3724 /* activate the queue */
3725 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3726 mqd->cp_hqd_active);
3727
3728 if (ring->use_doorbell)
3729 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3730
3731 return 0;
3732 }
3733
3734 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3735 {
3736 struct amdgpu_device *adev = ring->adev;
3737 int j;
3738
3739 /* disable the queue if it's active */
3740 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3741
3742 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3743
3744 for (j = 0; j < adev->usec_timeout; j++) {
3745 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3746 break;
3747 udelay(1);
3748 }
3749
3750 if (j == adev->usec_timeout) {
3751 DRM_DEBUG("KIQ dequeue request failed.\n");
3752
3753 /* Manual disable if dequeue request times out */
3754 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3755 }
3756
3757 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3758 0);
3759 }
3760
3761 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3762 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3763 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3764 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3765 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3766 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3767 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3768 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3769
3770 return 0;
3771 }
3772
3773 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3774 {
3775 struct amdgpu_device *adev = ring->adev;
3776 struct v9_mqd *mqd = ring->mqd_ptr;
3777 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3778 struct v9_mqd *tmp_mqd;
3779
3780 gfx_v9_0_kiq_setting(ring);
3781
3782 /* The GPU could be in a bad state during probe: the driver triggers the
3783 * reset after loading the SMU, and in that case the MQD has not been
3784 * initialized, so the driver needs to re-init it.
3785 * Check mqd->cp_hqd_pq_control, since this value should not be 0.
3786 */
3787 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3788 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
3789 /* for GPU_RESET case , reset MQD to a clean status */
3790 if (adev->gfx.mec.mqd_backup[mqd_idx])
3791 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3792
3793 /* reset ring buffer */
3794 ring->wptr = 0;
3795 amdgpu_ring_clear_ring(ring);
3796
3797 mutex_lock(&adev->srbm_mutex);
3798 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3799 gfx_v9_0_kiq_init_register(ring);
3800 soc15_grbm_select(adev, 0, 0, 0, 0);
3801 mutex_unlock(&adev->srbm_mutex);
3802 } else {
3803 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3804 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3805 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3806 mutex_lock(&adev->srbm_mutex);
3807 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3808 gfx_v9_0_mqd_init(ring);
3809 gfx_v9_0_kiq_init_register(ring);
3810 soc15_grbm_select(adev, 0, 0, 0, 0);
3811 mutex_unlock(&adev->srbm_mutex);
3812
3813 if (adev->gfx.mec.mqd_backup[mqd_idx])
3814 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3815 }
3816
3817 return 0;
3818 }
3819
3820 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3821 {
3822 struct amdgpu_device *adev = ring->adev;
3823 struct v9_mqd *mqd = ring->mqd_ptr;
3824 int mqd_idx = ring - &adev->gfx.compute_ring[0];
3825 struct v9_mqd *tmp_mqd;
3826
3827 /* Same as the KIQ init above: the driver needs to re-init the MQD if
3828 * mqd->cp_hqd_pq_control has not been initialized before.
3829 */
3830 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3831
3832 if (!tmp_mqd->cp_hqd_pq_control ||
3833 (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3834 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3835 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3836 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3837 mutex_lock(&adev->srbm_mutex);
3838 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3839 gfx_v9_0_mqd_init(ring);
3840 soc15_grbm_select(adev, 0, 0, 0, 0);
3841 mutex_unlock(&adev->srbm_mutex);
3842
3843 if (adev->gfx.mec.mqd_backup[mqd_idx])
3844 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3845 } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3846 /* reset MQD to a clean status */
3847 if (adev->gfx.mec.mqd_backup[mqd_idx])
3848 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3849
3850 /* reset ring buffer */
3851 ring->wptr = 0;
3852 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3853 amdgpu_ring_clear_ring(ring);
3854 } else {
3855 amdgpu_ring_clear_ring(ring);
3856 }
3857
3858 return 0;
3859 }
3860
3861 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3862 {
3863 struct amdgpu_ring *ring;
3864 int r;
3865
3866 ring = &adev->gfx.kiq.ring;
3867
3868 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3869 if (unlikely(r != 0))
3870 return r;
3871
3872 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3873 if (unlikely(r != 0))
3874 return r;
3875
3876 gfx_v9_0_kiq_init_queue(ring);
3877 amdgpu_bo_kunmap(ring->mqd_obj);
3878 ring->mqd_ptr = NULL;
3879 amdgpu_bo_unreserve(ring->mqd_obj);
3880 ring->sched.ready = true;
3881 return 0;
3882 }
3883
3884 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3885 {
3886 struct amdgpu_ring *ring = NULL;
3887 int r = 0, i;
3888
3889 gfx_v9_0_cp_compute_enable(adev, true);
3890
3891 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3892 ring = &adev->gfx.compute_ring[i];
3893
3894 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3895 if (unlikely(r != 0))
3896 goto done;
3897 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3898 if (!r) {
3899 r = gfx_v9_0_kcq_init_queue(ring);
3900 amdgpu_bo_kunmap(ring->mqd_obj);
3901 ring->mqd_ptr = NULL;
3902 }
3903 amdgpu_bo_unreserve(ring->mqd_obj);
3904 if (r)
3905 goto done;
3906 }
3907
3908 r = amdgpu_gfx_enable_kcq(adev);
3909 done:
3910 return r;
3911 }
3912
3913 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3914 {
3915 int r, i;
3916 struct amdgpu_ring *ring;
3917
3918 if (!(adev->flags & AMD_IS_APU))
3919 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3920
3921 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3922 if (adev->gfx.num_gfx_rings) {
3923 /* legacy firmware loading */
3924 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3925 if (r)
3926 return r;
3927 }
3928
3929 r = gfx_v9_0_cp_compute_load_microcode(adev);
3930 if (r)
3931 return r;
3932 }
3933
3934 r = gfx_v9_0_kiq_resume(adev);
3935 if (r)
3936 return r;
3937
3938 if (adev->gfx.num_gfx_rings) {
3939 r = gfx_v9_0_cp_gfx_resume(adev);
3940 if (r)
3941 return r;
3942 }
3943
3944 r = gfx_v9_0_kcq_resume(adev);
3945 if (r)
3946 return r;
3947
3948 if (adev->gfx.num_gfx_rings) {
3949 ring = &adev->gfx.gfx_ring[0];
3950 r = amdgpu_ring_test_helper(ring);
3951 if (r)
3952 return r;
3953 }
3954
3955 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3956 ring = &adev->gfx.compute_ring[i];
3957 amdgpu_ring_test_helper(ring);
3958 }
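	/*
	 * Note that the return value of amdgpu_ring_test_helper() is ignored for
	 * the compute rings: a failing KCQ only marks that ring as not ready,
	 * whereas a failing gfx ring test above aborts the whole CP resume.
	 */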
3959
3960 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3961
3962 return 0;
3963 }
3964
3965 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3966 {
3967 u32 tmp;
3968
3969 if (adev->asic_type != CHIP_ARCTURUS &&
3970 adev->asic_type != CHIP_ALDEBARAN)
3971 return;
3972
3973 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3974 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3975 adev->df.hash_status.hash_64k);
3976 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3977 adev->df.hash_status.hash_2m);
3978 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3979 adev->df.hash_status.hash_1g);
3980 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3981 }
3982
3983 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3984 {
3985 if (adev->gfx.num_gfx_rings)
3986 gfx_v9_0_cp_gfx_enable(adev, enable);
3987 gfx_v9_0_cp_compute_enable(adev, enable);
3988 }
3989
3990 static int gfx_v9_0_hw_init(void *handle)
3991 {
3992 int r;
3993 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3994
3995 if (!amdgpu_sriov_vf(adev))
3996 gfx_v9_0_init_golden_registers(adev);
3997
3998 gfx_v9_0_constants_init(adev);
3999
4000 gfx_v9_0_init_tcp_config(adev);
4001
4002 r = adev->gfx.rlc.funcs->resume(adev);
4003 if (r)
4004 return r;
4005
4006 r = gfx_v9_0_cp_resume(adev);
4007 if (r)
4008 return r;
4009
4010 if (adev->asic_type == CHIP_ALDEBARAN)
4011 gfx_v9_4_2_set_power_brake_sequence(adev);
4012
4013 return r;
4014 }
4015
4016 static int gfx_v9_0_hw_fini(void *handle)
4017 {
4018 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4019
4020 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4021 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4022 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4023
4024 /* DF freeze and KCQ disable will fail if a RAS interrupt has been triggered */
4025 if (!amdgpu_ras_intr_triggered())
4026 /* disable the KCQs so the CPC stops touching memory that is no longer valid */
4027 amdgpu_gfx_disable_kcq(adev);
4028
4029 if (amdgpu_sriov_vf(adev)) {
4030 gfx_v9_0_cp_gfx_enable(adev, false);
4031 /* must disable polling for SRIOV when hw teardown is finished, otherwise
4032 * the CPC engine may keep fetching a WB address that is already
4033 * invalid after sw teardown, triggering a DMAR read error on the
4034 * hypervisor side.
4035 */
4036 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4037 return 0;
4038 }
4039
4040 /* Use the deinitialize sequence from CAIL when unbinding the device
4041 * from the driver, otherwise the KIQ hangs when binding it back.
4042 */
4043 if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4044 mutex_lock(&adev->srbm_mutex);
4045 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
4046 adev->gfx.kiq.ring.pipe,
4047 adev->gfx.kiq.ring.queue, 0);
4048 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
4049 soc15_grbm_select(adev, 0, 0, 0, 0);
4050 mutex_unlock(&adev->srbm_mutex);
4051 }
4052
4053 gfx_v9_0_cp_enable(adev, false);
4054
4055 /* Skip suspend with A+A reset */
4056 if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) {
4057 dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n");
4058 return 0;
4059 }
4060
4061 adev->gfx.rlc.funcs->stop(adev);
4062 return 0;
4063 }
4064
4065 static int gfx_v9_0_suspend(void *handle)
4066 {
4067 return gfx_v9_0_hw_fini(handle);
4068 }
4069
4070 static int gfx_v9_0_resume(void *handle)
4071 {
4072 return gfx_v9_0_hw_init(handle);
4073 }
4074
4075 static bool gfx_v9_0_is_idle(void *handle)
4076 {
4077 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4078
4079 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4080 GRBM_STATUS, GUI_ACTIVE))
4081 return false;
4082 else
4083 return true;
4084 }
4085
4086 static int gfx_v9_0_wait_for_idle(void *handle)
4087 {
4088 unsigned i;
4089 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4090
4091 for (i = 0; i < adev->usec_timeout; i++) {
4092 if (gfx_v9_0_is_idle(handle))
4093 return 0;
4094 udelay(1);
4095 }
4096 return -ETIMEDOUT;
4097 }
4098
4099 static int gfx_v9_0_soft_reset(void *handle)
4100 {
4101 u32 grbm_soft_reset = 0;
4102 u32 tmp;
4103 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4104
4105 /* GRBM_STATUS */
4106 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4107 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4108 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4109 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4110 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4111 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4112 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4113 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4114 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4115 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4116 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4117 }
4118
4119 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4120 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4121 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4122 }
4123
4124 /* GRBM_STATUS2 */
4125 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4126 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4127 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4128 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4129
4130
4131 if (grbm_soft_reset) {
4132 /* stop the rlc */
4133 adev->gfx.rlc.funcs->stop(adev);
4134
4135 if (adev->gfx.num_gfx_rings)
4136 /* Disable GFX parsing/prefetching */
4137 gfx_v9_0_cp_gfx_enable(adev, false);
4138
4139 /* Disable MEC parsing/prefetching */
4140 gfx_v9_0_cp_compute_enable(adev, false);
4141
4142 if (grbm_soft_reset) {
4143 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4144 tmp |= grbm_soft_reset;
4145 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4146 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4147 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4148
4149 udelay(50);
4150
4151 tmp &= ~grbm_soft_reset;
4152 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4153 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4154 }
4155
4156 /* Wait a little for things to settle down */
4157 udelay(50);
4158 }
4159 return 0;
4160 }
4161
4162 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4163 {
4164 signed long r, cnt = 0;
4165 unsigned long flags;
4166 uint32_t seq, reg_val_offs = 0;
4167 uint64_t value = 0;
4168 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4169 struct amdgpu_ring *ring = &kiq->ring;
4170
4171 BUG_ON(!ring->funcs->emit_rreg);
4172
4173 spin_lock_irqsave(&kiq->ring_lock, flags);
4174 if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4175 pr_err("critical bug! too many kiq readers\n");
4176 goto failed_unlock;
4177 }
4178 amdgpu_ring_alloc(ring, 32);
4179 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4180 amdgpu_ring_write(ring, 9 | /* src: register*/
4181 (5 << 8) | /* dst: memory */
4182 (1 << 16) | /* count sel */
4183 (1 << 20)); /* write confirm */
4184 amdgpu_ring_write(ring, 0);
4185 amdgpu_ring_write(ring, 0);
4186 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4187 reg_val_offs * 4));
4188 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4189 reg_val_offs * 4));
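	/* The COPY_DATA packet built above asks the CP to copy a 64-bit value
	 * into the write-back slot at reg_val_offs: the control dword selects
	 * source 9 (which on gfx9 appears to select the GPU clock counter,
	 * matching this function's purpose), a memory destination, a 64-bit
	 * count and write confirmation, and the two address dwords give the
	 * destination.  The polled fence below tells us when the copy has landed.
	 */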
4190 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4191 if (r)
4192 goto failed_undo;
4193
4194 amdgpu_ring_commit(ring);
4195 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4196
4197 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4198
4199 /* don't wait any longer in the GPU reset case, because doing so may
4200 * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
4201 * is triggered from TTM and ttm_bo_lock_delayed_workqueue() will
4202 * never return if we keep waiting in virt_kiq_rreg, which leaves
4203 * gpu_recover() hanging there.
4204 *
4205 * Also don't wait any longer when called from IRQ context.
4206 */
4207 if (r < 1 && (amdgpu_in_reset(adev)))
4208 goto failed_kiq_read;
4209
4210 might_sleep();
4211 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4212 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4213 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4214 }
4215
4216 if (cnt > MAX_KIQ_REG_TRY)
4217 goto failed_kiq_read;
4218
4219 mb();
4220 value = (uint64_t)adev->wb.wb[reg_val_offs] |
4221 (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
4222 amdgpu_device_wb_free(adev, reg_val_offs);
4223 return value;
4224
4225 failed_undo:
4226 amdgpu_ring_undo(ring);
4227 failed_unlock:
4228 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4229 failed_kiq_read:
4230 if (reg_val_offs)
4231 amdgpu_device_wb_free(adev, reg_val_offs);
4232 pr_err("failed to read gpu clock\n");
4233 return ~0;
4234 }
4235
4236 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4237 {
4238 uint64_t clock, clock_lo, clock_hi, hi_check;
4239
4240 switch (adev->asic_type) {
4241 case CHIP_RENOIR:
4242 preempt_disable();
4243 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4244 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4245 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4246 /* The SMUIO TSC runs at 100 MHz, so its 32-bit low word carries over
4247 * roughly every 43 seconds (2^32 / 100 MHz ~= 42.9 s).
4248 */
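	/* Classic high/low/high read of a 64-bit counter split across two
	 * 32-bit registers: if the high word changed between the two reads,
	 * the low word wrapped in between, so it is re-read and paired with
	 * the newer high word.
	 */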
4249 if (hi_check != clock_hi) {
4250 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4251 clock_hi = hi_check;
4252 }
4253 preempt_enable();
4254 clock = clock_lo | (clock_hi << 32ULL);
4255 break;
4256 default:
4257 amdgpu_gfx_off_ctrl(adev, false);
4258 mutex_lock(&adev->gfx.gpu_clock_mutex);
4259 if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
4260 clock = gfx_v9_0_kiq_read_clock(adev);
4261 } else {
4262 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4263 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4264 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4265 }
4266 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4267 amdgpu_gfx_off_ctrl(adev, true);
4268 break;
4269 }
4270 return clock;
4271 }
4272
4273 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4274 uint32_t vmid,
4275 uint32_t gds_base, uint32_t gds_size,
4276 uint32_t gws_base, uint32_t gws_size,
4277 uint32_t oa_base, uint32_t oa_size)
4278 {
4279 struct amdgpu_device *adev = ring->adev;
4280
4281 /* GDS Base */
4282 gfx_v9_0_write_data_to_reg(ring, 0, false,
4283 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4284 gds_base);
4285
4286 /* GDS Size */
4287 gfx_v9_0_write_data_to_reg(ring, 0, false,
4288 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4289 gds_size);
4290
4291 /* GWS */
4292 gfx_v9_0_write_data_to_reg(ring, 0, false,
4293 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4294 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4295
4296 /* OA */
4297 gfx_v9_0_write_data_to_reg(ring, 0, false,
4298 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4299 (1 << (oa_size + oa_base)) - (1 << oa_base));
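	/*
	 * The OA expression above builds a contiguous run of oa_size bits
	 * starting at bit oa_base: for example oa_base = 4, oa_size = 4 gives
	 * (1 << 8) - (1 << 4) = 0xF0.
	 */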
4300 }
4301
4302 static const u32 vgpr_init_compute_shader[] =
4303 {
4304 0xb07c0000, 0xbe8000ff,
4305 0x000000f8, 0xbf110800,
4306 0x7e000280, 0x7e020280,
4307 0x7e040280, 0x7e060280,
4308 0x7e080280, 0x7e0a0280,
4309 0x7e0c0280, 0x7e0e0280,
4310 0x80808800, 0xbe803200,
4311 0xbf84fff5, 0xbf9c0000,
4312 0xd28c0001, 0x0001007f,
4313 0xd28d0001, 0x0002027e,
4314 0x10020288, 0xb8810904,
4315 0xb7814000, 0xd1196a01,
4316 0x00000301, 0xbe800087,
4317 0xbefc00c1, 0xd89c4000,
4318 0x00020201, 0xd89cc080,
4319 0x00040401, 0x320202ff,
4320 0x00000800, 0x80808100,
4321 0xbf84fff8, 0x7e020280,
4322 0xbf810000, 0x00000000,
4323 };
4324
4325 static const u32 sgpr_init_compute_shader[] =
4326 {
4327 0xb07c0000, 0xbe8000ff,
4328 0x0000005f, 0xbee50080,
4329 0xbe812c65, 0xbe822c65,
4330 0xbe832c65, 0xbe842c65,
4331 0xbe852c65, 0xb77c0005,
4332 0x80808500, 0xbf84fff8,
4333 0xbe800080, 0xbf810000,
4334 };
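/*
 * The shader arrays above (and the Arcturus variant below) are pre-assembled
 * GFX9 compute shader code stored as raw dwords.  Per the EDC workaround
 * comment further down, gfx_v9_0_do_edc_gpr_workarounds() dispatches them to
 * write every VGPR/SGPR so the GPR ECC state starts from a known value; the
 * instruction encodings themselves are not re-derived here.
 */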
4335
4336 static const u32 vgpr_init_compute_shader_arcturus[] = {
4337 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4338 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4339 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4340 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4341 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4342 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4343 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4344 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4345 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4346 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4347 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4348 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4349 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4350 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4351 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4352 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4353 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4354 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4355 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4356 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4357 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4358 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4359 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4360 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4361 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4362 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4363 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4364 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4365 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4366 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4367 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4368 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4369 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4370 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4371 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4372 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4373 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4374 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4375 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4376 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4377 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4378 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4379 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4380 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4381 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4382 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4383 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4384 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4385 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4386 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4387 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4388 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4389 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4390 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4391 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4392 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4393 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4394 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4395 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4396 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4397 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4398 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4399 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4400 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4401 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4402 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4403 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4404 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4405 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4406 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4407 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4408 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4409 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4410 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4411 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4412 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4413 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4414 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4415 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4416 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4417 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4418 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4419 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4420 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4421 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4422 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4423 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4424 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4425 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4426 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4427 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4428 0xbf84fff8, 0xbf810000,
4429 };
4430
4431 /* When the register arrays below are changed, please update gpr_reg_size
4432 and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds
4433 to cover all gfx9 ASICs */
4434 static const struct soc15_reg_entry vgpr_init_regs[] = {
4435 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4436 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4437 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4438 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4439 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4440 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4441 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4442 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4443 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4444 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4445 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4446 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4447 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4448 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4449 };
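/*
 * For reference, this table describes a 0x40 * 4 * 1 = 256-thread workgroup,
 * and (using the usual GCN PGM_RSRC1 layout, noted here as an aid rather than
 * taken from this file) the 0x3f VGPRS field requests (0x3f + 1) * 4 = 256
 * VGPRs per thread, i.e. the maximum, consistent with a shader whose only job
 * is to touch every VGPR.  The Arcturus variant below bumps this to 0xbf,
 * presumably to cover its accumulation VGPRs as well.
 */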
4450
4451 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4452 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4453 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4454 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4455 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4456 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4457 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4458 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4459 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4460 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4461 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4462 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4463 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4464 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4465 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4466 };
4467
4468 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4469 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4470 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4471 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4472 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4473 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4474 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4475 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4476 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4477 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4478 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4479 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4480 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4481 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4482 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4483 };
4484
4485 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4486 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4487 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4488 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4489 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4490 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4491 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4492 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4493 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4494 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4495 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4496 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4497 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4498 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4499 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4500 };
4501
4502 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4503 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4504 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4505 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4506 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4507 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4508 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4509 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4510 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4511 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4512 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4513 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4514 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4515 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4516 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4517 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4518 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4519 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4520 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4521 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4522 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4523 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4524 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4525 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4526 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4527 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4528 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4529 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4530 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4531 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4532 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4533 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4534 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4535 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4536 };
4537
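/*
 * GDS EDC workaround: when GFX RAS is supported, fill the whole GDS VMID0
 * aperture with zeros via a CP DMA_DATA packet on the first compute ring,
 * then poll until the CP has consumed the packet.
 */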
4538 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4539 {
4540 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4541 int i, r;
4542
4543 /* only support when RAS is enabled */
4544 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4545 return 0;
4546
4547 r = amdgpu_ring_alloc(ring, 7);
4548 if (r) {
4549 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4550 ring->name, r);
4551 return r;
4552 }
4553
4554 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4555 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4556
4557 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4558 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4559 PACKET3_DMA_DATA_DST_SEL(1) |
4560 PACKET3_DMA_DATA_SRC_SEL(2) |
4561 PACKET3_DMA_DATA_ENGINE(0)));
4562 amdgpu_ring_write(ring, 0);
4563 amdgpu_ring_write(ring, 0);
4564 amdgpu_ring_write(ring, 0);
4565 amdgpu_ring_write(ring, 0);
4566 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4567 adev->gds.gds_size);
4568
4569 amdgpu_ring_commit(ring);
4570
4571 for (i = 0; i < adev->usec_timeout; i++) {
4572 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4573 break;
4574 udelay(1);
4575 }
4576
4577 if (i >= adev->usec_timeout)
4578 r = -ETIMEDOUT;
4579
4580 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4581
4582 return r;
4583 }
4584
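/*
 * GPR EDC workaround: when GFX RAS is supported, build one IB that performs
 * three compute dispatches (one VGPR-init shader plus two SGPR-init shaders
 * restricted to different CU groups via the COMPUTE_STATIC_THREAD_MGMT_SE
 * masks) so the register files are written to a known state.
 */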
4585 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4586 {
4587 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4588 struct amdgpu_ib ib;
4589 struct dma_fence *f = NULL;
4590 int r, i;
4591 unsigned total_size, vgpr_offset, sgpr_offset;
4592 u64 gpu_addr;
4593
4594 int compute_dim_x = adev->gfx.config.max_shader_engines *
4595 adev->gfx.config.max_cu_per_sh *
4596 adev->gfx.config.max_sh_per_se;
4597 int sgpr_work_group_size = 5;
4598 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4599 int vgpr_init_shader_size;
4600 const u32 *vgpr_init_shader_ptr;
4601 const struct soc15_reg_entry *vgpr_init_regs_ptr;
4602
4603 /* only support when RAS is enabled */
4604 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4605 return 0;
4606
4607 /* bail if the compute ring is not ready */
4608 if (!ring->sched.ready)
4609 return 0;
4610
4611 if (adev->asic_type == CHIP_ARCTURUS) {
4612 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4613 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4614 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4615 } else {
4616 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4617 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4618 vgpr_init_regs_ptr = vgpr_init_regs;
4619 }
4620
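/*
 * IB layout: the command stream for the three dispatches comes first,
 * followed by the VGPR-init shader and the SGPR-init shader, each placed
 * at a 256-byte aligned offset so COMPUTE_PGM_LO/HI can address them.
 */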
4621 total_size =
4622 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4623 total_size +=
4624 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4625 total_size +=
4626 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4627 total_size = ALIGN(total_size, 256);
4628 vgpr_offset = total_size;
4629 total_size += ALIGN(vgpr_init_shader_size, 256);
4630 sgpr_offset = total_size;
4631 total_size += sizeof(sgpr_init_compute_shader);
4632
4633 /* allocate an indirect buffer to put the commands in */
4634 memset(&ib, 0, sizeof(ib));
4635 r = amdgpu_ib_get(adev, NULL, total_size,
4636 AMDGPU_IB_POOL_DIRECT, &ib);
4637 if (r) {
4638 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4639 return r;
4640 }
4641
4642 /* load the compute shaders */
4643 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4644 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4645
4646 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4647 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4648
4649 /* init the ib length to 0 */
4650 ib.length_dw = 0;
4651
4652 /* VGPR */
4653 /* write the register state for the compute dispatch */
4654 for (i = 0; i < gpr_reg_size; i++) {
4655 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4656 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4657 - PACKET3_SET_SH_REG_START;
4658 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4659 }
4660 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
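/* the registers take a 256-byte aligned address, hence the shift by 8 */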
4661 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4662 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4663 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4664 - PACKET3_SET_SH_REG_START;
4665 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4666 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4667
4668 /* write dispatch packet */
4669 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4670 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4671 ib.ptr[ib.length_dw++] = 1; /* y */
4672 ib.ptr[ib.length_dw++] = 1; /* z */
4673 ib.ptr[ib.length_dw++] =
4674 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4675
4676 /* write CS partial flush packet */
4677 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4678 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4679
4680 /* SGPR1 */
4681 /* write the register state for the compute dispatch */
4682 for (i = 0; i < gpr_reg_size; i++) {
4683 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4684 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4685 - PACKET3_SET_SH_REG_START;
4686 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4687 }
4688 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4689 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4690 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4691 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4692 - PACKET3_SET_SH_REG_START;
4693 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4694 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4695
4696 /* write dispatch packet */
4697 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4698 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4699 ib.ptr[ib.length_dw++] = 1; /* y */
4700 ib.ptr[ib.length_dw++] = 1; /* z */
4701 ib.ptr[ib.length_dw++] =
4702 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4703
4704 /* write CS partial flush packet */
4705 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4706 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4707
4708 /* SGPR2 */
4709 /* write the register state for the compute dispatch */
4710 for (i = 0; i < gpr_reg_size; i++) {
4711 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4712 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4713 - PACKET3_SET_SH_REG_START;
4714 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4715 }
4716 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4717 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4718 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4719 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4720 - PACKET3_SET_SH_REG_START;
4721 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4722 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4723
4724 /* write dispatch packet */
4725 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4726 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4727 ib.ptr[ib.length_dw++] = 1; /* y */
4728 ib.ptr[ib.length_dw++] = 1; /* z */
4729 ib.ptr[ib.length_dw++] =
4730 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4731
4732 /* write CS partial flush packet */
4733 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4734 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4735
4736 /* schedule the ib on the ring */
4737 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4738 if (r) {
4739 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4740 goto fail;
4741 }
4742
4743 /* wait for the GPU to finish processing the IB */
4744 r = dma_fence_wait(f, false);
4745 if (r) {
4746 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4747 goto fail;
4748 }
4749
4750 fail:
4751 amdgpu_ib_free(adev, &ib, NULL);
4752 dma_fence_put(f);
4753
4754 return r;
4755 }
4756
4757 static int gfx_v9_0_early_init(void *handle)
4758 {
4759 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4760
4761 if (adev->asic_type == CHIP_ARCTURUS ||
4762 adev->asic_type == CHIP_ALDEBARAN)
4763 adev->gfx.num_gfx_rings = 0;
4764 else
4765 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4766 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4767 AMDGPU_MAX_COMPUTE_RINGS);
4768 gfx_v9_0_set_kiq_pm4_funcs(adev);
4769 gfx_v9_0_set_ring_funcs(adev);
4770 gfx_v9_0_set_irq_funcs(adev);
4771 gfx_v9_0_set_gds_init(adev);
4772 gfx_v9_0_set_rlc_funcs(adev);
4773
4774 return 0;
4775 }
4776
4777 static int gfx_v9_0_ecc_late_init(void *handle)
4778 {
4779 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4780 int r;
4781
4782 /*
4783 * Temporary workaround: on several cards the CP firmware fails to
4784 * update the read pointer when CPDMA writes the clearing operation
4785 * to GDS during the suspend/resume sequence, so limit this
4786 * operation to the cold boot sequence.
4787 */
4788 if ((!adev->in_suspend) &&
4789 (adev->gds.gds_size)) {
4790 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4791 if (r)
4792 return r;
4793 }
4794
4795 /* requires IBs so do in late init after IB pool is initialized */
4796 if (adev->asic_type == CHIP_ALDEBARAN)
4797 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4798 else
4799 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4800
4801 if (r)
4802 return r;
4803
4804 if (adev->gfx.ras_funcs &&
4805 adev->gfx.ras_funcs->ras_late_init) {
4806 r = adev->gfx.ras_funcs->ras_late_init(adev);
4807 if (r)
4808 return r;
4809 }
4810
4811 if (adev->gfx.ras_funcs &&
4812 adev->gfx.ras_funcs->enable_watchdog_timer)
4813 adev->gfx.ras_funcs->enable_watchdog_timer(adev);
4814
4815 return 0;
4816 }
4817
4818 static int gfx_v9_0_late_init(void *handle)
4819 {
4820 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4821 int r;
4822
4823 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4824 if (r)
4825 return r;
4826
4827 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4828 if (r)
4829 return r;
4830
4831 r = gfx_v9_0_ecc_late_init(handle);
4832 if (r)
4833 return r;
4834
4835 return 0;
4836 }
4837
4838 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4839 {
4840 uint32_t rlc_setting;
4841
4842 /* if RLC is not enabled, do nothing */
4843 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4844 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4845 return false;
4846
4847 return true;
4848 }
4849
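/*
 * Request RLC safe mode by writing the CMD/MESSAGE handshake to
 * RLC_SAFE_MODE, then spin until the CMD field reads back as zero.
 */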
4850 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4851 {
4852 uint32_t data;
4853 unsigned i;
4854
4855 data = RLC_SAFE_MODE__CMD_MASK;
4856 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4857 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4858
4859 /* wait for RLC_SAFE_MODE */
4860 for (i = 0; i < adev->usec_timeout; i++) {
4861 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4862 break;
4863 udelay(1);
4864 }
4865 }
4866
4867 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4868 {
4869 uint32_t data;
4870
4871 data = RLC_SAFE_MODE__CMD_MASK;
4872 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4873 }
4874
4875 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4876 bool enable)
4877 {
4878 amdgpu_gfx_rlc_enter_safe_mode(adev);
4879
4880 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4881 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4882 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4883 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4884 } else {
4885 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4886 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4887 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4888 }
4889
4890 amdgpu_gfx_rlc_exit_safe_mode(adev);
4891 }
4892
4893 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4894 bool enable)
4895 {
4896 /* TODO: double check if we need to perform this under safe mode */
4897 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4898
4899 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4900 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4901 else
4902 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4903
4904 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4905 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4906 else
4907 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4908
4909 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4910 }
4911
4912 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4913 bool enable)
4914 {
4915 uint32_t data, def;
4916
4917 amdgpu_gfx_rlc_enter_safe_mode(adev);
4918
4919 /* It is disabled by HW by default */
4920 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4921 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4922 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4923
4924 if (adev->asic_type != CHIP_VEGA12)
4925 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4926
4927 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4928 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4929 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4930
4931 /* only for Vega10 & Raven1 */
4932 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4933
4934 if (def != data)
4935 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4936
4937 /* MGLS is a global flag to control all MGLS in GFX */
4938 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4939 /* 2 - RLC memory Light sleep */
4940 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4941 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4942 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4943 if (def != data)
4944 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4945 }
4946 /* 3 - CP memory Light sleep */
4947 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4948 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4949 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4950 if (def != data)
4951 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4952 }
4953 }
4954 } else {
4955 /* 1 - MGCG_OVERRIDE */
4956 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4957
4958 if (adev->asic_type != CHIP_VEGA12)
4959 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4960
4961 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4962 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4963 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4964 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4965
4966 if (def != data)
4967 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4968
4969 /* 2 - disable MGLS in RLC */
4970 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4971 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4972 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4973 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4974 }
4975
4976 /* 3 - disable MGLS in CP */
4977 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4978 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4979 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4980 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4981 }
4982 }
4983
4984 amdgpu_gfx_rlc_exit_safe_mode(adev);
4985 }
4986
4987 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4988 bool enable)
4989 {
4990 uint32_t data, def;
4991
4992 if (!adev->gfx.num_gfx_rings)
4993 return;
4994
4995 amdgpu_gfx_rlc_enter_safe_mode(adev);
4996
4997 /* Enable 3D CGCG/CGLS */
4998 if (enable) {
4999 /* write cmd to clear cgcg/cgls ov */
5000 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5001 /* unset CGCG override */
5002 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5003 /* update CGCG and CGLS override bits */
5004 if (def != data)
5005 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5006
5007 /* enable 3Dcgcg FSM(0x0000363f) */
5008 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5009
5010 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5011 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5012 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5013 else
5014 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
5015
5016 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5017 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5018 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5019 if (def != data)
5020 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5021
5022 /* set IDLE_POLL_COUNT(0x00900100) */
5023 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5024 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5025 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5026 if (def != data)
5027 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5028 } else {
5029 /* Disable CGCG/CGLS */
5030 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5031 /* disable cgcg, cgls should be disabled */
5032 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5033 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5034 /* disable cgcg and cgls in FSM */
5035 if (def != data)
5036 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5037 }
5038
5039 amdgpu_gfx_rlc_exit_safe_mode(adev);
5040 }
5041
5042 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5043 bool enable)
5044 {
5045 uint32_t def, data;
5046
5047 amdgpu_gfx_rlc_enter_safe_mode(adev);
5048
5049 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5050 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5051 /* unset CGCG override */
5052 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5053 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5054 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5055 else
5056 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5057 /* update CGCG and CGLS override bits */
5058 if (def != data)
5059 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5060
5061 /* enable cgcg FSM(0x0000363F) */
5062 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5063
5064 if (adev->asic_type == CHIP_ARCTURUS)
5065 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5066 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5067 else
5068 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5069 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5070 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5071 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5072 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5073 if (def != data)
5074 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5075
5076 /* set IDLE_POLL_COUNT(0x00900100) */
5077 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5078 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5079 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5080 if (def != data)
5081 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5082 } else {
5083 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5084 /* reset CGCG/CGLS bits */
5085 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5086 /* disable cgcg and cgls in FSM */
5087 if (def != data)
5088 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5089 }
5090
5091 amdgpu_gfx_rlc_exit_safe_mode(adev);
5092 }
5093
5094 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5095 bool enable)
5096 {
5097 if (enable) {
5098 /* CGCG/CGLS should be enabled after MGCG/MGLS
5099 * === MGCG + MGLS ===
5100 */
5101 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5102 /* === CGCG /CGLS for GFX 3D Only === */
5103 gfx_v9_0_update_3d_clock_gating(adev, enable);
5104 /* === CGCG + CGLS === */
5105 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5106 } else {
5107 /* CGCG/CGLS should be disabled before MGCG/MGLS
5108 * === CGCG + CGLS ===
5109 */
5110 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5111 /* === CGCG /CGLS for GFX 3D Only === */
5112 gfx_v9_0_update_3d_clock_gating(adev, enable);
5113 /* === MGCG + MGLS === */
5114 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5115 }
5116 return 0;
5117 }
5118
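/*
 * Route RLC SPM (streaming performance monitor) sampling to the given VMID.
 * Under SR-IOV one-VF mode the register is accessed directly instead of
 * through the KIQ.
 */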
5119 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5120 {
5121 u32 reg, data;
5122
5123 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5124 if (amdgpu_sriov_is_pp_one_vf(adev))
5125 data = RREG32_NO_KIQ(reg);
5126 else
5127 data = RREG32(reg);
5128
5129 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5130 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5131
5132 if (amdgpu_sriov_is_pp_one_vf(adev))
5133 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5134 else
5135 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5136 }
5137
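/* Return true if @offset matches a register in the given RLCG access table. */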
5138 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5139 uint32_t offset,
5140 struct soc15_reg_rlcg *entries, int arr_size)
5141 {
5142 int i;
5143 uint32_t reg;
5144
5145 if (!entries)
5146 return false;
5147
5148 for (i = 0; i < arr_size; i++) {
5149 const struct soc15_reg_rlcg *entry;
5150
5151 entry = &entries[i];
5152 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5153 if (offset == reg)
5154 return true;
5155 }
5156
5157 return false;
5158 }
5159
5160 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5161 {
5162 return gfx_v9_0_check_rlcg_range(adev, offset,
5163 (void *)rlcg_access_gc_9_0,
5164 ARRAY_SIZE(rlcg_access_gc_9_0));
5165 }
5166
5167 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5168 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5169 .set_safe_mode = gfx_v9_0_set_safe_mode,
5170 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5171 .init = gfx_v9_0_rlc_init,
5172 .get_csb_size = gfx_v9_0_get_csb_size,
5173 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5174 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5175 .resume = gfx_v9_0_rlc_resume,
5176 .stop = gfx_v9_0_rlc_stop,
5177 .reset = gfx_v9_0_rlc_reset,
5178 .start = gfx_v9_0_rlc_start,
5179 .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5180 .sriov_wreg = gfx_v9_0_sriov_wreg,
5181 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5182 };
5183
5184 static int gfx_v9_0_set_powergating_state(void *handle,
5185 enum amd_powergating_state state)
5186 {
5187 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5188 bool enable = (state == AMD_PG_STATE_GATE);
5189
5190 switch (adev->asic_type) {
5191 case CHIP_RAVEN:
5192 case CHIP_RENOIR:
5193 if (!enable)
5194 amdgpu_gfx_off_ctrl(adev, false);
5195
5196 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5197 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5198 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5199 } else {
5200 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5201 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5202 }
5203
5204 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5205 gfx_v9_0_enable_cp_power_gating(adev, true);
5206 else
5207 gfx_v9_0_enable_cp_power_gating(adev, false);
5208
5209 /* update gfx cgpg state */
5210 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5211
5212 /* update mgcg state */
5213 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5214
5215 if (enable)
5216 amdgpu_gfx_off_ctrl(adev, true);
5217 break;
5218 case CHIP_VEGA12:
5219 amdgpu_gfx_off_ctrl(adev, enable);
5220 break;
5221 default:
5222 break;
5223 }
5224
5225 return 0;
5226 }
5227
5228 static int gfx_v9_0_set_clockgating_state(void *handle,
5229 enum amd_clockgating_state state)
5230 {
5231 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5232
5233 if (amdgpu_sriov_vf(adev))
5234 return 0;
5235
5236 switch (adev->asic_type) {
5237 case CHIP_VEGA10:
5238 case CHIP_VEGA12:
5239 case CHIP_VEGA20:
5240 case CHIP_RAVEN:
5241 case CHIP_ARCTURUS:
5242 case CHIP_RENOIR:
5243 case CHIP_ALDEBARAN:
5244 gfx_v9_0_update_gfx_clock_gating(adev,
5245 state == AMD_CG_STATE_GATE);
5246 break;
5247 default:
5248 break;
5249 }
5250 return 0;
5251 }
5252
5253 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5254 {
5255 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5256 int data;
5257
5258 if (amdgpu_sriov_vf(adev))
5259 *flags = 0;
5260
5261 /* AMD_CG_SUPPORT_GFX_MGCG */
5262 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5263 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5264 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5265
5266 /* AMD_CG_SUPPORT_GFX_CGCG */
5267 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5268 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5269 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5270
5271 /* AMD_CG_SUPPORT_GFX_CGLS */
5272 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5273 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5274
5275 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5276 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5277 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5278 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5279
5280 /* AMD_CG_SUPPORT_GFX_CP_LS */
5281 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5282 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5283 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5284
5285 if (adev->asic_type != CHIP_ARCTURUS) {
5286 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5287 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5288 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5289 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5290
5291 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5292 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5293 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5294 }
5295 }
5296
5297 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5298 {
5299 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
5300 }
5301
5302 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5303 {
5304 struct amdgpu_device *adev = ring->adev;
5305 u64 wptr;
5306
5307 /* XXX check if swapping is necessary on BE */
5308 if (ring->use_doorbell) {
5309 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5310 } else {
5311 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5312 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5313 }
5314
5315 return wptr;
5316 }
5317
5318 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5319 {
5320 struct amdgpu_device *adev = ring->adev;
5321
5322 if (ring->use_doorbell) {
5323 /* XXX check if swapping is necessary on BE */
5324 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5325 WDOORBELL64(ring->doorbell_index, ring->wptr);
5326 } else {
5327 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5328 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5329 }
5330 }
5331
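/*
 * Flush HDP by writing the ring's ref_and_mask bit to the NBIO HDP flush
 * request register and waiting for the matching bit in the done register;
 * the bit used depends on which CP engine (gfx PFP or compute ME/pipe)
 * issues the request.
 */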
5332 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5333 {
5334 struct amdgpu_device *adev = ring->adev;
5335 u32 ref_and_mask, reg_mem_engine;
5336 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5337
5338 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5339 switch (ring->me) {
5340 case 1:
5341 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5342 break;
5343 case 2:
5344 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5345 break;
5346 default:
5347 return;
5348 }
5349 reg_mem_engine = 0;
5350 } else {
5351 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5352 reg_mem_engine = 1; /* pfp */
5353 }
5354
5355 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5356 adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5357 adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5358 ref_and_mask, ref_and_mask, 0x20);
5359 }
5360
5361 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5362 struct amdgpu_job *job,
5363 struct amdgpu_ib *ib,
5364 uint32_t flags)
5365 {
5366 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5367 u32 header, control = 0;
5368
5369 if (ib->flags & AMDGPU_IB_FLAG_CE)
5370 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5371 else
5372 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5373
5374 control |= ib->length_dw | (vmid << 24);
5375
5376 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5377 control |= INDIRECT_BUFFER_PRE_ENB(1);
5378
5379 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5380 gfx_v9_0_ring_emit_de_meta(ring);
5381 }
5382
5383 amdgpu_ring_write(ring, header);
5384 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5385 amdgpu_ring_write(ring,
5386 #ifdef __BIG_ENDIAN
5387 (2 << 0) |
5388 #endif
5389 lower_32_bits(ib->gpu_addr));
5390 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5391 amdgpu_ring_write(ring, control);
5392 }
5393
5394 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5395 struct amdgpu_job *job,
5396 struct amdgpu_ib *ib,
5397 uint32_t flags)
5398 {
5399 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5400 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5401
5402 /* Currently, there is a high probability of a wave ID mismatch
5403 * between ME and GDS, leading to a hw deadlock, because ME generates
5404 * different wave IDs than the GDS expects. This situation happens
5405 * randomly when at least 5 compute pipes use GDS ordered append.
5406 * The wave IDs generated by ME are also wrong after suspend/resume.
5407 * Those are probably bugs somewhere else in the kernel driver.
5408 *
5409 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5410 * GDS to 0 for this ring (me/pipe).
5411 */
5412 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5413 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5414 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5415 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5416 }
5417
5418 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5419 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5420 amdgpu_ring_write(ring,
5421 #ifdef __BIG_ENDIAN
5422 (2 << 0) |
5423 #endif
5424 lower_32_bits(ib->gpu_addr));
5425 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5426 amdgpu_ring_write(ring, control);
5427 }
5428
5429 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5430 u64 seq, unsigned flags)
5431 {
5432 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5433 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5434 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5435
5436 /* RELEASE_MEM - flush caches, send int */
5437 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5438 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5439 EOP_TC_NC_ACTION_EN) :
5440 (EOP_TCL1_ACTION_EN |
5441 EOP_TC_ACTION_EN |
5442 EOP_TC_WB_ACTION_EN |
5443 EOP_TC_MD_ACTION_EN)) |
5444 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5445 EVENT_INDEX(5)));
5446 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5447
5448 /*
5449 * the address should be Qword aligned for a 64bit write, Dword
5450 * aligned if only the low 32bit data is sent (high data discarded)
5451 */
5452 if (write64bit)
5453 BUG_ON(addr & 0x7);
5454 else
5455 BUG_ON(addr & 0x3);
5456 amdgpu_ring_write(ring, lower_32_bits(addr));
5457 amdgpu_ring_write(ring, upper_32_bits(addr));
5458 amdgpu_ring_write(ring, lower_32_bits(seq));
5459 amdgpu_ring_write(ring, upper_32_bits(seq));
5460 amdgpu_ring_write(ring, 0);
5461 }
5462
5463 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5464 {
5465 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5466 uint32_t seq = ring->fence_drv.sync_seq;
5467 uint64_t addr = ring->fence_drv.gpu_addr;
5468
5469 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5470 lower_32_bits(addr), upper_32_bits(addr),
5471 seq, 0xffffffff, 4);
5472 }
5473
5474 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5475 unsigned vmid, uint64_t pd_addr)
5476 {
5477 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5478
5479 /* compute doesn't have PFP */
5480 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5481 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5482 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5483 amdgpu_ring_write(ring, 0x0);
5484 }
5485 }
5486
5487 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5488 {
5489 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5490 }
5491
5492 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5493 {
5494 u64 wptr;
5495
5496 /* XXX check if swapping is necessary on BE */
5497 if (ring->use_doorbell)
5498 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5499 else
5500 BUG();
5501 return wptr;
5502 }
5503
5504 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5505 {
5506 struct amdgpu_device *adev = ring->adev;
5507
5508 /* XXX check if swapping is necessary on BE */
5509 if (ring->use_doorbell) {
5510 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5511 WDOORBELL64(ring->doorbell_index, ring->wptr);
5512 } else {
5513 BUG(); /* only DOORBELL method supported on gfx9 now */
5514 }
5515 }
5516
5517 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5518 u64 seq, unsigned int flags)
5519 {
5520 struct amdgpu_device *adev = ring->adev;
5521
5522 /* we only allocate 32bit for each seq wb address */
5523 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5524
5525 /* write fence seq to the "addr" */
5526 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5527 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5528 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5529 amdgpu_ring_write(ring, lower_32_bits(addr));
5530 amdgpu_ring_write(ring, upper_32_bits(addr));
5531 amdgpu_ring_write(ring, lower_32_bits(seq));
5532
5533 if (flags & AMDGPU_FENCE_FLAG_INT) {
5534 /* set register to trigger INT */
5535 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5536 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5537 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5538 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5539 amdgpu_ring_write(ring, 0);
5540 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5541 }
5542 }
5543
5544 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5545 {
5546 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5547 amdgpu_ring_write(ring, 0);
5548 }
5549
5550 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5551 {
5552 struct v9_ce_ib_state ce_payload = {0};
5553 uint64_t csa_addr;
5554 int cnt;
5555
5556 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5557 csa_addr = amdgpu_csa_vaddr(ring->adev);
5558
5559 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5560 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5561 WRITE_DATA_DST_SEL(8) |
5562 WR_CONFIRM) |
5563 WRITE_DATA_CACHE_POLICY(0));
5564 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5565 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5566 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5567 }
5568
5569 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5570 {
5571 struct v9_de_ib_state de_payload = {0};
5572 uint64_t csa_addr, gds_addr;
5573 int cnt;
5574
5575 csa_addr = amdgpu_csa_vaddr(ring->adev);
5576 gds_addr = csa_addr + 4096;
5577 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5578 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5579
5580 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5581 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5582 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5583 WRITE_DATA_DST_SEL(8) |
5584 WR_CONFIRM) |
5585 WRITE_DATA_CACHE_POLICY(0));
5586 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5587 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5588 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5589 }
5590
5591 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5592 bool secure)
5593 {
5594 uint32_t v = secure ? FRAME_TMZ : 0;
5595
5596 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5597 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5598 }
5599
5600 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5601 {
5602 uint32_t dw2 = 0;
5603
5604 if (amdgpu_sriov_vf(ring->adev))
5605 gfx_v9_0_ring_emit_ce_meta(ring);
5606
5607 dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5608 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5609 /* set load_global_config & load_global_uconfig */
5610 dw2 |= 0x8001;
5611 /* set load_cs_sh_regs */
5612 dw2 |= 0x01000000;
5613 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5614 dw2 |= 0x10002;
5615
5616 /* set load_ce_ram if preamble presented */
5617 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5618 dw2 |= 0x10000000;
5619 } else {
5620 /* still load_ce_ram if this is the first time the preamble is presented,
5621 * even though no context switch happens.
5622 */
5623 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5624 dw2 |= 0x10000000;
5625 }
5626
5627 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5628 amdgpu_ring_write(ring, dw2);
5629 amdgpu_ring_write(ring, 0);
5630 }
5631
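/*
 * Open a conditional execution block: emit a COND_EXEC packet with a dummy
 * DW count (0x55aa55aa) and return its offset so that
 * gfx_v9_0_ring_emit_patch_cond_exec() can patch in the real size once the
 * conditional section has been emitted.
 */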
5632 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5633 {
5634 unsigned ret;
5635 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5636 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5637 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5638 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5639 ret = ring->wptr & ring->buf_mask;
5640 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5641 return ret;
5642 }
5643
5644 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5645 {
5646 unsigned cur;
5647 BUG_ON(offset > ring->buf_mask);
5648 BUG_ON(ring->ring[offset] != 0x55aa55aa);
5649
5650 cur = (ring->wptr & ring->buf_mask) - 1;
5651 if (likely(cur > offset))
5652 ring->ring[offset] = cur - offset;
5653 else
5654 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5655 }
5656
5657 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5658 uint32_t reg_val_offs)
5659 {
5660 struct amdgpu_device *adev = ring->adev;
5661
5662 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5663 amdgpu_ring_write(ring, 0 | /* src: register*/
5664 (5 << 8) | /* dst: memory */
5665 (1 << 20)); /* write confirm */
5666 amdgpu_ring_write(ring, reg);
5667 amdgpu_ring_write(ring, 0);
5668 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5669 reg_val_offs * 4));
5670 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5671 reg_val_offs * 4));
5672 }
5673
5674 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5675 uint32_t val)
5676 {
5677 uint32_t cmd = 0;
5678
5679 switch (ring->funcs->type) {
5680 case AMDGPU_RING_TYPE_GFX:
5681 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5682 break;
5683 case AMDGPU_RING_TYPE_KIQ:
5684 cmd = (1 << 16); /* no inc addr */
5685 break;
5686 default:
5687 cmd = WR_CONFIRM;
5688 break;
5689 }
5690 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5691 amdgpu_ring_write(ring, cmd);
5692 amdgpu_ring_write(ring, reg);
5693 amdgpu_ring_write(ring, 0);
5694 amdgpu_ring_write(ring, val);
5695 }
5696
5697 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5698 uint32_t val, uint32_t mask)
5699 {
5700 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5701 }
5702
5703 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5704 uint32_t reg0, uint32_t reg1,
5705 uint32_t ref, uint32_t mask)
5706 {
5707 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5708 struct amdgpu_device *adev = ring->adev;
5709 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5710 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5711
5712 if (fw_version_ok)
5713 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5714 ref, mask, 0x20);
5715 else
5716 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5717 ref, mask);
5718 }
5719
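/*
 * Soft recovery: issue an SQ_CMD targeting the hung job's VMID, which is
 * expected to kill the offending waves without a full GPU reset.
 */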
5720 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5721 {
5722 struct amdgpu_device *adev = ring->adev;
5723 uint32_t value = 0;
5724
5725 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5726 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5727 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5728 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5729 WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5730 }
5731
5732 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5733 enum amdgpu_interrupt_state state)
5734 {
5735 switch (state) {
5736 case AMDGPU_IRQ_STATE_DISABLE:
5737 case AMDGPU_IRQ_STATE_ENABLE:
5738 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5739 TIME_STAMP_INT_ENABLE,
5740 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5741 break;
5742 default:
5743 break;
5744 }
5745 }
5746
5747 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5748 int me, int pipe,
5749 enum amdgpu_interrupt_state state)
5750 {
5751 u32 mec_int_cntl, mec_int_cntl_reg;
5752
5753 /*
5754 * amdgpu controls only the first MEC. That's why this function only
5755 * handles the setting of interrupts for this specific MEC. All other
5756 * pipes' interrupts are set by amdkfd.
5757 */
5758
5759 if (me == 1) {
5760 switch (pipe) {
5761 case 0:
5762 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5763 break;
5764 case 1:
5765 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5766 break;
5767 case 2:
5768 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5769 break;
5770 case 3:
5771 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5772 break;
5773 default:
5774 DRM_DEBUG("invalid pipe %d\n", pipe);
5775 return;
5776 }
5777 } else {
5778 DRM_DEBUG("invalid me %d\n", me);
5779 return;
5780 }
5781
5782 switch (state) {
5783 case AMDGPU_IRQ_STATE_DISABLE:
5784 mec_int_cntl = RREG32(mec_int_cntl_reg);
5785 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5786 TIME_STAMP_INT_ENABLE, 0);
5787 WREG32(mec_int_cntl_reg, mec_int_cntl);
5788 break;
5789 case AMDGPU_IRQ_STATE_ENABLE:
5790 mec_int_cntl = RREG32(mec_int_cntl_reg);
5791 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5792 TIME_STAMP_INT_ENABLE, 1);
5793 WREG32(mec_int_cntl_reg, mec_int_cntl);
5794 break;
5795 default:
5796 break;
5797 }
5798 }
5799
5800 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5801 struct amdgpu_irq_src *source,
5802 unsigned type,
5803 enum amdgpu_interrupt_state state)
5804 {
5805 switch (state) {
5806 case AMDGPU_IRQ_STATE_DISABLE:
5807 case AMDGPU_IRQ_STATE_ENABLE:
5808 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5809 PRIV_REG_INT_ENABLE,
5810 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5811 break;
5812 default:
5813 break;
5814 }
5815
5816 return 0;
5817 }
5818
5819 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5820 struct amdgpu_irq_src *source,
5821 unsigned type,
5822 enum amdgpu_interrupt_state state)
5823 {
5824 switch (state) {
5825 case AMDGPU_IRQ_STATE_DISABLE:
5826 case AMDGPU_IRQ_STATE_ENABLE:
5827 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5828 PRIV_INSTR_INT_ENABLE,
5829 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5830 break;
5831 default:
5832 break;
5833 }
5834
5835 return 0;
5836 }
5837
5838 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
5839 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5840 CP_ECC_ERROR_INT_ENABLE, 1)
5841
5842 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
5843 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5844 CP_ECC_ERROR_INT_ENABLE, 0)
5845
5846 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5847 struct amdgpu_irq_src *source,
5848 unsigned type,
5849 enum amdgpu_interrupt_state state)
5850 {
5851 switch (state) {
5852 case AMDGPU_IRQ_STATE_DISABLE:
5853 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5854 CP_ECC_ERROR_INT_ENABLE, 0);
5855 DISABLE_ECC_ON_ME_PIPE(1, 0);
5856 DISABLE_ECC_ON_ME_PIPE(1, 1);
5857 DISABLE_ECC_ON_ME_PIPE(1, 2);
5858 DISABLE_ECC_ON_ME_PIPE(1, 3);
5859 break;
5860
5861 case AMDGPU_IRQ_STATE_ENABLE:
5862 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5863 CP_ECC_ERROR_INT_ENABLE, 1);
5864 ENABLE_ECC_ON_ME_PIPE(1, 0);
5865 ENABLE_ECC_ON_ME_PIPE(1, 1);
5866 ENABLE_ECC_ON_ME_PIPE(1, 2);
5867 ENABLE_ECC_ON_ME_PIPE(1, 3);
5868 break;
5869 default:
5870 break;
5871 }
5872
5873 return 0;
5874 }
5875
5876
5877 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5878 struct amdgpu_irq_src *src,
5879 unsigned type,
5880 enum amdgpu_interrupt_state state)
5881 {
5882 switch (type) {
5883 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5884 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5885 break;
5886 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5887 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5888 break;
5889 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5890 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5891 break;
5892 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5893 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5894 break;
5895 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5896 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5897 break;
5898 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5899 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5900 break;
5901 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5902 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5903 break;
5904 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5905 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5906 break;
5907 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5908 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5909 break;
5910 default:
5911 break;
5912 }
5913 return 0;
5914 }
5915
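/*
 * EOP (end-of-pipe) interrupt handler: decode me/pipe/queue from the IH
 * ring_id and run fence processing on the matching gfx or compute ring.
 */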
5916 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5917 struct amdgpu_irq_src *source,
5918 struct amdgpu_iv_entry *entry)
5919 {
5920 int i;
5921 u8 me_id, pipe_id, queue_id;
5922 struct amdgpu_ring *ring;
5923
5924 DRM_DEBUG("IH: CP EOP\n");
5925 me_id = (entry->ring_id & 0x0c) >> 2;
5926 pipe_id = (entry->ring_id & 0x03) >> 0;
5927 queue_id = (entry->ring_id & 0x70) >> 4;
5928
5929 switch (me_id) {
5930 case 0:
5931 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5932 break;
5933 case 1:
5934 case 2:
5935 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5936 ring = &adev->gfx.compute_ring[i];
5937 /* Per-queue interrupt is supported for MEC starting from VI.
5938 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5939 */
5940 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5941 amdgpu_fence_process(ring);
5942 }
5943 break;
5944 }
5945 return 0;
5946 }
5947
5948 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5949 struct amdgpu_iv_entry *entry)
5950 {
5951 u8 me_id, pipe_id, queue_id;
5952 struct amdgpu_ring *ring;
5953 int i;
5954
5955 me_id = (entry->ring_id & 0x0c) >> 2;
5956 pipe_id = (entry->ring_id & 0x03) >> 0;
5957 queue_id = (entry->ring_id & 0x70) >> 4;
5958
5959 switch (me_id) {
5960 case 0:
5961 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5962 break;
5963 case 1:
5964 case 2:
5965 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5966 ring = &adev->gfx.compute_ring[i];
5967 if (ring->me == me_id && ring->pipe == pipe_id &&
5968 ring->queue == queue_id)
5969 drm_sched_fault(&ring->sched);
5970 }
5971 break;
5972 }
5973 }
5974
5975 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5976 struct amdgpu_irq_src *source,
5977 struct amdgpu_iv_entry *entry)
5978 {
5979 DRM_ERROR("Illegal register access in command stream\n");
5980 gfx_v9_0_fault(adev, entry);
5981 return 0;
5982 }
5983
5984 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5985 struct amdgpu_irq_src *source,
5986 struct amdgpu_iv_entry *entry)
5987 {
5988 DRM_ERROR("Illegal instruction in command stream\n");
5989 gfx_v9_0_fault(adev, entry);
5990 return 0;
5991 }
5992
5993
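/*
 * Map each GFX9 EDC counter register to the SEC/DED bit fields used when
 * extracting correctable/uncorrectable error counts (0 where a field does
 * not exist).
 */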
5994 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5995 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5996 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5997 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5998 },
5999 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6000 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6001 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6002 },
6003 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6004 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6005 0, 0
6006 },
6007 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6008 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6009 0, 0
6010 },
6011 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6012 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6013 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6014 },
6015 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6016 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6017 0, 0
6018 },
6019 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6020 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6021 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6022 },
6023 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6024 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6025 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6026 },
6027 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6028 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6029 0, 0
6030 },
6031 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6032 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6033 0, 0
6034 },
6035 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6036 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6037 0, 0
6038 },
6039 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6040 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6041 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6042 },
6043 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6044 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6045 0, 0
6046 },
6047 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6048 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6049 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6050 },
6051 { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6052 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6053 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6054 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6055 },
6056 { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6057 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6058 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6059 0, 0
6060 },
6061 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6062 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6063 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6064 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6065 },
6066 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6067 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6068 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6069 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6070 },
6071 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6072 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6073 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6074 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6075 },
6076 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6077 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6078 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6079 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6080 },
6081 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6082 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6083 0, 0
6084 },
6085 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6086 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6087 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6088 },
6089 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6090 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6091 0, 0
6092 },
6093 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6094 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6095 0, 0
6096 },
6097 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6098 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6099 0, 0
6100 },
6101 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6102 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6103 0, 0
6104 },
6105 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6106 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6107 0, 0
6108 },
6109 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6110 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6111 0, 0
6112 },
6113 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6114 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6115 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6116 },
6117 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6118 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6119 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6120 },
6121 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6122 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6123 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6124 },
6125 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6126 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6127 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6128 },
6129 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6130 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6131 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6132 },
6133 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6134 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6135 0, 0
6136 },
6137 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6138 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6139 0, 0
6140 },
6141 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6142 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6143 0, 0
6144 },
6145 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6146 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6147 0, 0
6148 },
6149 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6150 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6151 0, 0
6152 },
6153 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6154 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6155 0, 0
6156 },
6157 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6158 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6159 0, 0
6160 },
6161 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6162 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6163 0, 0
6164 },
6165 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6166 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6167 0, 0
6168 },
6169 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6170 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6171 0, 0
6172 },
6173 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6174 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6175 0, 0
6176 },
6177 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6178 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6179 0, 0
6180 },
6181 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6182 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6183 0, 0
6184 },
6185 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6186 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6187 0, 0
6188 },
6189 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6190 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6191 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6192 },
6193 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6194 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6195 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6196 },
6197 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6198 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6199 0, 0
6200 },
6201 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6202 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6203 0, 0
6204 },
6205 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6206 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6207 0, 0
6208 },
6209 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6210 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6211 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6212 },
6213 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6214 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6215 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6216 },
6217 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6218 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6219 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6220 },
6221 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6222 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6223 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6224 },
6225 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6226 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6227 0, 0
6228 },
6229 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6230 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6231 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6232 },
6233 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6234 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6235 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6236 },
6237 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6238 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6239 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6240 },
6241 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6242 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6243 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6244 },
6245 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6246 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6247 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6248 },
6249 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6250 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6251 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6252 },
6253 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6254 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6255 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6256 },
6257 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6258 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6259 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6260 },
6261 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6262 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6263 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6264 },
6265 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6266 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6267 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6268 },
6269 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6270 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6271 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6272 },
6273 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6274 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6275 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6276 },
6277 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6278 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6279 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6280 },
6281 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6282 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6283 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6284 },
6285 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6286 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6287 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6288 },
6289 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6290 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6291 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6292 },
6293 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6294 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6295 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6296 },
6297 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6298 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6299 0, 0
6300 },
6301 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6302 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6303 0, 0
6304 },
6305 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6306 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6307 0, 0
6308 },
6309 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6310 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6311 0, 0
6312 },
6313 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6314 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6315 0, 0
6316 },
6317 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6318 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6319 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6320 },
6321 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6322 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6323 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6324 },
6325 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6326 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6327 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6328 },
6329 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6330 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6331 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6332 },
6333 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6334 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6335 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6336 },
6337 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6338 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6339 0, 0
6340 },
6341 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6342 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6343 0, 0
6344 },
6345 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6346 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6347 0, 0
6348 },
6349 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6350 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6351 0, 0
6352 },
6353 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6354 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6355 0, 0
6356 },
6357 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6358 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6359 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6360 },
6361 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6362 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6363 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6364 },
6365 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6366 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6367 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6368 },
6369 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6370 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6371 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6372 },
6373 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6374 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6375 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6376 },
6377 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6378 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6379 0, 0
6380 },
6381 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6382 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6383 0, 0
6384 },
6385 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6386 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6387 0, 0
6388 },
6389 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6390 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6391 0, 0
6392 },
6393 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6394 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6395 0, 0
6396 },
6397 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6398 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6399 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6400 },
6401 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6402 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6403 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6404 },
6405 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6406 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6407 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6408 },
6409 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6410 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6411 0, 0
6412 },
6413 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6414 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6415 0, 0
6416 },
6417 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6418 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6419 0, 0
6420 },
6421 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6422 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6423 0, 0
6424 },
6425 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6426 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6427 0, 0
6428 },
6429 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6430 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6431 0, 0
6432 }
6433 };
6434
6435 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6436 void *inject_if)
6437 {
6438 struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6439 int ret;
6440 struct ta_ras_trigger_error_input block_info = { 0 };
6441
6442 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6443 return -EINVAL;
6444
6445 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6446 return -EINVAL;
6447
6448 if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6449 return -EPERM;
6450
6451 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6452 info->head.type)) {
6453 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6454 ras_gfx_subblocks[info->head.sub_block_index].name,
6455 info->head.type);
6456 return -EPERM;
6457 }
6458
6459 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6460 info->head.type)) {
6461 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6462 ras_gfx_subblocks[info->head.sub_block_index].name,
6463 info->head.type);
6464 return -EPERM;
6465 }
6466
6467 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6468 block_info.sub_block_index =
6469 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6470 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6471 block_info.address = info->address;
6472 block_info.value = info->value;
6473
6474 mutex_lock(&adev->grbm_idx_mutex);
6475 ret = psp_ras_trigger_error(&adev->psp, &block_info);
6476 mutex_unlock(&adev->grbm_idx_mutex);
6477
6478 return ret;
6479 }
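/*
 * Illustrative sketch (not part of the driver): how a caller might fill in
 * the injection request consumed by gfx_v9_0_ras_error_inject() above. The
 * field names come from the function body; the sub_block_index, address and
 * value below are hypothetical, and the error-type constant is assumed to
 * come from amdgpu_ras.h.
 *
 *	struct ras_inject_if inject = { 0 };
 *
 *	inject.head.block = AMDGPU_RAS_BLOCK__GFX;
 *	inject.head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
 *	inject.head.sub_block_index = 0;	/+ index into ras_gfx_subblocks[] +/
 *	inject.address = 0;
 *	inject.value = 0;
 *
 *	ret = gfx_v9_0_ras_error_inject(adev, &inject);
 *	(returns -EINVAL/-EPERM for unsupported sub-blocks or error types)
 */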
6480
6481 static const char *vml2_mems[] = {
6482 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6483 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6484 "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6485 "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6486 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6487 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6488 "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6489 "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6490 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6491 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6492 "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6493 "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6494 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6495 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6496 "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6497 "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6498 };
6499
6500 static const char *vml2_walker_mems[] = {
6501 "UTC_VML2_CACHE_PDE0_MEM0",
6502 "UTC_VML2_CACHE_PDE0_MEM1",
6503 "UTC_VML2_CACHE_PDE1_MEM0",
6504 "UTC_VML2_CACHE_PDE1_MEM1",
6505 "UTC_VML2_CACHE_PDE2_MEM0",
6506 "UTC_VML2_CACHE_PDE2_MEM1",
6507 "UTC_VML2_RDIF_LOG_FIFO",
6508 };
6509
6510 static const char *atc_l2_cache_2m_mems[] = {
6511 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6512 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6513 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6514 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6515 };
6516
6517 static const char *atc_l2_cache_4k_mems[] = {
6518 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6519 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6520 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6521 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6522 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6523 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6524 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6525 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6526 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6527 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6528 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6529 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6530 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6531 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6532 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6533 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6534 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6535 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6536 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6537 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6538 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6539 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6540 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6541 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6542 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6543 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6544 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6545 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6546 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6547 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6548 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6549 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6550 };
6551
6552 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6553 struct ras_err_data *err_data)
6554 {
6555 uint32_t i, data;
6556 uint32_t sec_count, ded_count;
6557
6558 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6559 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6560 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6561 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6562 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6563 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6564 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6565 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6566
6567 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6568 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6569 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6570
6571 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6572 if (sec_count) {
6573 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6574 "SEC %d\n", i, vml2_mems[i], sec_count);
6575 err_data->ce_count += sec_count;
6576 }
6577
6578 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6579 if (ded_count) {
6580 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6581 "DED %d\n", i, vml2_mems[i], ded_count);
6582 err_data->ue_count += ded_count;
6583 }
6584 }
6585
6586 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6587 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6588 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6589
6590 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6591 SEC_COUNT);
6592 if (sec_count) {
6593 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6594 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6595 err_data->ce_count += sec_count;
6596 }
6597
6598 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6599 DED_COUNT);
6600 if (ded_count) {
6601 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6602 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6603 err_data->ue_count += ded_count;
6604 }
6605 }
6606
6607 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6608 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6609 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6610
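/* mask 0x6000 selects the 2-bit SEC count in bits 14:13 of the EDC counter
 * (the 4K cache counters below use the same layout) */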
6611 sec_count = (data & 0x00006000L) >> 0xd;
6612 if (sec_count) {
6613 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6614 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6615 sec_count);
6616 err_data->ce_count += sec_count;
6617 }
6618 }
6619
6620 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6621 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6622 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6623
6624 sec_count = (data & 0x00006000L) >> 0xd;
6625 if (sec_count) {
6626 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6627 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6628 sec_count);
6629 err_data->ce_count += sec_count;
6630 }
6631
6632 ded_count = (data & 0x00018000L) >> 0xf;
6633 if (ded_count) {
6634 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6635 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6636 ded_count);
6637 err_data->ue_count += ded_count;
6638 }
6639 }
6640
6641 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6642 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6643 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6644 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6645
6646 return 0;
6647 }
6648
6649 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6650 const struct soc15_reg_entry *reg,
6651 uint32_t se_id, uint32_t inst_id, uint32_t value,
6652 uint32_t *sec_count, uint32_t *ded_count)
6653 {
6654 uint32_t i;
6655 uint32_t sec_cnt, ded_cnt;
6656
6657 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6658 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6659 gfx_v9_0_ras_fields[i].seg != reg->seg ||
6660 gfx_v9_0_ras_fields[i].inst != reg->inst)
6661 continue;
6662
6663 sec_cnt = (value &
6664 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6665 gfx_v9_0_ras_fields[i].sec_count_shift;
6666 if (sec_cnt) {
6667 dev_info(adev->dev, "GFX SubBlock %s, "
6668 "Instance[%d][%d], SEC %d\n",
6669 gfx_v9_0_ras_fields[i].name,
6670 se_id, inst_id,
6671 sec_cnt);
6672 *sec_count += sec_cnt;
6673 }
6674
6675 ded_cnt = (value &
6676 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6677 gfx_v9_0_ras_fields[i].ded_count_shift;
6678 if (ded_cnt) {
6679 dev_info(adev->dev, "GFX SubBlock %s, "
6680 "Instance[%d][%d], DED %d\n",
6681 gfx_v9_0_ras_fields[i].name,
6682 se_id, inst_id,
6683 ded_cnt);
6684 *ded_count += ded_cnt;
6685 }
6686 }
6687
6688 return 0;
6689 }
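/*
 * Worked example (hypothetical value, not part of the driver): for the
 * "SQ_SGPR" entry in gfx_v9_0_ras_fields, the sec_count_mask and
 * sec_count_shift come from SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT).
 * If the raw SQ_EDC_CNT value holds, say, 0x3 in that field, the extraction
 * above reduces to
 *
 *	sec_cnt = (value & sec_count_mask) >> sec_count_shift;  == 3
 *
 * and 3 is accumulated into *sec_count; DED counts are handled the same way
 * with the ded_count_mask/ded_count_shift pair of the same table entry.
 */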
6690
6691 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6692 {
6693 int i, j, k;
6694
6695 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6696 return;
6697
6698 /* read back registers to clear the counters */
6699 mutex_lock(&adev->grbm_idx_mutex);
6700 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6701 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6702 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6703 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6704 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6705 }
6706 }
6707 }
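/* 0xe0000000 is assumed to set the SE/SH/instance broadcast bits of
 * GRBM_GFX_INDEX, restoring broadcast after the per-instance selection above */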
6708 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6709 mutex_unlock(&adev->grbm_idx_mutex);
6710
6711 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6712 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6713 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6714 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6715 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6716 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6717 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6718 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6719
6720 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6721 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6722 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6723 }
6724
6725 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6726 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6727 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6728 }
6729
6730 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6731 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6732 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6733 }
6734
6735 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6736 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6737 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6738 }
6739
6740 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6741 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6742 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6743 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6744 }
6745
6746 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6747 void *ras_error_status)
6748 {
6749 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6750 uint32_t sec_count = 0, ded_count = 0;
6751 uint32_t i, j, k;
6752 uint32_t reg_value;
6753
6754 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6755 return -EINVAL;
6756
6757 err_data->ue_count = 0;
6758 err_data->ce_count = 0;
6759
6760 mutex_lock(&adev->grbm_idx_mutex);
6761
6762 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6763 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6764 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6765 gfx_v9_0_select_se_sh(adev, j, 0, k);
6766 reg_value =
6767 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6768 if (reg_value)
6769 gfx_v9_0_ras_error_count(adev,
6770 &gfx_v9_0_edc_counter_regs[i],
6771 j, k, reg_value,
6772 &sec_count, &ded_count);
6773 }
6774 }
6775 }
6776
6777 err_data->ce_count += sec_count;
6778 err_data->ue_count += ded_count;
6779
6780 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6781 mutex_unlock(&adev->grbm_idx_mutex);
6782
6783 gfx_v9_0_query_utc_edc_status(adev, err_data);
6784
6785 return 0;
6786 }
6787
6788 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6789 {
6790 const unsigned int cp_coher_cntl =
6791 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6792 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6793 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6794 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6795 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6796
6797 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6798 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6799 amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6800 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
6801 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
6802 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6803 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
6804 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6805 }
6806
6807 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6808 uint32_t pipe, bool enable)
6809 {
6810 struct amdgpu_device *adev = ring->adev;
6811 uint32_t val;
6812 uint32_t wcl_cs_reg;
6813
6814 /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
6815 val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6816
6817 switch (pipe) {
6818 case 0:
6819 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6820 break;
6821 case 1:
6822 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6823 break;
6824 case 2:
6825 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6826 break;
6827 case 3:
6828 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6829 break;
6830 default:
6831 DRM_DEBUG("invalid pipe %d\n", pipe);
6832 return;
6833 }
6834
6835 amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6836
6837 }
6838 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6839 {
6840 struct amdgpu_device *adev = ring->adev;
6841 uint32_t val;
6842 int i;
6843
6844
6845 /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to limit
6846 * the number of gfx waves. Writing a 5-bit value (0x1f) makes sure gfx only
6847 * gets around 25% of the GPU resources.
6848 */
6849 val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6850 amdgpu_ring_emit_wreg(ring,
6851 SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6852 val);
6853
6854 /* Restrict waves for normal/low priority compute queues as well
6855 * to get the best QoS for high priority compute jobs.
6856 *
6857 * amdgpu controls only the 1st ME (CS pipes 0-3).
6858 */
6859 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6860 if (i != ring->pipe)
6861 gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6862
6863 }
6864 }
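/*
 * Arithmetic sketch for the ~25% figure mentioned in the comment above
 * (illustrative only): with SPI_WCL_PIPE_PERCENT_GFX treated as a 7-bit
 * multiplier, the limit written here works out to roughly
 *
 *	0x1f / 0x7f = 31 / 127 ~= 24.4%
 *
 * of the wave slots, i.e. about a quarter of the GPU, leaving the rest for
 * high priority compute work.
 */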
6865
6866 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6867 .name = "gfx_v9_0",
6868 .early_init = gfx_v9_0_early_init,
6869 .late_init = gfx_v9_0_late_init,
6870 .sw_init = gfx_v9_0_sw_init,
6871 .sw_fini = gfx_v9_0_sw_fini,
6872 .hw_init = gfx_v9_0_hw_init,
6873 .hw_fini = gfx_v9_0_hw_fini,
6874 .suspend = gfx_v9_0_suspend,
6875 .resume = gfx_v9_0_resume,
6876 .is_idle = gfx_v9_0_is_idle,
6877 .wait_for_idle = gfx_v9_0_wait_for_idle,
6878 .soft_reset = gfx_v9_0_soft_reset,
6879 .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6880 .set_powergating_state = gfx_v9_0_set_powergating_state,
6881 .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6882 };
6883
6884 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6885 .type = AMDGPU_RING_TYPE_GFX,
6886 .align_mask = 0xff,
6887 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6888 .support_64bit_ptrs = true,
6889 .vmhub = AMDGPU_GFXHUB_0,
6890 .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6891 .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6892 .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6893 .emit_frame_size = /* totally 242 maximum if 16 IBs */
6894 5 + /* COND_EXEC */
6895 7 + /* PIPELINE_SYNC */
6896 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6897 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6898 2 + /* VM_FLUSH */
6899 8 + /* FENCE for VM_FLUSH */
6900 20 + /* GDS switch */
6901 4 + /* double SWITCH_BUFFER,
6902 the first COND_EXEC jumps to the place just
6903 prior to this double SWITCH_BUFFER */
6904 5 + /* COND_EXEC */
6905 7 + /* HDP_flush */
6906 4 + /* VGT_flush */
6907 14 + /* CE_META */
6908 31 + /* DE_META */
6909 3 + /* CNTX_CTRL */
6910 5 + /* HDP_INVL */
6911 8 + 8 + /* FENCE x2 */
6912 2 + /* SWITCH_BUFFER */
6913 7, /* gfx_v9_0_emit_mem_sync */
6914 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6915 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6916 .emit_fence = gfx_v9_0_ring_emit_fence,
6917 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6918 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6919 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6920 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6921 .test_ring = gfx_v9_0_ring_test_ring,
6922 .test_ib = gfx_v9_0_ring_test_ib,
6923 .insert_nop = amdgpu_ring_insert_nop,
6924 .pad_ib = amdgpu_ring_generic_pad_ib,
6925 .emit_switch_buffer = gfx_v9_ring_emit_sb,
6926 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6927 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6928 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6929 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6930 .emit_wreg = gfx_v9_0_ring_emit_wreg,
6931 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6932 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6933 .soft_recovery = gfx_v9_0_ring_soft_recovery,
6934 .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6935 };
6936
6937 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6938 .type = AMDGPU_RING_TYPE_COMPUTE,
6939 .align_mask = 0xff,
6940 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6941 .support_64bit_ptrs = true,
6942 .vmhub = AMDGPU_GFXHUB_0,
6943 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6944 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6945 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6946 .emit_frame_size =
6947 20 + /* gfx_v9_0_ring_emit_gds_switch */
6948 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6949 5 + /* hdp invalidate */
6950 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6951 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6952 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6953 2 + /* gfx_v9_0_ring_emit_vm_flush */
6954 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6955 7 + /* gfx_v9_0_emit_mem_sync */
6956 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6957 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6958 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6959 .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6960 .emit_fence = gfx_v9_0_ring_emit_fence,
6961 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6962 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6963 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6964 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6965 .test_ring = gfx_v9_0_ring_test_ring,
6966 .test_ib = gfx_v9_0_ring_test_ib,
6967 .insert_nop = amdgpu_ring_insert_nop,
6968 .pad_ib = amdgpu_ring_generic_pad_ib,
6969 .emit_wreg = gfx_v9_0_ring_emit_wreg,
6970 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6971 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6972 .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6973 .emit_wave_limit = gfx_v9_0_emit_wave_limit,
6974 };
6975
6976 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6977 .type = AMDGPU_RING_TYPE_KIQ,
6978 .align_mask = 0xff,
6979 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6980 .support_64bit_ptrs = true,
6981 .vmhub = AMDGPU_GFXHUB_0,
6982 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6983 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6984 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6985 .emit_frame_size =
6986 20 + /* gfx_v9_0_ring_emit_gds_switch */
6987 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6988 5 + /* hdp invalidate */
6989 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6990 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6991 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6992 2 + /* gfx_v9_0_ring_emit_vm_flush */
6993 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6994 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6995 .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6996 .test_ring = gfx_v9_0_ring_test_ring,
6997 .insert_nop = amdgpu_ring_insert_nop,
6998 .pad_ib = amdgpu_ring_generic_pad_ib,
6999 .emit_rreg = gfx_v9_0_ring_emit_rreg,
7000 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7001 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7002 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7003 };
7004
7005 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7006 {
7007 int i;
7008
7009 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7010
7011 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7012 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7013
7014 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7015 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7016 }
7017
7018 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7019 .set = gfx_v9_0_set_eop_interrupt_state,
7020 .process = gfx_v9_0_eop_irq,
7021 };
7022
7023 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7024 .set = gfx_v9_0_set_priv_reg_fault_state,
7025 .process = gfx_v9_0_priv_reg_irq,
7026 };
7027
7028 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7029 .set = gfx_v9_0_set_priv_inst_fault_state,
7030 .process = gfx_v9_0_priv_inst_irq,
7031 };
7032
7033 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7034 .set = gfx_v9_0_set_cp_ecc_error_state,
7035 .process = amdgpu_gfx_cp_ecc_error_irq,
7036 };
7037
7038
7039 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7040 {
7041 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7042 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7043
7044 adev->gfx.priv_reg_irq.num_types = 1;
7045 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7046
7047 adev->gfx.priv_inst_irq.num_types = 1;
7048 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7049
7050 adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7051 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7052 }
7053
7054 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7055 {
7056 switch (adev->asic_type) {
7057 case CHIP_VEGA10:
7058 case CHIP_VEGA12:
7059 case CHIP_VEGA20:
7060 case CHIP_RAVEN:
7061 case CHIP_ARCTURUS:
7062 case CHIP_RENOIR:
7063 case CHIP_ALDEBARAN:
7064 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7065 break;
7066 default:
7067 break;
7068 }
7069 }
7070
7071 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7072 {
7073 /* init asic gds info */
7074 switch (adev->asic_type) {
7075 case CHIP_VEGA10:
7076 case CHIP_VEGA12:
7077 case CHIP_VEGA20:
7078 adev->gds.gds_size = 0x10000;
7079 break;
7080 case CHIP_RAVEN:
7081 case CHIP_ARCTURUS:
7082 adev->gds.gds_size = 0x1000;
7083 break;
7084 case CHIP_ALDEBARAN:
7085 /* aldebaran removed all the GDS internal memory,
7086 * so only GWS opcodes such as barrier and
7087 * semaphore are supported in the kernel */
7088 adev->gds.gds_size = 0;
7089 break;
7090 default:
7091 adev->gds.gds_size = 0x10000;
7092 break;
7093 }
7094
7095 switch (adev->asic_type) {
7096 case CHIP_VEGA10:
7097 case CHIP_VEGA20:
7098 adev->gds.gds_compute_max_wave_id = 0x7ff;
7099 break;
7100 case CHIP_VEGA12:
7101 adev->gds.gds_compute_max_wave_id = 0x27f;
7102 break;
7103 case CHIP_RAVEN:
7104 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7105 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7106 else
7107 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7108 break;
7109 case CHIP_ARCTURUS:
7110 adev->gds.gds_compute_max_wave_id = 0xfff;
7111 break;
7112 case CHIP_ALDEBARAN:
7113 /* deprecated for Aldebaran, no usage at all */
7114 adev->gds.gds_compute_max_wave_id = 0;
7115 break;
7116 default:
7117 /* this really depends on the chip */
7118 adev->gds.gds_compute_max_wave_id = 0x7ff;
7119 break;
7120 }
7121
7122 adev->gds.gws_size = 64;
7123 adev->gds.oa_size = 16;
7124 }
7125
7126 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7127 u32 bitmap)
7128 {
7129 u32 data;
7130
7131 if (!bitmap)
7132 return;
7133
7134 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7135 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7136
7137 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7138 }
7139
7140 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7141 {
7142 u32 data, mask;
7143
7144 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7145 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7146
7147 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7148 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7149
7150 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7151
7152 return (~data) & mask;
7153 }
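/*
 * Worked example (hypothetical register contents, not part of the driver):
 * if the combined CC/GC_USER INACTIVE_CUS field reads 0b0011 (CU0 and CU1
 * disabled) and max_cu_per_sh is 8, then
 *
 *	mask          = amdgpu_gfx_create_bitmask(8) = 0xff
 *	active bitmap = (~0b0011) & 0xff = 0b11111100
 *
 * i.e. CUs 2-7 are reported as active for the currently selected SE/SH.
 */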
7154
7155 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7156 struct amdgpu_cu_info *cu_info)
7157 {
7158 int i, j, k, counter, active_cu_number = 0;
7159 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7160 unsigned disable_masks[4 * 4];
7161
7162 if (!adev || !cu_info)
7163 return -EINVAL;
7164
7165 /*
7166 * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
7167 */
7168 if (adev->gfx.config.max_shader_engines *
7169 adev->gfx.config.max_sh_per_se > 16)
7170 return -EINVAL;
7171
7172 amdgpu_gfx_parse_disable_cu(disable_masks,
7173 adev->gfx.config.max_shader_engines,
7174 adev->gfx.config.max_sh_per_se);
7175
7176 mutex_lock(&adev->grbm_idx_mutex);
7177 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7178 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7179 mask = 1;
7180 ao_bitmap = 0;
7181 counter = 0;
7182 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
7183 gfx_v9_0_set_user_cu_inactive_bitmap(
7184 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7185 bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7186
7187 /*
7188 * The bitmap (and ao_cu_bitmap) in the cu_info structure is
7189 * a 4x4 array, which is usually suitable for Vega
7190 * ASICs with a 4*2 SE/SH layout.
7191 * But for Arcturus, the SE/SH layout is changed to 8*1.
7192 * To minimize the impact, we make it compatible
7193 * with the current bitmap array as below:
7194 * SE4,SH0 --> bitmap[0][1]
7195 * SE5,SH0 --> bitmap[1][1]
7196 * SE6,SH0 --> bitmap[2][1]
7197 * SE7,SH0 --> bitmap[3][1]
7198 */
7199 cu_info->bitmap[i % 4][j + i / 4] = bitmap;
7200
7201 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7202 if (bitmap & mask) {
7203 if (counter < adev->gfx.config.max_cu_per_sh)
7204 ao_bitmap |= mask;
7205 counter++;
7206 }
7207 mask <<= 1;
7208 }
7209 active_cu_number += counter;
7210 if (i < 2 && j < 2)
7211 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7212 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7213 }
7214 }
7215 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7216 mutex_unlock(&adev->grbm_idx_mutex);
7217
7218 cu_info->number = active_cu_number;
7219 cu_info->ao_cu_mask = ao_cu_mask;
7220 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7221
7222 return 0;
7223 }
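/*
 * Index sketch for the Arcturus remapping noted in the comment above: with
 * i = SE and j = SH, cu_info->bitmap[i % 4][j + i / 4] places SE5/SH0
 * (i = 5, j = 0) at bitmap[5 % 4][0 + 5 / 4] = bitmap[1][1], matching the
 * table in the comment, while SE0-3 with SH0-1 keep their natural [i][j]
 * slots.
 */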
7224
7225 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7226 {
7227 .type = AMD_IP_BLOCK_TYPE_GFX,
7228 .major = 9,
7229 .minor = 0,
7230 .rev = 0,
7231 .funcs = &gfx_v9_0_ip_funcs,
7232 };