/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS	1
#define GFX9_MEC_HPD_SIZE	4096
#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET	0x00000000L

#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

#define mmTCP_CHAN_STEER_0_ARCT			0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX	0
#define mmTCP_CHAN_STEER_1_ARCT			0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX	0
#define mmTCP_CHAN_STEER_2_ARCT			0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX	0
#define mmTCP_CHAN_STEER_3_ARCT			0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX	0
#define mmTCP_CHAN_STEER_4_ARCT			0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX	0
#define mmTCP_CHAN_STEER_5_ARCT			0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX	0

enum ta_ras_gfx_subblock {
	/*CPC*/
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF*/
	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG*/
	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS*/
	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI*/
	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges)*/
	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0*/
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
	[AMDGPU_RAS_BLOCK__##subblock] = { \
		#subblock, \
		TA_RAS_BLOCK__##subblock, \
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
	}

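/*
 * Each AMDGPU_RAS_SUB_BLOCK() invocation below packs its eight flag
 * arguments (a..h) into the two flag words of a ras_gfx_subblock entry:
 * a..d land in bits 0..3 of hw_supported_error_type, while e..h are
 * scattered into sw_supported_error_type as (g | e << 1 | h << 2 | f << 3).
 * As an illustration only (not an extra table entry),
 *
 *     AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1)
 *
 * expands to
 *
 *     [AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH] = {
 *             "GFX_CPC_SCRATCH",
 *             TA_RAS_BLOCK__GFX_CPC_SCRATCH,
 *             0x0e,    hw: 0 | (1 << 1) | (1 << 2) | (1 << 3)
 *             0x06,    sw: (1 << 1) | (0 << 3) | 0 | (1 << 2)
 *     },
 */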
static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

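/*
 * Golden register settings.  Each SOC15_REG_GOLDEN_VALUE(ip, inst, reg,
 * and_mask, or_mask) entry names a register together with the field mask
 * and value that soc15_program_register_sequence() is expected to program
 * into it; gfx_v9_0_init_golden_registers() below picks the per-ASIC
 * table(s) to apply on top of the common gc_9_x settings.
 */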
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
};

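/*
 * Offsets of the RLC_SRM_INDEX_CNTL_ADDR_1..7 and _DATA_1..7 registers
 * relative to their _0 instances (so the first entry of each table is 0),
 * letting the eight register pairs be walked with a simple loop index.
 */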
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_ARCTURUS:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	default:
		break;
	}

	if (adev->asic_type != CHIP_ARCTURUS)
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

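/*
 * gfx_v9_0_write_data_to_reg - emit a register write on the ring
 *
 * Emits a 5-dword PACKET3_WRITE_DATA that writes @val to register @reg
 * using engine @eng_sel, optionally requesting write confirmation (@wc).
 */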
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

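/*
 * gfx_v9_0_wait_reg_mem - emit a WAIT_REG_MEM packet
 *
 * Emits PACKET3_WAIT_REG_MEM so the selected engine polls either a register
 * (@mem_space == 0, @addr0 holds the register) or a memory location
 * (@mem_space == 1, @addr0/@addr1 hold the dword-aligned address) until
 * (value & @mask) equals @ref, rechecking every @inv poll interval.
 */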
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

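/*
 * gfx_v9_0_ring_test_ring - basic ring liveness test
 *
 * Seeds a scratch register with 0xCAFEDEAD, submits a SET_UCONFIG_REG
 * packet that writes 0xDEADBEEF to the same register, then polls for up to
 * adev->usec_timeout microseconds.  Returns 0 on success or -ETIMEDOUT if
 * the CP never executed the packet.
 */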
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

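/*
 * gfx_v9_0_ring_test_ib - indirect buffer (IB) test
 *
 * Allocates a writeback slot seeded with 0xCAFEDEAD, builds a small IB whose
 * WRITE_DATA packet stores 0xDEADBEEF to that slot, schedules the IB and
 * waits on its fence, then checks that the memory was updated.  Returns 0 on
 * success, -ETIMEDOUT if the fence never signalled, or -EINVAL if the value
 * written back does not match.
 */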
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

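/*
 * gfx_v9_0_init_rlc_ext_microcode - parse the v2.1 RLC firmware header
 *
 * Records the ucode/feature versions, sizes and payload pointers of the
 * three save/restore lists (cntl, gpm, srm) carried by an RLC v2.1 image,
 * plus the direct register list length from the extended header.
 */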
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

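/*
 * gfx_v9_0_check_fw_write_wait - check CP firmware capabilities
 *
 * Sets me_fw_write_wait/mec_fw_write_wait when the ME/PFP and MEC firmware
 * on this ASIC are new enough (per the version checks below) for the driver
 * to rely on the firmware-assisted write-then-wait register path.
 */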
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		break;
	}
}

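/*
 * gfx_v9_0_check_if_need_gfxoff - decide whether GFXOFF can stay enabled
 *
 * On Raven (excluding Raven2 and Picasso) the GFXOFF feature is masked out
 * of pp_feature unless the RLC firmware is one of the versions the checks
 * below accept.
 */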
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
			break;
		if ((adev->gfx.rlc_fw_version != 106 &&
		     adev->gfx.rlc_fw_version < 531) ||
		    (adev->gfx.rlc_fw_version == 53815) ||
		    (adev->gfx.rlc_feature_version < 1) ||
		    !adev->gfx.rlc.is_rlc_v2_1)
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
		break;
	default:
		break;
	}
}

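/*
 * gfx_v9_0_init_cp_gfx_microcode - fetch the CP graphics firmware
 *
 * Requests and validates the <chip>_pfp/me/ce.bin images, records their
 * ucode and feature versions, and, when PSP front-door loading is used,
 * registers each image in adev->firmware.ucode[] and adds it to the total
 * firmware size.  On error all three images are released again.
 */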
static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
	}
	return err;
}

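/*
 * gfx_v9_0_init_rlc_microcode - fetch and parse the RLC firmware
 *
 * Chooses between the regular, AM4 (Picasso) and kicker (Raven) RLC images,
 * parses the v2.0 header into the rlc bookkeeping fields, copies the
 * register list format/restore arrays, pulls in the v2.1 save/restore lists
 * when present, and registers everything for PSP loading if required.
 */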
static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	/*
	 * For Picasso on AM4 SOCKET boards, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 * or revision >= 0xD8 && revision <= 0xDF
	 * otherwise it is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		(smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APU and it's able to get the SMU
		 * version directly.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
	}
	return err;
}

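/*
 * gfx_v9_0_init_cp_compute_microcode - fetch the CP compute (MEC) firmware
 *
 * Requests and validates <chip>_mec.bin; <chip>_mec2.bin is optional and is
 * simply skipped when it is not available.  For PSP loading the MEC image is
 * split into the ucode proper and its jump table (JT), and the MEC2 JT is
 * only registered on ASICs other than Arcturus.
 */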
1228 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1229 const char *chip_name)
1230 {
1231 char fw_name[30];
1232 int err;
1233 struct amdgpu_firmware_info *info = NULL;
1234 const struct common_firmware_header *header = NULL;
1235 const struct gfx_firmware_header_v1_0 *cp_hdr;
1236
1237 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1238 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1239 if (err)
1240 goto out;
1241 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1242 if (err)
1243 goto out;
1244 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1245 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1246 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1247
1248
1249 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1250 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1251 if (!err) {
1252 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1253 if (err)
1254 goto out;
1255 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1256 adev->gfx.mec2_fw->data;
1257 adev->gfx.mec2_fw_version =
1258 le32_to_cpu(cp_hdr->header.ucode_version);
1259 adev->gfx.mec2_feature_version =
1260 le32_to_cpu(cp_hdr->ucode_feature_version);
1261 } else {
1262 err = 0;
1263 adev->gfx.mec2_fw = NULL;
1264 }
1265
1266 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1267 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1268 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1269 info->fw = adev->gfx.mec_fw;
1270 header = (const struct common_firmware_header *)info->fw->data;
1271 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1272 adev->firmware.fw_size +=
1273 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1274
1275 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1276 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1277 info->fw = adev->gfx.mec_fw;
1278 adev->firmware.fw_size +=
1279 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1280
1281 if (adev->gfx.mec2_fw) {
1282 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1283 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1284 info->fw = adev->gfx.mec2_fw;
1285 header = (const struct common_firmware_header *)info->fw->data;
1286 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1287 adev->firmware.fw_size +=
1288 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1289
1290 /* TODO: Determine if MEC2 JT FW loading can be removed
1291 for all GFX V9 ASICs and above */
1292 if (adev->asic_type != CHIP_ARCTURUS) {
1293 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1294 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1295 info->fw = adev->gfx.mec2_fw;
1296 adev->firmware.fw_size +=
1297 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1298 PAGE_SIZE);
1299 }
1300 }
1301 }
1302
1303 out:
1304 gfx_v9_0_check_if_need_gfxoff(adev);
1305 gfx_v9_0_check_fw_write_wait(adev);
1306 if (err) {
1307 dev_err(adev->dev,
1308 "gfx9: Failed to load firmware \"%s\"\n",
1309 fw_name);
1310 release_firmware(adev->gfx.mec_fw);
1311 adev->gfx.mec_fw = NULL;
1312 release_firmware(adev->gfx.mec2_fw);
1313 adev->gfx.mec2_fw = NULL;
1314 }
1315 return err;
1316 }
1317
1318 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1319 {
1320 const char *chip_name;
1321 int r;
1322
1323 DRM_DEBUG("\n");
1324
1325 switch (adev->asic_type) {
1326 case CHIP_VEGA10:
1327 chip_name = "vega10";
1328 break;
1329 case CHIP_VEGA12:
1330 chip_name = "vega12";
1331 break;
1332 case CHIP_VEGA20:
1333 chip_name = "vega20";
1334 break;
1335 case CHIP_RAVEN:
1336 if (adev->rev_id >= 8)
1337 chip_name = "raven2";
1338 else if (adev->pdev->device == 0x15d8)
1339 chip_name = "picasso";
1340 else
1341 chip_name = "raven";
1342 break;
1343 case CHIP_ARCTURUS:
1344 chip_name = "arcturus";
1345 break;
1346 default:
1347 BUG();
1348 }
1349
1350 /* No CPG in Arcturus */
1351 if (adev->asic_type != CHIP_ARCTURUS) {
1352 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1353 if (r)
1354 return r;
1355 }
1356
1357 r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1358 if (r)
1359 return r;
1360
1361 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1362 if (r)
1363 return r;
1364
1365 return r;
1366 }
1367
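/* Return the number of dwords needed for the clear-state buffer:
 * the preamble, context control, each SECT_CONTEXT register extent and
 * the trailing clear-state packets.
 */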
1368 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1369 {
1370 u32 count = 0;
1371 const struct cs_section_def *sect = NULL;
1372 const struct cs_extent_def *ext = NULL;
1373
1374 /* begin clear state */
1375 count += 2;
1376 /* context control state */
1377 count += 3;
1378
1379 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1380 for (ext = sect->section; ext->extent != NULL; ++ext) {
1381 if (sect->id == SECT_CONTEXT)
1382 count += 2 + ext->reg_count;
1383 else
1384 return 0;
1385 }
1386 }
1387
1388 /* end clear state */
1389 count += 2;
1390 /* clear state */
1391 count += 2;
1392
1393 return count;
1394 }
1395
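/* Emit the clear-state indirect buffer contents into @buffer; the layout
 * matches the dword count computed by gfx_v9_0_get_csb_size().
 */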
1396 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1397 volatile u32 *buffer)
1398 {
1399 u32 count = 0, i;
1400 const struct cs_section_def *sect = NULL;
1401 const struct cs_extent_def *ext = NULL;
1402
1403 if (adev->gfx.rlc.cs_data == NULL)
1404 return;
1405 if (buffer == NULL)
1406 return;
1407
1408 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1409 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1410
1411 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1412 buffer[count++] = cpu_to_le32(0x80000000);
1413 buffer[count++] = cpu_to_le32(0x80000000);
1414
1415 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1416 for (ext = sect->section; ext->extent != NULL; ++ext) {
1417 if (sect->id == SECT_CONTEXT) {
1418 buffer[count++] =
1419 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1420 buffer[count++] = cpu_to_le32(ext->reg_index -
1421 PACKET3_SET_CONTEXT_REG_START);
1422 for (i = 0; i < ext->reg_count; i++)
1423 buffer[count++] = cpu_to_le32(ext->extent[i]);
1424 } else {
1425 return;
1426 }
1427 }
1428 }
1429
1430 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1431 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1432
1433 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1434 buffer[count++] = cpu_to_le32(0);
1435 }
1436
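/* Build the per-SE/SH always-on CU bitmap from cu_info and program it
 * into RLC_PG_ALWAYS_ON_CU_MASK / RLC_LB_ALWAYS_ACTIVE_CU_MASK so the RLC
 * keeps that subset of CUs active while power gating.
 */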
1437 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1438 {
1439 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1440 uint32_t pg_always_on_cu_num = 2;
1441 uint32_t always_on_cu_num;
1442 uint32_t i, j, k;
1443 uint32_t mask, cu_bitmap, counter;
1444
1445 if (adev->flags & AMD_IS_APU)
1446 always_on_cu_num = 4;
1447 else if (adev->asic_type == CHIP_VEGA12)
1448 always_on_cu_num = 8;
1449 else
1450 always_on_cu_num = 12;
1451
1452 mutex_lock(&adev->grbm_idx_mutex);
1453 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1454 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1455 mask = 1;
1456 cu_bitmap = 0;
1457 counter = 0;
1458 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1459
1460 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1461 if (cu_info->bitmap[i][j] & mask) {
1462 if (counter == pg_always_on_cu_num)
1463 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1464 if (counter < always_on_cu_num)
1465 cu_bitmap |= mask;
1466 else
1467 break;
1468 counter++;
1469 }
1470 mask <<= 1;
1471 }
1472
1473 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1474 cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1475 }
1476 }
1477 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1478 mutex_unlock(&adev->grbm_idx_mutex);
1479 }
1480
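/* Load Balancing Per Watt (LBPW) setup used on Raven; gfx_v9_4_init_lbpw()
 * below is the vega20 variant of the same sequence with different
 * threshold values.
 */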
1481 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1482 {
1483 uint32_t data;
1484
1485 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1486 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1487 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1488 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1489 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1490
1491 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1492 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1493
1494 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1495 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1496
1497 mutex_lock(&adev->grbm_idx_mutex);
1498 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1499 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1500 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1501
1502 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1503 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1504 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1505 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1506 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1507
1508 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1509 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1510 data &= 0x0000FFFF;
1511 data |= 0x00C00000;
1512 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1513
1514 /*
1515 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1516 * programmed in gfx_v9_0_init_always_on_cu_mask()
1517 */
1518
1519 /* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1520 * but used for RLC_LB_CNTL configuration */
1521 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1522 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1523 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1524 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1525 mutex_unlock(&adev->grbm_idx_mutex);
1526
1527 gfx_v9_0_init_always_on_cu_mask(adev);
1528 }
1529
1530 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1531 {
1532 uint32_t data;
1533
1534 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1535 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1536 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1537 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1538 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1539
1540 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1541 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1542
1543 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1544 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1545
1546 mutex_lock(&adev->grbm_idx_mutex);
1547 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1548 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1549 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1550
1551 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1552 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1553 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1554 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1555 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1556
1557 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1558 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1559 data &= 0x0000FFFF;
1560 data |= 0x00C00000;
1561 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1562
1563 /*
1564 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1565 * programmed in gfx_v9_0_init_always_on_cu_mask()
1566 */
1567
1568 /* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1569 * but used for RLC_LB_CNTL configuration */
1570 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1571 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1572 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1573 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1574 mutex_unlock(&adev->grbm_idx_mutex);
1575
1576 gfx_v9_0_init_always_on_cu_mask(adev);
1577 }
1578
1579 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1580 {
1581 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1582 }
1583
1584 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1585 {
1586 return 5;
1587 }
1588
1589 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1590 {
1591 const struct cs_section_def *cs_data;
1592 int r;
1593
1594 adev->gfx.rlc.cs_data = gfx9_cs_data;
1595
1596 cs_data = adev->gfx.rlc.cs_data;
1597
1598 if (cs_data) {
1599 /* init clear state block */
1600 r = amdgpu_gfx_rlc_init_csb(adev);
1601 if (r)
1602 return r;
1603 }
1604
1605 if (adev->asic_type == CHIP_RAVEN) {
1606 /* TODO: double check the cp_table_size for RV */
1607 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1608 r = amdgpu_gfx_rlc_init_cpt(adev);
1609 if (r)
1610 return r;
1611 }
1612
1613 switch (adev->asic_type) {
1614 case CHIP_RAVEN:
1615 gfx_v9_0_init_lbpw(adev);
1616 break;
1617 case CHIP_VEGA20:
1618 gfx_v9_4_init_lbpw(adev);
1619 break;
1620 default:
1621 break;
1622 }
1623
1624 return 0;
1625 }
1626
1627 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1628 {
1629 int r;
1630
1631 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1632 if (unlikely(r != 0))
1633 return r;
1634
1635 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1636 AMDGPU_GEM_DOMAIN_VRAM);
1637 if (!r)
1638 adev->gfx.rlc.clear_state_gpu_addr =
1639 amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1640
1641 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1642
1643 return r;
1644 }
1645
1646 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1647 {
1648 int r;
1649
1650 if (!adev->gfx.rlc.clear_state_obj)
1651 return;
1652
1653 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1654 if (likely(r == 0)) {
1655 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1656 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1657 }
1658 }
1659
1660 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1661 {
1662 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1663 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1664 }
1665
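/* Allocate the MEC HPD EOP buffer for all acquired compute queues and
 * copy the MEC microcode into a GTT buffer object.
 */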
1666 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1667 {
1668 int r;
1669 u32 *hpd;
1670 const __le32 *fw_data;
1671 unsigned fw_size;
1672 u32 *fw;
1673 size_t mec_hpd_size;
1674
1675 const struct gfx_firmware_header_v1_0 *mec_hdr;
1676
1677 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1678
1679 /* take ownership of the relevant compute queues */
1680 amdgpu_gfx_compute_queue_acquire(adev);
1681 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1682
1683 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1684 AMDGPU_GEM_DOMAIN_VRAM,
1685 &adev->gfx.mec.hpd_eop_obj,
1686 &adev->gfx.mec.hpd_eop_gpu_addr,
1687 (void **)&hpd);
1688 if (r) {
1689 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1690 gfx_v9_0_mec_fini(adev);
1691 return r;
1692 }
1693
1694 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1695
1696 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1697 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1698
1699 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1700
1701 fw_data = (const __le32 *)
1702 (adev->gfx.mec_fw->data +
1703 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1704 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1705
1706 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1707 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1708 &adev->gfx.mec.mec_fw_obj,
1709 &adev->gfx.mec.mec_fw_gpu_addr,
1710 (void **)&fw);
1711 if (r) {
1712 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1713 gfx_v9_0_mec_fini(adev);
1714 return r;
1715 }
1716
1717 memcpy(fw, fw_data, fw_size);
1718
1719 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1720 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1721
1722 return 0;
1723 }
1724
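/* Read a single wave register through the SQ_IND_INDEX/SQ_IND_DATA
 * indirect register interface.
 */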
1725 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1726 {
1727 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1728 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1729 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1730 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1731 (SQ_IND_INDEX__FORCE_READ_MASK));
1732 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1733 }
1734
1735 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1736 uint32_t wave, uint32_t thread,
1737 uint32_t regno, uint32_t num, uint32_t *out)
1738 {
1739 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1740 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1741 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1742 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1743 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1744 (SQ_IND_INDEX__FORCE_READ_MASK) |
1745 (SQ_IND_INDEX__AUTO_INCR_MASK));
1746 while (num--)
1747 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1748 }
1749
1750 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1751 {
1752 /* type 1 wave data */
1753 dst[(*no_fields)++] = 1;
1754 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1755 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1756 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1757 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1758 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1759 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1760 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1761 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1762 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1763 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1764 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1765 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1766 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1767 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1768 }
1769
1770 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1771 uint32_t wave, uint32_t start,
1772 uint32_t size, uint32_t *dst)
1773 {
1774 wave_read_regs(
1775 adev, simd, wave, 0,
1776 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1777 }
1778
1779 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1780 uint32_t wave, uint32_t thread,
1781 uint32_t start, uint32_t size,
1782 uint32_t *dst)
1783 {
1784 wave_read_regs(
1785 adev, simd, wave, thread,
1786 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1787 }
1788
1789 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1790 u32 me, u32 pipe, u32 q, u32 vm)
1791 {
1792 soc15_grbm_select(adev, me, pipe, q, vm);
1793 }
1794
1795 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1796 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1797 .select_se_sh = &gfx_v9_0_select_se_sh,
1798 .read_wave_data = &gfx_v9_0_read_wave_data,
1799 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1800 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1801 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1802 .ras_error_inject = &gfx_v9_0_ras_error_inject,
1803 .query_ras_error_count = &gfx_v9_0_query_ras_error_count
1804 };
1805
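/* Fill in the per-ASIC gfx configuration and decode the GB_ADDR_CONFIG
 * fields (pipes, banks, compressed frags, RBs, shader engines and
 * pipe interleave size).
 */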
1806 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1807 {
1808 u32 gb_addr_config;
1809 int err;
1810
1811 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1812
1813 switch (adev->asic_type) {
1814 case CHIP_VEGA10:
1815 adev->gfx.config.max_hw_contexts = 8;
1816 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1817 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1818 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1819 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1820 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1821 break;
1822 case CHIP_VEGA12:
1823 adev->gfx.config.max_hw_contexts = 8;
1824 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1825 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1826 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1827 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1828 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1829 DRM_INFO("fix gfx.config for vega12\n");
1830 break;
1831 case CHIP_VEGA20:
1832 adev->gfx.config.max_hw_contexts = 8;
1833 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1834 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1835 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1836 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1837 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1838 gb_addr_config &= ~0xf3e777ff;
1839 gb_addr_config |= 0x22014042;
1840 /* check vbios table if gpu info is not available */
1841 err = amdgpu_atomfirmware_get_gfx_info(adev);
1842 if (err)
1843 return err;
1844 break;
1845 case CHIP_RAVEN:
1846 adev->gfx.config.max_hw_contexts = 8;
1847 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1848 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1849 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1850 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1851 if (adev->rev_id >= 8)
1852 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1853 else
1854 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1855 break;
1856 case CHIP_ARCTURUS:
1857 adev->gfx.config.max_hw_contexts = 8;
1858 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1859 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1860 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1861 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1862 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1863 gb_addr_config &= ~0xf3e777ff;
1864 gb_addr_config |= 0x22014042;
1865 break;
1866 default:
1867 BUG();
1868 break;
1869 }
1870
1871 adev->gfx.config.gb_addr_config = gb_addr_config;
1872
1873 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1874 REG_GET_FIELD(
1875 adev->gfx.config.gb_addr_config,
1876 GB_ADDR_CONFIG,
1877 NUM_PIPES);
1878
1879 adev->gfx.config.max_tile_pipes =
1880 adev->gfx.config.gb_addr_config_fields.num_pipes;
1881
1882 adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1883 REG_GET_FIELD(
1884 adev->gfx.config.gb_addr_config,
1885 GB_ADDR_CONFIG,
1886 NUM_BANKS);
1887 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1888 REG_GET_FIELD(
1889 adev->gfx.config.gb_addr_config,
1890 GB_ADDR_CONFIG,
1891 MAX_COMPRESSED_FRAGS);
1892 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1893 REG_GET_FIELD(
1894 adev->gfx.config.gb_addr_config,
1895 GB_ADDR_CONFIG,
1896 NUM_RB_PER_SE);
1897 adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1898 REG_GET_FIELD(
1899 adev->gfx.config.gb_addr_config,
1900 GB_ADDR_CONFIG,
1901 NUM_SHADER_ENGINES);
1902 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1903 REG_GET_FIELD(
1904 adev->gfx.config.gb_addr_config,
1905 GB_ADDR_CONFIG,
1906 PIPE_INTERLEAVE_SIZE));
1907
1908 return 0;
1909 }
1910
1911 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1912 struct amdgpu_ngg_buf *ngg_buf,
1913 int size_se,
1914 int default_size_se)
1915 {
1916 int r;
1917
1918 if (size_se < 0) {
1919 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1920 return -EINVAL;
1921 }
1922 size_se = size_se ? size_se : default_size_se;
1923
1924 ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1925 r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1926 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1927 &ngg_buf->bo,
1928 &ngg_buf->gpu_addr,
1929 NULL);
1930 if (r) {
1931 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1932 return r;
1933 }
1934 ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1935
1936 return r;
1937 }
1938
1939 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1940 {
1941 int i;
1942
1943 for (i = 0; i < NGG_BUF_MAX; i++)
1944 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1945 &adev->gfx.ngg.buf[i].gpu_addr,
1946 NULL);
1947
1948 memset(&adev->gfx.ngg.buf[0], 0,
1949 sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1950
1951 adev->gfx.ngg.init = false;
1952
1953 return 0;
1954 }
1955
1956 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1957 {
1958 int r;
1959
1960 if (!amdgpu_ngg || adev->gfx.ngg.init)
1961 return 0;
1962
1963 /* reserve GDS memory: 64-byte alignment */
1964 adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1965 adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1966 adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1967 adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1968
1969 /* Primitive Buffer */
1970 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1971 amdgpu_prim_buf_per_se,
1972 64 * 1024);
1973 if (r) {
1974 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1975 goto err;
1976 }
1977
1978 /* Position Buffer */
1979 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1980 amdgpu_pos_buf_per_se,
1981 256 * 1024);
1982 if (r) {
1983 dev_err(adev->dev, "Failed to create Position Buffer\n");
1984 goto err;
1985 }
1986
1987 /* Control Sideband */
1988 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1989 amdgpu_cntl_sb_buf_per_se,
1990 256);
1991 if (r) {
1992 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1993 goto err;
1994 }
1995
1996 /* Parameter Cache, not created by default */
1997 if (amdgpu_param_buf_per_se <= 0)
1998 goto out;
1999
2000 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
2001 amdgpu_param_buf_per_se,
2002 512 * 1024);
2003 if (r) {
2004 dev_err(adev->dev, "Failed to create Parameter Cache\n");
2005 goto err;
2006 }
2007
2008 out:
2009 adev->gfx.ngg.init = true;
2010 return 0;
2011 err:
2012 gfx_v9_0_ngg_fini(adev);
2013 return r;
2014 }
2015
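/* Program the NGG buffer sizes and base addresses, then clear the GDS
 * memory reserved for NGG with a CP DMA_DATA packet on the gfx ring.
 */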
2016 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
2017 {
2018 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2019 int r;
2020 u32 data, base;
2021
2022 if (!amdgpu_ngg)
2023 return 0;
2024
2025 /* Program buffer size */
2026 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
2027 adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
2028 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
2029 adev->gfx.ngg.buf[NGG_POS].size >> 8);
2030 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
2031
2032 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
2033 adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
2034 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
2035 adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
2036 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
2037
2038 /* Program buffer base address */
2039 base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2040 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
2041 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
2042
2043 base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2044 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
2045 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
2046
2047 base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2048 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
2049 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
2050
2051 base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2052 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
2053 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
2054
2055 base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2056 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
2057 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
2058
2059 base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2060 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
2061 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
2062
2063 /* Clear GDS reserved memory */
2064 r = amdgpu_ring_alloc(ring, 17);
2065 if (r) {
2066 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
2067 ring->name, r);
2068 return r;
2069 }
2070
2071 gfx_v9_0_write_data_to_reg(ring, 0, false,
2072 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
2073 (adev->gds.gds_size +
2074 adev->gfx.ngg.gds_reserve_size));
2075
2076 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
2077 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
2078 PACKET3_DMA_DATA_DST_SEL(1) |
2079 PACKET3_DMA_DATA_SRC_SEL(2)));
2080 amdgpu_ring_write(ring, 0);
2081 amdgpu_ring_write(ring, 0);
2082 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
2083 amdgpu_ring_write(ring, 0);
2084 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
2085 adev->gfx.ngg.gds_reserve_size);
2086
2087 gfx_v9_0_write_data_to_reg(ring, 0, false,
2088 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
2089
2090 amdgpu_ring_commit(ring);
2091
2092 return 0;
2093 }
2094
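/* Initialize one compute ring: map it to its MEC/pipe/queue, assign its
 * doorbell and EOP address within the HPD buffer, and hook up the EOP
 * interrupt source for that pipe.
 */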
2095 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2096 int mec, int pipe, int queue)
2097 {
2098 int r;
2099 unsigned irq_type;
2100 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2101
2102 ring = &adev->gfx.compute_ring[ring_id];
2103
2104 /* mec0 is me1 */
2105 ring->me = mec + 1;
2106 ring->pipe = pipe;
2107 ring->queue = queue;
2108
2109 ring->ring_obj = NULL;
2110 ring->use_doorbell = true;
2111 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2112 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2113 + (ring_id * GFX9_MEC_HPD_SIZE);
2114 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2115
2116 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2117 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2118 + ring->pipe;
2119
2120 /* type-2 packets are deprecated on MEC, use type-3 instead */
2121 r = amdgpu_ring_init(adev, ring, 1024,
2122 &adev->gfx.eop_irq, irq_type);
2123 if (r)
2124 return r;
2125
2126
2127 return 0;
2128 }
2129
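/* sw_init: register the EOP, privileged and ECC interrupt sources, load
 * microcode, create the RLC and MEC buffer objects, and set up the gfx,
 * compute and KIQ rings plus their MQDs.
 */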
2130 static int gfx_v9_0_sw_init(void *handle)
2131 {
2132 int i, j, k, r, ring_id;
2133 struct amdgpu_ring *ring;
2134 struct amdgpu_kiq *kiq;
2135 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2136
2137 switch (adev->asic_type) {
2138 case CHIP_VEGA10:
2139 case CHIP_VEGA12:
2140 case CHIP_VEGA20:
2141 case CHIP_RAVEN:
2142 case CHIP_ARCTURUS:
2143 adev->gfx.mec.num_mec = 2;
2144 break;
2145 default:
2146 adev->gfx.mec.num_mec = 1;
2147 break;
2148 }
2149
2150 adev->gfx.mec.num_pipe_per_mec = 4;
2151 adev->gfx.mec.num_queue_per_pipe = 8;
2152
2153 /* EOP Event */
2154 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2155 if (r)
2156 return r;
2157
2158 /* Privileged reg */
2159 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2160 &adev->gfx.priv_reg_irq);
2161 if (r)
2162 return r;
2163
2164 /* Privileged inst */
2165 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2166 &adev->gfx.priv_inst_irq);
2167 if (r)
2168 return r;
2169
2170 /* ECC error */
2171 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2172 &adev->gfx.cp_ecc_error_irq);
2173 if (r)
2174 return r;
2175
2176 /* FUE error */
2177 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2178 &adev->gfx.cp_ecc_error_irq);
2179 if (r)
2180 return r;
2181
2182 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2183
2184 gfx_v9_0_scratch_init(adev);
2185
2186 r = gfx_v9_0_init_microcode(adev);
2187 if (r) {
2188 DRM_ERROR("Failed to load gfx firmware!\n");
2189 return r;
2190 }
2191
2192 r = adev->gfx.rlc.funcs->init(adev);
2193 if (r) {
2194 DRM_ERROR("Failed to init rlc BOs!\n");
2195 return r;
2196 }
2197
2198 r = gfx_v9_0_mec_init(adev);
2199 if (r) {
2200 DRM_ERROR("Failed to init MEC BOs!\n");
2201 return r;
2202 }
2203
2204 /* set up the gfx ring */
2205 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2206 ring = &adev->gfx.gfx_ring[i];
2207 ring->ring_obj = NULL;
2208 if (!i)
2209 sprintf(ring->name, "gfx");
2210 else
2211 sprintf(ring->name, "gfx_%d", i);
2212 ring->use_doorbell = true;
2213 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2214 r = amdgpu_ring_init(adev, ring, 1024,
2215 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2216 if (r)
2217 return r;
2218 }
2219
2220 /* set up the compute queues - allocate horizontally across pipes */
2221 ring_id = 0;
2222 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2223 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2224 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2225 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2226 continue;
2227
2228 r = gfx_v9_0_compute_ring_init(adev,
2229 ring_id,
2230 i, k, j);
2231 if (r)
2232 return r;
2233
2234 ring_id++;
2235 }
2236 }
2237 }
2238
2239 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2240 if (r) {
2241 DRM_ERROR("Failed to init KIQ BOs!\n");
2242 return r;
2243 }
2244
2245 kiq = &adev->gfx.kiq;
2246 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2247 if (r)
2248 return r;
2249
2250 /* create MQD for all compute queues as well as KIQ for the SRIOV case */
2251 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2252 if (r)
2253 return r;
2254
2255 adev->gfx.ce_ram_size = 0x8000;
2256
2257 r = gfx_v9_0_gpu_early_init(adev);
2258 if (r)
2259 return r;
2260
2261 r = gfx_v9_0_ngg_init(adev);
2262 if (r)
2263 return r;
2264
2265 return 0;
2266 }
2267
2268
2269 static int gfx_v9_0_sw_fini(void *handle)
2270 {
2271 int i;
2272 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2273
2274 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
2275 adev->gfx.ras_if) {
2276 struct ras_common_if *ras_if = adev->gfx.ras_if;
2277 struct ras_ih_if ih_info = {
2278 .head = *ras_if,
2279 };
2280
2281 amdgpu_ras_debugfs_remove(adev, ras_if);
2282 amdgpu_ras_sysfs_remove(adev, ras_if);
2283 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
2284 amdgpu_ras_feature_enable(adev, ras_if, 0);
2285 kfree(ras_if);
2286 }
2287
2288 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2289 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2290 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2291 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2292
2293 amdgpu_gfx_mqd_sw_fini(adev);
2294 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2295 amdgpu_gfx_kiq_fini(adev);
2296
2297 gfx_v9_0_mec_fini(adev);
2298 gfx_v9_0_ngg_fini(adev);
2299 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2300 if (adev->asic_type == CHIP_RAVEN) {
2301 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2302 &adev->gfx.rlc.cp_table_gpu_addr,
2303 (void **)&adev->gfx.rlc.cp_table_ptr);
2304 }
2305 gfx_v9_0_free_microcode(adev);
2306
2307 return 0;
2308 }
2309
2310
2311 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2312 {
2313 /* TODO */
2314 }
2315
2316 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2317 {
2318 u32 data;
2319
2320 if (instance == 0xffffffff)
2321 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2322 else
2323 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2324
2325 if (se_num == 0xffffffff)
2326 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2327 else
2328 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2329
2330 if (sh_num == 0xffffffff)
2331 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2332 else
2333 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2334
2335 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2336 }
2337
2338 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2339 {
2340 u32 data, mask;
2341
2342 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2343 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2344
2345 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2346 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2347
2348 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2349 adev->gfx.config.max_sh_per_se);
2350
2351 return (~data) & mask;
2352 }
2353
2354 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2355 {
2356 int i, j;
2357 u32 data;
2358 u32 active_rbs = 0;
2359 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2360 adev->gfx.config.max_sh_per_se;
2361
2362 mutex_lock(&adev->grbm_idx_mutex);
2363 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2364 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2365 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2366 data = gfx_v9_0_get_rb_active_bitmap(adev);
2367 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2368 rb_bitmap_width_per_sh);
2369 }
2370 }
2371 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2372 mutex_unlock(&adev->grbm_idx_mutex);
2373
2374 adev->gfx.config.backend_enable_mask = active_rbs;
2375 adev->gfx.config.num_rbs = hweight32(active_rbs);
2376 }
2377
2378 #define DEFAULT_SH_MEM_BASES (0x6000)
2379 #define FIRST_COMPUTE_VMID (8)
2380 #define LAST_COMPUTE_VMID (16)
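/* Program the SH_MEM apertures for the compute VMIDs (FIRST..LAST) and
 * clear their GDS/GWS/OA allocations; firmware enables these again for
 * the VMIDs that need them.
 */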
2381 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2382 {
2383 int i;
2384 uint32_t sh_mem_config;
2385 uint32_t sh_mem_bases;
2386
2387 /*
2388 * Configure apertures:
2389 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
2390 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
2391 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
2392 */
2393 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2394
2395 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2396 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2397 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2398
2399 mutex_lock(&adev->srbm_mutex);
2400 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2401 soc15_grbm_select(adev, 0, 0, 0, i);
2402 /* CP and shaders */
2403 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2404 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2405 }
2406 soc15_grbm_select(adev, 0, 0, 0, 0);
2407 mutex_unlock(&adev->srbm_mutex);
2408
2409 /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2410 access. These should be enabled by FW for target VMIDs. */
2411 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2412 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2413 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2414 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2415 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2416 }
2417 }
2418
2419 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2420 {
2421 int vmid;
2422
2423 /*
2424 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2425 * access. Compute VMIDs should be enabled by FW for target VMIDs;
2426 * the driver can enable them for graphics. VMID0 should maintain
2427 * access so that HWS firmware can save/restore entries.
2428 */
2429 for (vmid = 1; vmid < 16; vmid++) {
2430 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2431 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2432 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2433 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2434 }
2435 }
2436
2437 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2438 {
2439 u32 tmp;
2440 int i;
2441
2442 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2443
2444 gfx_v9_0_tiling_mode_table_init(adev);
2445
2446 gfx_v9_0_setup_rb(adev);
2447 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2448 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2449
2450 /* XXX SH_MEM regs */
2451 /* where to put LDS, scratch, GPUVM in FSA64 space */
2452 mutex_lock(&adev->srbm_mutex);
2453 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2454 soc15_grbm_select(adev, 0, 0, 0, i);
2455 /* CP and shaders */
2456 if (i == 0) {
2457 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2458 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2459 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2460 !!amdgpu_noretry);
2461 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2462 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2463 } else {
2464 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2465 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2466 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2467 !!amdgpu_noretry);
2468 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2469 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2470 (adev->gmc.private_aperture_start >> 48));
2471 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2472 (adev->gmc.shared_aperture_start >> 48));
2473 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2474 }
2475 }
2476 soc15_grbm_select(adev, 0, 0, 0, 0);
2477
2478 mutex_unlock(&adev->srbm_mutex);
2479
2480 gfx_v9_0_init_compute_vmid(adev);
2481 gfx_v9_0_init_gds_vmid(adev);
2482 }
2483
2484 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2485 {
2486 u32 i, j, k;
2487 u32 mask;
2488
2489 mutex_lock(&adev->grbm_idx_mutex);
2490 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2491 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2492 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2493 for (k = 0; k < adev->usec_timeout; k++) {
2494 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2495 break;
2496 udelay(1);
2497 }
2498 if (k == adev->usec_timeout) {
2499 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2500 0xffffffff, 0xffffffff);
2501 mutex_unlock(&adev->grbm_idx_mutex);
2502 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2503 i, j);
2504 return;
2505 }
2506 }
2507 }
2508 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2509 mutex_unlock(&adev->grbm_idx_mutex);
2510
2511 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2512 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2513 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2514 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2515 for (k = 0; k < adev->usec_timeout; k++) {
2516 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2517 break;
2518 udelay(1);
2519 }
2520 }
2521
2522 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2523 bool enable)
2524 {
2525 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2526
2527 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2528 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2529 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2530 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2531
2532 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2533 }
2534
2535 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2536 {
2537 /* csib */
2538 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2539 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2540 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2541 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2542 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2543 adev->gfx.rlc.clear_state_size);
2544 }
2545
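/* Walk the RLC register-list-format blob, recording where each indirect
 * block starts and collecting the unique indirect register offsets it
 * references.
 */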
2546 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2547 int indirect_offset,
2548 int list_size,
2549 int *unique_indirect_regs,
2550 int unique_indirect_reg_count,
2551 int *indirect_start_offsets,
2552 int *indirect_start_offsets_count,
2553 int max_start_offsets_count)
2554 {
2555 int idx;
2556
2557 for (; indirect_offset < list_size; indirect_offset++) {
2558 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2559 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2560 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2561
2562 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2563 indirect_offset += 2;
2564
2565 /* look for the matching index */
2566 for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2567 if (unique_indirect_regs[idx] ==
2568 register_list_format[indirect_offset] ||
2569 !unique_indirect_regs[idx])
2570 break;
2571 }
2572
2573 BUG_ON(idx >= unique_indirect_reg_count);
2574
2575 if (!unique_indirect_regs[idx])
2576 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2577
2578 indirect_offset++;
2579 }
2580 }
2581 }
2582
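/* Upload the RLC save/restore lists: the register restore table goes into
 * ARAM via RLC_SRM_ARAM_ADDR/DATA, and the register list format (direct
 * and indirect portions) plus the start offsets go into RLC scratch RAM.
 */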
2583 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2584 {
2585 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2586 int unique_indirect_reg_count = 0;
2587
2588 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2589 int indirect_start_offsets_count = 0;
2590
2591 int list_size = 0;
2592 int i = 0, j = 0;
2593 u32 tmp = 0;
2594
2595 u32 *register_list_format =
2596 kmemdup(adev->gfx.rlc.register_list_format,
2597 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2598 if (!register_list_format)
2599 return -ENOMEM;
2600
2601 /* setup unique_indirect_regs array and indirect_start_offsets array */
2602 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2603 gfx_v9_1_parse_ind_reg_list(register_list_format,
2604 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2605 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2606 unique_indirect_regs,
2607 unique_indirect_reg_count,
2608 indirect_start_offsets,
2609 &indirect_start_offsets_count,
2610 ARRAY_SIZE(indirect_start_offsets));
2611
2612 /* enable auto inc in case it is disabled */
2613 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2614 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2615 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2616
2617 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2618 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2619 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2620 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2621 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2622 adev->gfx.rlc.register_restore[i]);
2623
2624 /* load indirect register */
2625 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2626 adev->gfx.rlc.reg_list_format_start);
2627
2628 /* direct register portion */
2629 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2630 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2631 register_list_format[i]);
2632
2633 /* indirect register portion */
2634 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2635 if (register_list_format[i] == 0xFFFFFFFF) {
2636 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2637 continue;
2638 }
2639
2640 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2641 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2642
2643 for (j = 0; j < unique_indirect_reg_count; j++) {
2644 if (register_list_format[i] == unique_indirect_regs[j]) {
2645 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2646 break;
2647 }
2648 }
2649
2650 BUG_ON(j >= unique_indirect_reg_count);
2651
2652 i++;
2653 }
2654
2655 /* set save/restore list size */
2656 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2657 list_size = list_size >> 1;
2658 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2659 adev->gfx.rlc.reg_restore_list_size);
2660 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2661
2662 /* write the starting offsets to RLC scratch ram */
2663 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2664 adev->gfx.rlc.starting_offsets_start);
2665 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2666 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2667 indirect_start_offsets[i]);
2668
2669 /* load unique indirect regs */
2670 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2671 if (unique_indirect_regs[i] != 0) {
2672 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2673 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2674 unique_indirect_regs[i] & 0x3FFFF);
2675
2676 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2677 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2678 unique_indirect_regs[i] >> 20);
2679 }
2680 }
2681
2682 kfree(register_list_format);
2683 return 0;
2684 }
2685
2686 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2687 {
2688 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2689 }
2690
2691 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2692 bool enable)
2693 {
2694 uint32_t data = 0;
2695 uint32_t default_data = 0;
2696
2697 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2698 if (enable) {
2699 /* enable GFXIP control over CGPG */
2700 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2701 if(default_data != data)
2702 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2703
2704 /* update status */
2705 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2706 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2707 if(default_data != data)
2708 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2709 } else {
2710 /* restore GFXIP control over CGPG */
2711 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2712 if(default_data != data)
2713 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2714 }
2715 }
2716
2717 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2718 {
2719 uint32_t data = 0;
2720
2721 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2722 AMD_PG_SUPPORT_GFX_SMG |
2723 AMD_PG_SUPPORT_GFX_DMG)) {
2724 /* init IDLE_POLL_COUNT = 60 */
2725 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2726 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2727 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2728 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2729
2730 /* init RLC PG Delay */
2731 data = 0;
2732 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2733 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2734 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2735 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2736 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2737
2738 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2739 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2740 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2741 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2742
2743 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2744 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2745 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2746 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2747
2748 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2749 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2750
2751 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2752 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2753 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2754
2755 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2756 }
2757 }
2758
2759 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2760 bool enable)
2761 {
2762 uint32_t data = 0;
2763 uint32_t default_data = 0;
2764
2765 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2766 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2767 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2768 enable ? 1 : 0);
2769 if (default_data != data)
2770 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2771 }
2772
2773 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2774 bool enable)
2775 {
2776 uint32_t data = 0;
2777 uint32_t default_data = 0;
2778
2779 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2780 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2781 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2782 enable ? 1 : 0);
2783 if(default_data != data)
2784 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2785 }
2786
2787 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2788 bool enable)
2789 {
2790 uint32_t data = 0;
2791 uint32_t default_data = 0;
2792
2793 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2794 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2795 CP_PG_DISABLE,
2796 enable ? 0 : 1);
2797 if(default_data != data)
2798 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2799 }
2800
2801 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2802 bool enable)
2803 {
2804 uint32_t data, default_data;
2805
2806 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2807 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2808 GFX_POWER_GATING_ENABLE,
2809 enable ? 1 : 0);
2810 if(default_data != data)
2811 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2812 }
2813
2814 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2815 bool enable)
2816 {
2817 uint32_t data, default_data;
2818
2819 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2820 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2821 GFX_PIPELINE_PG_ENABLE,
2822 enable ? 1 : 0);
2823 if(default_data != data)
2824 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2825
2826 if (!enable)
2827 /* read any GFX register to wake up GFX */
2828 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2829 }
2830
2831 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2832 bool enable)
2833 {
2834 uint32_t data, default_data;
2835
2836 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2837 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2838 STATIC_PER_CU_PG_ENABLE,
2839 enable ? 1 : 0);
2840 if(default_data != data)
2841 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2842 }
2843
2844 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2845 bool enable)
2846 {
2847 uint32_t data, default_data;
2848
2849 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2850 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2851 DYN_PER_CU_PG_ENABLE,
2852 enable ? 1 : 0);
2853 if(default_data != data)
2854 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2855 }
2856
2857 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2858 {
2859 gfx_v9_0_init_csb(adev);
2860
2861 /*
2862 * The RLC save/restore list is available since RLC v2_1
2863 * and is needed by the gfxoff feature.
2864 */
2865 if (adev->gfx.rlc.is_rlc_v2_1) {
2866 gfx_v9_1_init_rlc_save_restore_list(adev);
2867 gfx_v9_0_enable_save_restore_machine(adev);
2868 }
2869
2870 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2871 AMD_PG_SUPPORT_GFX_SMG |
2872 AMD_PG_SUPPORT_GFX_DMG |
2873 AMD_PG_SUPPORT_CP |
2874 AMD_PG_SUPPORT_GDS |
2875 AMD_PG_SUPPORT_RLC_SMU_HS)) {
2876 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2877 adev->gfx.rlc.cp_table_gpu_addr >> 8);
2878 gfx_v9_0_init_gfx_power_gating(adev);
2879 }
2880 }
2881
2882 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2883 {
2884 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2885 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2886 gfx_v9_0_wait_for_rlc_serdes(adev);
2887 }
2888
2889 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2890 {
2891 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2892 udelay(50);
2893 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2894 udelay(50);
2895 }
2896
2897 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2898 {
2899 #ifdef AMDGPU_RLC_DEBUG_RETRY
2900 u32 rlc_ucode_ver;
2901 #endif
2902
2903 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2904 udelay(50);
2905
2906 /* on APUs (e.g. Carrizo), the cp interrupt is enabled after cp init */
2907 if (!(adev->flags & AMD_IS_APU)) {
2908 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2909 udelay(50);
2910 }
2911
2912 #ifdef AMDGPU_RLC_DEBUG_RETRY
2913 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2914 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2915 if(rlc_ucode_ver == 0x108) {
2916 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2917 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2918 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2919 * default is 0x9C4 to create a 100us interval */
2920 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2921 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2922 * to disable the page fault retry interrupts, default is
2923 * 0x100 (256) */
2924 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2925 }
2926 #endif
2927 }
2928
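/* Legacy (non-PSP) RLC microcode load: write the ucode words through
 * RLC_GPM_UCODE_ADDR/DATA starting at RLCG_UCODE_LOADING_START_ADDRESS.
 */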
2929 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2930 {
2931 const struct rlc_firmware_header_v2_0 *hdr;
2932 const __le32 *fw_data;
2933 unsigned i, fw_size;
2934
2935 if (!adev->gfx.rlc_fw)
2936 return -EINVAL;
2937
2938 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2939 amdgpu_ucode_print_rlc_hdr(&hdr->header);
2940
2941 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2942 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2943 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2944
2945 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2946 RLCG_UCODE_LOADING_START_ADDRESS);
2947 for (i = 0; i < fw_size; i++)
2948 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2949 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2950
2951 return 0;
2952 }
2953
2954 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2955 {
2956 int r;
2957
2958 if (amdgpu_sriov_vf(adev)) {
2959 gfx_v9_0_init_csb(adev);
2960 return 0;
2961 }
2962
2963 adev->gfx.rlc.funcs->stop(adev);
2964
2965 /* disable CG */
2966 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2967
2968 gfx_v9_0_init_pg(adev);
2969
2970 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2971 /* legacy rlc firmware loading */
2972 r = gfx_v9_0_rlc_load_microcode(adev);
2973 if (r)
2974 return r;
2975 }
2976
2977 switch (adev->asic_type) {
2978 case CHIP_RAVEN:
2979 if (amdgpu_lbpw == 0)
2980 gfx_v9_0_enable_lbpw(adev, false);
2981 else
2982 gfx_v9_0_enable_lbpw(adev, true);
2983 break;
2984 case CHIP_VEGA20:
2985 if (amdgpu_lbpw > 0)
2986 gfx_v9_0_enable_lbpw(adev, true);
2987 else
2988 gfx_v9_0_enable_lbpw(adev, false);
2989 break;
2990 default:
2991 break;
2992 }
2993
2994 adev->gfx.rlc.funcs->start(adev);
2995
2996 return 0;
2997 }
2998
2999 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3000 {
3001 int i;
3002 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3003
3004 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3005 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3006 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3007 if (!enable) {
3008 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3009 adev->gfx.gfx_ring[i].sched.ready = false;
3010 }
3011 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3012 udelay(50);
3013 }
3014
3015 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3016 {
3017 const struct gfx_firmware_header_v1_0 *pfp_hdr;
3018 const struct gfx_firmware_header_v1_0 *ce_hdr;
3019 const struct gfx_firmware_header_v1_0 *me_hdr;
3020 const __le32 *fw_data;
3021 unsigned i, fw_size;
3022
3023 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3024 return -EINVAL;
3025
3026 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3027 adev->gfx.pfp_fw->data;
3028 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3029 adev->gfx.ce_fw->data;
3030 me_hdr = (const struct gfx_firmware_header_v1_0 *)
3031 adev->gfx.me_fw->data;
3032
3033 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3034 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3035 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3036
3037 gfx_v9_0_cp_gfx_enable(adev, false);
3038
3039 /* PFP */
3040 fw_data = (const __le32 *)
3041 (adev->gfx.pfp_fw->data +
3042 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3043 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3044 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3045 for (i = 0; i < fw_size; i++)
3046 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3047 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3048
3049 /* CE */
3050 fw_data = (const __le32 *)
3051 (adev->gfx.ce_fw->data +
3052 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3053 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3054 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3055 for (i = 0; i < fw_size; i++)
3056 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3057 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3058
3059 /* ME */
3060 fw_data = (const __le32 *)
3061 (adev->gfx.me_fw->data +
3062 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3063 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3064 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3065 for (i = 0; i < fw_size; i++)
3066 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3067 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3068
3069 return 0;
3070 }
3071
3072 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3073 {
3074 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3075 const struct cs_section_def *sect = NULL;
3076 const struct cs_extent_def *ext = NULL;
3077 int r, i, tmp;
3078
3079 /* init the CP */
3080 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3081 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3082
3083 gfx_v9_0_cp_gfx_enable(adev, true);
3084
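	/* Reserve room for the clear-state buffer plus the trailing packets
	 * emitted below; the extra 4 + 3 dwords appear to cover the SET_BASE
	 * (4 dwords) and SET_UCONFIG_REG (3 dwords) packets.
	 */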
3085 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3086 if (r) {
3087 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3088 return r;
3089 }
3090
3091 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3092 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3093
3094 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3095 amdgpu_ring_write(ring, 0x80000000);
3096 amdgpu_ring_write(ring, 0x80000000);
3097
3098 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3099 for (ext = sect->section; ext->extent != NULL; ++ext) {
3100 if (sect->id == SECT_CONTEXT) {
3101 amdgpu_ring_write(ring,
3102 PACKET3(PACKET3_SET_CONTEXT_REG,
3103 ext->reg_count));
3104 amdgpu_ring_write(ring,
3105 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3106 for (i = 0; i < ext->reg_count; i++)
3107 amdgpu_ring_write(ring, ext->extent[i]);
3108 }
3109 }
3110 }
3111
3112 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3113 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3114
3115 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3116 amdgpu_ring_write(ring, 0);
3117
3118 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3119 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3120 amdgpu_ring_write(ring, 0x8000);
3121 amdgpu_ring_write(ring, 0x8000);
3122
3123 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3124 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3125 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3126 amdgpu_ring_write(ring, tmp);
3127 amdgpu_ring_write(ring, 0);
3128
3129 amdgpu_ring_commit(ring);
3130
3131 return 0;
3132 }
3133
3134 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3135 {
3136 struct amdgpu_ring *ring;
3137 u32 tmp;
3138 u32 rb_bufsz;
3139 u64 rb_addr, rptr_addr, wptr_gpu_addr;
3140
3141 /* Set the write pointer delay */
3142 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3143
3144 /* set the RB to use vmid 0 */
3145 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3146
3147 /* Set ring buffer size */
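	/* ring_size is in bytes; RB_BUFSZ appears to use the same
	 * 2^(n+1)-dwords encoding as EOP_SIZE below, hence
	 * log2(ring_size / 8).
	 */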
3148 ring = &adev->gfx.gfx_ring[0];
3149 rb_bufsz = order_base_2(ring->ring_size / 8);
3150 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3151 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3152 #ifdef __BIG_ENDIAN
3153 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3154 #endif
3155 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3156
3157 /* Initialize the ring buffer's write pointers */
3158 ring->wptr = 0;
3159 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3160 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3161
3162 	/* set the wb address whether it's enabled or not */
3163 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3164 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3165 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3166
3167 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3168 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3169 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3170
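	/* CP_RB0_CNTL is written again after the read/write pointer addresses
	 * are programmed; the short delay presumably gives the CP time to
	 * latch the intermediate state before the control value is re-applied.
	 */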
3171 mdelay(1);
3172 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3173
3174 rb_addr = ring->gpu_addr >> 8;
3175 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3176 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3177
3178 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3179 if (ring->use_doorbell) {
3180 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3181 DOORBELL_OFFSET, ring->doorbell_index);
3182 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3183 DOORBELL_EN, 1);
3184 } else {
3185 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3186 }
3187 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3188
3189 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3190 DOORBELL_RANGE_LOWER, ring->doorbell_index);
3191 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3192
3193 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3194 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3195
3196
3197 /* start the ring */
3198 gfx_v9_0_cp_gfx_start(adev);
3199 ring->sched.ready = true;
3200
3201 return 0;
3202 }
3203
3204 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3205 {
3206 int i;
3207
3208 if (enable) {
3209 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3210 } else {
3211 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3212 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3213 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3214 adev->gfx.compute_ring[i].sched.ready = false;
3215 adev->gfx.kiq.ring.sched.ready = false;
3216 }
3217 udelay(50);
3218 }
3219
3220 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3221 {
3222 const struct gfx_firmware_header_v1_0 *mec_hdr;
3223 const __le32 *fw_data;
3224 unsigned i;
3225 u32 tmp;
3226
3227 if (!adev->gfx.mec_fw)
3228 return -EINVAL;
3229
3230 gfx_v9_0_cp_compute_enable(adev, false);
3231
3232 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3233 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3234
3235 fw_data = (const __le32 *)
3236 (adev->gfx.mec_fw->data +
3237 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3238 tmp = 0;
3239 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3240 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3241 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3242
3243 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3244 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3245 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3246 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3247
3248 /* MEC1 */
3249 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3250 mec_hdr->jt_offset);
3251 for (i = 0; i < mec_hdr->jt_size; i++)
3252 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3253 le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3254
3255 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3256 adev->gfx.mec_fw_version);
3257 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3258
3259 return 0;
3260 }
3261
3262 /* KIQ functions */
3263 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3264 {
3265 uint32_t tmp;
3266 struct amdgpu_device *adev = ring->adev;
3267
3268 	/* tell the RLC which queue is the KIQ */
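	/* The low byte appears to encode the KIQ location: queue in bits
	 * [2:0], pipe in [4:3] and me in [6:5]; the second write sets bit 7,
	 * which presumably acts as an enable/valid flag once the location is
	 * programmed.
	 */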
3269 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3270 tmp &= 0xffffff00;
3271 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3272 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3273 tmp |= 0x80;
3274 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3275 }
3276
3277 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3278 {
3279 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3280 uint64_t queue_mask = 0;
3281 int r, i;
3282
3283 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3284 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3285 continue;
3286
3287 /* This situation may be hit in the future if a new HW
3288 * generation exposes more than 64 queues. If so, the
3289 * definition of queue_mask needs updating */
3290 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3291 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3292 break;
3293 }
3294
3295 queue_mask |= (1ull << i);
3296 }
3297
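	/* ring size: one 8-dword SET_RESOURCES packet plus one 7-dword
	 * MAP_QUEUES packet per compute ring (see the writes below).
	 */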
3298 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3299 if (r) {
3300 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3301 return r;
3302 }
3303
3304 /* set resources */
3305 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3306 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3307 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
3308 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
3309 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
3310 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
3311 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
3312 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
3313 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
3314 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3315 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3316 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3317 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3318
3319 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3320 		/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3321 		amdgpu_ring_write(kiq_ring,
3322 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3323 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3324 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3325 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3326 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3327 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3328 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3329 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3330 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3331 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3332 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3333 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3334 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3335 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3336 }
3337
3338 r = amdgpu_ring_test_helper(kiq_ring);
3339 if (r)
3340 DRM_ERROR("KCQ enable failed\n");
3341
3342 return r;
3343 }
3344
3345 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3346 {
3347 struct amdgpu_device *adev = ring->adev;
3348 struct v9_mqd *mqd = ring->mqd_ptr;
3349 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3350 uint32_t tmp;
3351
3352 mqd->header = 0xC0310800;
3353 mqd->compute_pipelinestat_enable = 0x00000001;
3354 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3355 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3356 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3357 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3358 mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3359 mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3360 mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3361 mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3362 mqd->compute_misc_reserved = 0x00000003;
3363
3364 mqd->dynamic_cu_mask_addr_lo =
3365 lower_32_bits(ring->mqd_gpu_addr
3366 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3367 mqd->dynamic_cu_mask_addr_hi =
3368 upper_32_bits(ring->mqd_gpu_addr
3369 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3370
3371 eop_base_addr = ring->eop_gpu_addr >> 8;
3372 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3373 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3374
3375 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3376 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3377 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3378 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3379
3380 mqd->cp_hqd_eop_control = tmp;
3381
3382 /* enable doorbell? */
3383 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3384
3385 if (ring->use_doorbell) {
3386 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3387 DOORBELL_OFFSET, ring->doorbell_index);
3388 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3389 DOORBELL_EN, 1);
3390 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3391 DOORBELL_SOURCE, 0);
3392 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3393 DOORBELL_HIT, 0);
3394 } else {
3395 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3396 DOORBELL_EN, 0);
3397 }
3398
3399 mqd->cp_hqd_pq_doorbell_control = tmp;
3400
3401 /* disable the queue if it's active */
3402 ring->wptr = 0;
3403 mqd->cp_hqd_dequeue_request = 0;
3404 mqd->cp_hqd_pq_rptr = 0;
3405 mqd->cp_hqd_pq_wptr_lo = 0;
3406 mqd->cp_hqd_pq_wptr_hi = 0;
3407
3408 /* set the pointer to the MQD */
3409 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3410 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3411
3412 /* set MQD vmid to 0 */
3413 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3414 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3415 mqd->cp_mqd_control = tmp;
3416
3417 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3418 hqd_gpu_addr = ring->gpu_addr >> 8;
3419 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3420 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3421
3422 /* set up the HQD, this is similar to CP_RB0_CNTL */
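	/* QUEUE_SIZE appears to follow the same 2^(n+1)-dwords encoding used
	 * for the EOP buffer and the gfx ring buffer size.
	 */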
3423 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3424 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3425 (order_base_2(ring->ring_size / 4) - 1));
3426 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3427 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3428 #ifdef __BIG_ENDIAN
3429 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3430 #endif
3431 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3432 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3433 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3434 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3435 mqd->cp_hqd_pq_control = tmp;
3436
3437 /* set the wb address whether it's enabled or not */
3438 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3439 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3440 mqd->cp_hqd_pq_rptr_report_addr_hi =
3441 upper_32_bits(wb_gpu_addr) & 0xffff;
3442
3443 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3444 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3445 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3446 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3447
3448 tmp = 0;
3449 /* enable the doorbell if requested */
3450 if (ring->use_doorbell) {
3451 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3452 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3453 DOORBELL_OFFSET, ring->doorbell_index);
3454
3455 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3456 DOORBELL_EN, 1);
3457 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3458 DOORBELL_SOURCE, 0);
3459 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3460 DOORBELL_HIT, 0);
3461 }
3462
3463 mqd->cp_hqd_pq_doorbell_control = tmp;
3464
3465 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3466 ring->wptr = 0;
3467 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3468
3469 /* set the vmid for the queue */
3470 mqd->cp_hqd_vmid = 0;
3471
3472 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3473 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3474 mqd->cp_hqd_persistent_state = tmp;
3475
3476 /* set MIN_IB_AVAIL_SIZE */
3477 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3478 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3479 mqd->cp_hqd_ib_control = tmp;
3480
3481 /* activate the queue */
3482 mqd->cp_hqd_active = 1;
3483
3484 return 0;
3485 }
3486
3487 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3488 {
3489 struct amdgpu_device *adev = ring->adev;
3490 struct v9_mqd *mqd = ring->mqd_ptr;
3491 int j;
3492
3493 /* disable wptr polling */
3494 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3495
3496 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3497 mqd->cp_hqd_eop_base_addr_lo);
3498 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3499 mqd->cp_hqd_eop_base_addr_hi);
3500
3501 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3502 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3503 mqd->cp_hqd_eop_control);
3504
3505 /* enable doorbell? */
3506 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3507 mqd->cp_hqd_pq_doorbell_control);
3508
3509 /* disable the queue if it's active */
3510 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3511 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3512 for (j = 0; j < adev->usec_timeout; j++) {
3513 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3514 break;
3515 udelay(1);
3516 }
3517 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3518 mqd->cp_hqd_dequeue_request);
3519 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3520 mqd->cp_hqd_pq_rptr);
3521 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3522 mqd->cp_hqd_pq_wptr_lo);
3523 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3524 mqd->cp_hqd_pq_wptr_hi);
3525 }
3526
3527 /* set the pointer to the MQD */
3528 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3529 mqd->cp_mqd_base_addr_lo);
3530 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3531 mqd->cp_mqd_base_addr_hi);
3532
3533 /* set MQD vmid to 0 */
3534 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3535 mqd->cp_mqd_control);
3536
3537 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3538 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3539 mqd->cp_hqd_pq_base_lo);
3540 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3541 mqd->cp_hqd_pq_base_hi);
3542
3543 /* set up the HQD, this is similar to CP_RB0_CNTL */
3544 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3545 mqd->cp_hqd_pq_control);
3546
3547 /* set the wb address whether it's enabled or not */
3548 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3549 mqd->cp_hqd_pq_rptr_report_addr_lo);
3550 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3551 mqd->cp_hqd_pq_rptr_report_addr_hi);
3552
3553 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3554 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3555 mqd->cp_hqd_pq_wptr_poll_addr_lo);
3556 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3557 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3558
3559 /* enable the doorbell if requested */
3560 if (ring->use_doorbell) {
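		/* Program the MEC doorbell aperture from the KIQ index up to
		 * the end of the user-queue range; the x2 and <<2 scaling
		 * presumably converts the doorbell indices into the units
		 * these range registers expect.
		 */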
3561 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3562 (adev->doorbell_index.kiq * 2) << 2);
3563 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3564 (adev->doorbell_index.userqueue_end * 2) << 2);
3565 }
3566
3567 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3568 mqd->cp_hqd_pq_doorbell_control);
3569
3570 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3571 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3572 mqd->cp_hqd_pq_wptr_lo);
3573 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3574 mqd->cp_hqd_pq_wptr_hi);
3575
3576 /* set the vmid for the queue */
3577 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3578
3579 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3580 mqd->cp_hqd_persistent_state);
3581
3582 /* activate the queue */
3583 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3584 mqd->cp_hqd_active);
3585
3586 if (ring->use_doorbell)
3587 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3588
3589 return 0;
3590 }
3591
3592 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3593 {
3594 struct amdgpu_device *adev = ring->adev;
3595 int j;
3596
3597 /* disable the queue if it's active */
3598 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3599
3600 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3601
3602 for (j = 0; j < adev->usec_timeout; j++) {
3603 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3604 break;
3605 udelay(1);
3606 }
3607
3608 		if (j == adev->usec_timeout) {
3609 DRM_DEBUG("KIQ dequeue request failed.\n");
3610
3611 /* Manual disable if dequeue request times out */
3612 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3613 }
3614
3615 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3616 0);
3617 }
3618
3619 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3620 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3621 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3622 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3623 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3624 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3625 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3626 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3627
3628 return 0;
3629 }
3630
3631 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3632 {
3633 struct amdgpu_device *adev = ring->adev;
3634 struct v9_mqd *mqd = ring->mqd_ptr;
3635 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3636
3637 gfx_v9_0_kiq_setting(ring);
3638
3639 if (adev->in_gpu_reset) { /* for GPU_RESET case */
3640 /* reset MQD to a clean status */
3641 if (adev->gfx.mec.mqd_backup[mqd_idx])
3642 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3643
3644 /* reset ring buffer */
3645 ring->wptr = 0;
3646 amdgpu_ring_clear_ring(ring);
3647
3648 mutex_lock(&adev->srbm_mutex);
3649 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3650 gfx_v9_0_kiq_init_register(ring);
3651 soc15_grbm_select(adev, 0, 0, 0, 0);
3652 mutex_unlock(&adev->srbm_mutex);
3653 } else {
3654 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3655 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3656 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3657 mutex_lock(&adev->srbm_mutex);
3658 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3659 gfx_v9_0_mqd_init(ring);
3660 gfx_v9_0_kiq_init_register(ring);
3661 soc15_grbm_select(adev, 0, 0, 0, 0);
3662 mutex_unlock(&adev->srbm_mutex);
3663
3664 if (adev->gfx.mec.mqd_backup[mqd_idx])
3665 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3666 }
3667
3668 return 0;
3669 }
3670
3671 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3672 {
3673 struct amdgpu_device *adev = ring->adev;
3674 struct v9_mqd *mqd = ring->mqd_ptr;
3675 int mqd_idx = ring - &adev->gfx.compute_ring[0];
3676
3677 if (!adev->in_gpu_reset && !adev->in_suspend) {
3678 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3679 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3680 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3681 mutex_lock(&adev->srbm_mutex);
3682 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3683 gfx_v9_0_mqd_init(ring);
3684 soc15_grbm_select(adev, 0, 0, 0, 0);
3685 mutex_unlock(&adev->srbm_mutex);
3686
3687 if (adev->gfx.mec.mqd_backup[mqd_idx])
3688 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3689 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3690 /* reset MQD to a clean status */
3691 if (adev->gfx.mec.mqd_backup[mqd_idx])
3692 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3693
3694 /* reset ring buffer */
3695 ring->wptr = 0;
3696 amdgpu_ring_clear_ring(ring);
3697 } else {
3698 amdgpu_ring_clear_ring(ring);
3699 }
3700
3701 return 0;
3702 }
3703
3704 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3705 {
3706 struct amdgpu_ring *ring;
3707 int r;
3708
3709 ring = &adev->gfx.kiq.ring;
3710
3711 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3712 if (unlikely(r != 0))
3713 return r;
3714
3715 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3716 if (unlikely(r != 0))
3717 return r;
3718
3719 gfx_v9_0_kiq_init_queue(ring);
3720 amdgpu_bo_kunmap(ring->mqd_obj);
3721 ring->mqd_ptr = NULL;
3722 amdgpu_bo_unreserve(ring->mqd_obj);
3723 ring->sched.ready = true;
3724 return 0;
3725 }
3726
3727 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3728 {
3729 struct amdgpu_ring *ring = NULL;
3730 int r = 0, i;
3731
3732 gfx_v9_0_cp_compute_enable(adev, true);
3733
3734 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3735 ring = &adev->gfx.compute_ring[i];
3736
3737 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3738 if (unlikely(r != 0))
3739 goto done;
3740 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3741 if (!r) {
3742 r = gfx_v9_0_kcq_init_queue(ring);
3743 amdgpu_bo_kunmap(ring->mqd_obj);
3744 ring->mqd_ptr = NULL;
3745 }
3746 amdgpu_bo_unreserve(ring->mqd_obj);
3747 if (r)
3748 goto done;
3749 }
3750
3751 r = gfx_v9_0_kiq_kcq_enable(adev);
3752 done:
3753 return r;
3754 }
3755
3756 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3757 {
3758 int r, i;
3759 struct amdgpu_ring *ring;
3760
3761 if (!(adev->flags & AMD_IS_APU))
3762 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3763
3764 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3765 if (adev->asic_type != CHIP_ARCTURUS) {
3766 /* legacy firmware loading */
3767 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3768 if (r)
3769 return r;
3770 }
3771
3772 r = gfx_v9_0_cp_compute_load_microcode(adev);
3773 if (r)
3774 return r;
3775 }
3776
3777 r = gfx_v9_0_kiq_resume(adev);
3778 if (r)
3779 return r;
3780
3781 if (adev->asic_type != CHIP_ARCTURUS) {
3782 r = gfx_v9_0_cp_gfx_resume(adev);
3783 if (r)
3784 return r;
3785 }
3786
3787 r = gfx_v9_0_kcq_resume(adev);
3788 if (r)
3789 return r;
3790
3791 if (adev->asic_type != CHIP_ARCTURUS) {
3792 ring = &adev->gfx.gfx_ring[0];
3793 r = amdgpu_ring_test_helper(ring);
3794 if (r)
3795 return r;
3796 }
3797
3798 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3799 ring = &adev->gfx.compute_ring[i];
3800 amdgpu_ring_test_helper(ring);
3801 }
3802
3803 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3804
3805 return 0;
3806 }
3807
3808 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3809 {
3810 if (adev->asic_type != CHIP_ARCTURUS)
3811 gfx_v9_0_cp_gfx_enable(adev, enable);
3812 gfx_v9_0_cp_compute_enable(adev, enable);
3813 }
3814
3815 static int gfx_v9_0_hw_init(void *handle)
3816 {
3817 int r;
3818 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3819
3820 if (!amdgpu_sriov_vf(adev))
3821 gfx_v9_0_init_golden_registers(adev);
3822
3823 gfx_v9_0_constants_init(adev);
3824
3825 r = gfx_v9_0_csb_vram_pin(adev);
3826 if (r)
3827 return r;
3828
3829 r = adev->gfx.rlc.funcs->resume(adev);
3830 if (r)
3831 return r;
3832
3833 r = gfx_v9_0_cp_resume(adev);
3834 if (r)
3835 return r;
3836
3837 if (adev->asic_type != CHIP_ARCTURUS) {
3838 r = gfx_v9_0_ngg_en(adev);
3839 if (r)
3840 return r;
3841 }
3842
3843 return r;
3844 }
3845
3846 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3847 {
3848 int r, i;
3849 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3850
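	/* one 6-dword UNMAP_QUEUES packet is emitted per compute ring */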
3851 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3852 if (r)
3853 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3854
3855 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3856 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3857
3858 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3859 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3860 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3861 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3862 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3863 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3864 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3865 amdgpu_ring_write(kiq_ring, 0);
3866 amdgpu_ring_write(kiq_ring, 0);
3867 amdgpu_ring_write(kiq_ring, 0);
3868 }
3869 r = amdgpu_ring_test_helper(kiq_ring);
3870 if (r)
3871 DRM_ERROR("KCQ disable failed\n");
3872
3873 return r;
3874 }
3875
3876 static int gfx_v9_0_hw_fini(void *handle)
3877 {
3878 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3879
3880 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3881 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3882 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3883
3884 	/* disable the KCQs so the CPC does not touch memory that is no longer valid */
3885 gfx_v9_0_kcq_disable(adev);
3886
3887 if (amdgpu_sriov_vf(adev)) {
3888 gfx_v9_0_cp_gfx_enable(adev, false);
3889 		/* polling must be disabled for SRIOV once hw_fini completes;
3890 		 * otherwise the CPC engine may keep fetching the WB address,
3891 		 * which is already invalid after sw_fini, and trigger DMAR
3892 		 * read errors on the hypervisor side.
3893 */
3894 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3895 return 0;
3896 }
3897
3898 	/* Use the deinitialize sequence from CAIL when unbinding the device from the driver,
3899 	 * otherwise the KIQ hangs when the device is bound back
3900 */
3901 if (!adev->in_gpu_reset && !adev->in_suspend) {
3902 mutex_lock(&adev->srbm_mutex);
3903 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3904 adev->gfx.kiq.ring.pipe,
3905 adev->gfx.kiq.ring.queue, 0);
3906 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3907 soc15_grbm_select(adev, 0, 0, 0, 0);
3908 mutex_unlock(&adev->srbm_mutex);
3909 }
3910
3911 gfx_v9_0_cp_enable(adev, false);
3912 adev->gfx.rlc.funcs->stop(adev);
3913
3914 gfx_v9_0_csb_vram_unpin(adev);
3915
3916 return 0;
3917 }
3918
3919 static int gfx_v9_0_suspend(void *handle)
3920 {
3921 return gfx_v9_0_hw_fini(handle);
3922 }
3923
3924 static int gfx_v9_0_resume(void *handle)
3925 {
3926 return gfx_v9_0_hw_init(handle);
3927 }
3928
3929 static bool gfx_v9_0_is_idle(void *handle)
3930 {
3931 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3932
3933 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3934 GRBM_STATUS, GUI_ACTIVE))
3935 return false;
3936 else
3937 return true;
3938 }
3939
3940 static int gfx_v9_0_wait_for_idle(void *handle)
3941 {
3942 unsigned i;
3943 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3944
3945 for (i = 0; i < adev->usec_timeout; i++) {
3946 if (gfx_v9_0_is_idle(handle))
3947 return 0;
3948 udelay(1);
3949 }
3950 return -ETIMEDOUT;
3951 }
3952
3953 static int gfx_v9_0_soft_reset(void *handle)
3954 {
3955 u32 grbm_soft_reset = 0;
3956 u32 tmp;
3957 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3958
3959 /* GRBM_STATUS */
3960 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3961 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3962 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3963 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3964 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3965 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3966 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3967 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3968 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3969 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3970 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3971 }
3972
3973 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3974 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3975 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3976 }
3977
3978 /* GRBM_STATUS2 */
3979 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3980 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3981 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3982 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3983
3984
3985 if (grbm_soft_reset) {
3986 /* stop the rlc */
3987 adev->gfx.rlc.funcs->stop(adev);
3988
3989 if (adev->asic_type != CHIP_ARCTURUS)
3990 /* Disable GFX parsing/prefetching */
3991 gfx_v9_0_cp_gfx_enable(adev, false);
3992
3993 /* Disable MEC parsing/prefetching */
3994 gfx_v9_0_cp_compute_enable(adev, false);
3995
3996 if (grbm_soft_reset) {
3997 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3998 tmp |= grbm_soft_reset;
3999 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4000 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4001 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4002
4003 udelay(50);
4004
4005 tmp &= ~grbm_soft_reset;
4006 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4007 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4008 }
4009
4010 /* Wait a little for things to settle down */
4011 udelay(50);
4012 }
4013 return 0;
4014 }
4015
4016 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4017 {
4018 uint64_t clock;
4019
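	/* Writing RLC_CAPTURE_GPU_CLOCK_COUNT presumably latches the 64-bit
	 * counter so that the LSB/MSB reads below form a consistent snapshot;
	 * the mutex serializes concurrent readers.
	 */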
4020 mutex_lock(&adev->gfx.gpu_clock_mutex);
4021 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4022 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4023 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4024 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4025 return clock;
4026 }
4027
4028 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4029 uint32_t vmid,
4030 uint32_t gds_base, uint32_t gds_size,
4031 uint32_t gws_base, uint32_t gws_size,
4032 uint32_t oa_base, uint32_t oa_size)
4033 {
4034 struct amdgpu_device *adev = ring->adev;
4035
4036 /* GDS Base */
4037 gfx_v9_0_write_data_to_reg(ring, 0, false,
4038 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4039 gds_base);
4040
4041 /* GDS Size */
4042 gfx_v9_0_write_data_to_reg(ring, 0, false,
4043 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4044 gds_size);
4045
4046 /* GWS */
4047 gfx_v9_0_write_data_to_reg(ring, 0, false,
4048 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4049 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4050
4051 /* OA */
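	/* (1 << (oa_base + oa_size)) - (1 << oa_base) builds a mask of
	 * oa_size consecutive OA bits starting at bit oa_base.
	 */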
4052 gfx_v9_0_write_data_to_reg(ring, 0, false,
4053 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4054 (1 << (oa_size + oa_base)) - (1 << oa_base));
4055 }
4056
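/* Pre-assembled GFX9 compute shader binaries used by the EDC workarounds
 * below; the VGPR variant is intended to touch every vector register and
 * the SGPR variant every scalar register so the GPR ECC state starts out
 * initialized.
 */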
4057 static const u32 vgpr_init_compute_shader[] =
4058 {
4059 0xb07c0000, 0xbe8000ff,
4060 0x000000f8, 0xbf110800,
4061 0x7e000280, 0x7e020280,
4062 0x7e040280, 0x7e060280,
4063 0x7e080280, 0x7e0a0280,
4064 0x7e0c0280, 0x7e0e0280,
4065 0x80808800, 0xbe803200,
4066 0xbf84fff5, 0xbf9c0000,
4067 0xd28c0001, 0x0001007f,
4068 0xd28d0001, 0x0002027e,
4069 0x10020288, 0xb8810904,
4070 0xb7814000, 0xd1196a01,
4071 0x00000301, 0xbe800087,
4072 0xbefc00c1, 0xd89c4000,
4073 0x00020201, 0xd89cc080,
4074 0x00040401, 0x320202ff,
4075 0x00000800, 0x80808100,
4076 0xbf84fff8, 0x7e020280,
4077 0xbf810000, 0x00000000,
4078 };
4079
4080 static const u32 sgpr_init_compute_shader[] =
4081 {
4082 0xb07c0000, 0xbe8000ff,
4083 0x0000005f, 0xbee50080,
4084 0xbe812c65, 0xbe822c65,
4085 0xbe832c65, 0xbe842c65,
4086 0xbe852c65, 0xb77c0005,
4087 0x80808500, 0xbf84fff8,
4088 0xbe800080, 0xbf810000,
4089 };
4090
4091 static const struct soc15_reg_entry vgpr_init_regs[] = {
4092 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4093 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4094 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4095 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4096 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4097 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4098 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4099 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4100    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
4101 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4102 };
4103
4104 static const struct soc15_reg_entry sgpr_init_regs[] = {
4105 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4106 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4107 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4108 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4109 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4110 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4111 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4112 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4113 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
4114 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4115 };
4116
4117 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4118 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4119 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4120 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4121 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4122 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4123 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4124 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4125 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4126 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4127 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4128 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4129 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4130 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4131 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4132 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4133 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4134 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4135 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4136 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4137 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4138 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4139 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4140 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4141 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4142 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4143 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4144 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4145 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4146 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4147 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4148 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4149 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4150 };
4151
4152 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4153 {
4154 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4155 int i, r;
4156
4157 r = amdgpu_ring_alloc(ring, 7);
4158 if (r) {
4159 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4160 ring->name, r);
4161 return r;
4162 }
4163
4164 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4165 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4166
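	/* Issue a CP DMA fill across the whole GDS aperture; touching every
	 * GDS location presumably initializes its EDC/ECC state.
	 */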
4167 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4168 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4169 PACKET3_DMA_DATA_DST_SEL(1) |
4170 PACKET3_DMA_DATA_SRC_SEL(2) |
4171 PACKET3_DMA_DATA_ENGINE(0)));
4172 amdgpu_ring_write(ring, 0);
4173 amdgpu_ring_write(ring, 0);
4174 amdgpu_ring_write(ring, 0);
4175 amdgpu_ring_write(ring, 0);
4176 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4177 adev->gds.gds_size);
4178
4179 amdgpu_ring_commit(ring);
4180
4181 for (i = 0; i < adev->usec_timeout; i++) {
4182 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4183 break;
4184 udelay(1);
4185 }
4186
4187 if (i >= adev->usec_timeout)
4188 r = -ETIMEDOUT;
4189
4190 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4191
4192 return r;
4193 }
4194
4195 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4196 {
4197 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4198 struct amdgpu_ib ib;
4199 struct dma_fence *f = NULL;
4200 int r, i, j, k;
4201 unsigned total_size, vgpr_offset, sgpr_offset;
4202 u64 gpu_addr;
4203
4204 	/* only supported when RAS is enabled */
4205 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4206 return 0;
4207
4208 /* bail if the compute ring is not ready */
4209 if (!ring->sched.ready)
4210 return 0;
4211
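	/* IB size: per pass, 3 dwords per SET_SH_REG register write, 4 dwords
	 * for the COMPUTE_PGM_LO/HI write, 5 for DISPATCH_DIRECT and 2 for
	 * the CS-partial-flush EVENT_WRITE, times 4 bytes per dword; the
	 * shaders are appended after 256-byte alignment.
	 */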
4212 total_size =
4213 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4214 total_size +=
4215 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4216 total_size = ALIGN(total_size, 256);
4217 vgpr_offset = total_size;
4218 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4219 sgpr_offset = total_size;
4220 total_size += sizeof(sgpr_init_compute_shader);
4221
4222 /* allocate an indirect buffer to put the commands in */
4223 memset(&ib, 0, sizeof(ib));
4224 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4225 if (r) {
4226 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4227 return r;
4228 }
4229
4230 /* load the compute shaders */
4231 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4232 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4233
4234 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4235 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4236
4237 /* init the ib length to 0 */
4238 ib.length_dw = 0;
4239
4240 /* VGPR */
4241 /* write the register state for the compute dispatch */
4242 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4243 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4244 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4245 - PACKET3_SET_SH_REG_START;
4246 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4247 }
4248 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4249 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4250 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4251 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4252 - PACKET3_SET_SH_REG_START;
4253 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4254 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4255
4256 /* write dispatch packet */
4257 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4258 ib.ptr[ib.length_dw++] = 128; /* x */
4259 ib.ptr[ib.length_dw++] = 1; /* y */
4260 ib.ptr[ib.length_dw++] = 1; /* z */
4261 ib.ptr[ib.length_dw++] =
4262 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4263
4264 /* write CS partial flush packet */
4265 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4266 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4267
4268 /* SGPR */
4269 /* write the register state for the compute dispatch */
4270 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4271 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4272 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4273 - PACKET3_SET_SH_REG_START;
4274 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4275 }
4276 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4277 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4278 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4279 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4280 - PACKET3_SET_SH_REG_START;
4281 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4282 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4283
4284 /* write dispatch packet */
4285 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4286 ib.ptr[ib.length_dw++] = 128; /* x */
4287 ib.ptr[ib.length_dw++] = 1; /* y */
4288 ib.ptr[ib.length_dw++] = 1; /* z */
4289 ib.ptr[ib.length_dw++] =
4290 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4291
4292 /* write CS partial flush packet */
4293 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4294 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4295
4296 	/* schedule the ib on the ring */
4297 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4298 if (r) {
4299 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4300 goto fail;
4301 }
4302
4303 /* wait for the GPU to finish processing the IB */
4304 r = dma_fence_wait(f, false);
4305 if (r) {
4306 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4307 goto fail;
4308 }
4309
4310 /* read back registers to clear the counters */
4311 mutex_lock(&adev->grbm_idx_mutex);
4312 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4313 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4314 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4315 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4316 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4317 }
4318 }
4319 }
4320 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4321 mutex_unlock(&adev->grbm_idx_mutex);
4322
4323 fail:
4324 amdgpu_ib_free(adev, &ib, NULL);
4325 dma_fence_put(f);
4326
4327 return r;
4328 }
4329
4330 static int gfx_v9_0_early_init(void *handle)
4331 {
4332 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4333
4334 if (adev->asic_type == CHIP_ARCTURUS)
4335 adev->gfx.num_gfx_rings = 0;
4336 else
4337 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4338 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4339 gfx_v9_0_set_ring_funcs(adev);
4340 gfx_v9_0_set_irq_funcs(adev);
4341 gfx_v9_0_set_gds_init(adev);
4342 gfx_v9_0_set_rlc_funcs(adev);
4343
4344 return 0;
4345 }
4346
4347 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
4348 struct ras_err_data *err_data,
4349 struct amdgpu_iv_entry *entry);
4350
4351 static int gfx_v9_0_ecc_late_init(void *handle)
4352 {
4353 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4354 struct ras_common_if **ras_if = &adev->gfx.ras_if;
4355 struct ras_ih_if ih_info = {
4356 .cb = gfx_v9_0_process_ras_data_cb,
4357 };
4358 struct ras_fs_if fs_info = {
4359 .sysfs_name = "gfx_err_count",
4360 .debugfs_name = "gfx_err_inject",
4361 };
4362 struct ras_common_if ras_block = {
4363 .block = AMDGPU_RAS_BLOCK__GFX,
4364 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
4365 .sub_block_index = 0,
4366 .name = "gfx",
4367 };
4368 int r;
4369
4370 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
4371 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
4372 return 0;
4373 }
4374
4375 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4376 if (r)
4377 return r;
4378
4379 /* requires IBs so do in late init after IB pool is initialized */
4380 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4381 if (r)
4382 return r;
4383
4384 /* handle resume path. */
4385 if (*ras_if) {
4386 /* resend ras TA enable cmd during resume.
4387 * prepare to handle failure.
4388 */
4389 ih_info.head = **ras_if;
4390 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4391 if (r) {
4392 if (r == -EAGAIN) {
4393 /* request a gpu reset. will run again. */
4394 amdgpu_ras_request_reset_on_boot(adev,
4395 AMDGPU_RAS_BLOCK__GFX);
4396 return 0;
4397 }
4398 /* fail to enable ras, cleanup all. */
4399 goto irq;
4400 }
4401 /* enable successfully. continue. */
4402 goto resume;
4403 }
4404
4405 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
4406 if (!*ras_if)
4407 return -ENOMEM;
4408
4409 **ras_if = ras_block;
4410
4411 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4412 if (r) {
4413 if (r == -EAGAIN) {
4414 amdgpu_ras_request_reset_on_boot(adev,
4415 AMDGPU_RAS_BLOCK__GFX);
4416 r = 0;
4417 }
4418 goto feature;
4419 }
4420
4421 ih_info.head = **ras_if;
4422 fs_info.head = **ras_if;
4423
4424 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
4425 if (r)
4426 goto interrupt;
4427
4428 amdgpu_ras_debugfs_create(adev, &fs_info);
4429
4430 r = amdgpu_ras_sysfs_create(adev, &fs_info);
4431 if (r)
4432 goto sysfs;
4433 resume:
4434 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
4435 if (r)
4436 goto irq;
4437
4438 return 0;
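	/* error unwind: each label below undoes the setup steps that
	 * completed before the failure point.
	 */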
4439 irq:
4440 amdgpu_ras_sysfs_remove(adev, *ras_if);
4441 sysfs:
4442 amdgpu_ras_debugfs_remove(adev, *ras_if);
4443 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
4444 interrupt:
4445 amdgpu_ras_feature_enable(adev, *ras_if, 0);
4446 feature:
4447 kfree(*ras_if);
4448 *ras_if = NULL;
4449 return r;
4450 }
4451
4452 static int gfx_v9_0_late_init(void *handle)
4453 {
4454 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4455 int r;
4456
4457 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4458 if (r)
4459 return r;
4460
4461 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4462 if (r)
4463 return r;
4464
4465 r = gfx_v9_0_ecc_late_init(handle);
4466 if (r)
4467 return r;
4468
4469 return 0;
4470 }
4471
4472 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4473 {
4474 uint32_t rlc_setting;
4475
4476 /* if RLC is not enabled, do nothing */
4477 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4478 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4479 return false;
4480
4481 return true;
4482 }
4483
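/* RLC safe-mode handshake: CMD latches a request whose payload is the
 * MESSAGE field (1 = enter safe mode, 0 = leave); the CMD bit is polled
 * until the RLC clears it, which presumably signals the acknowledge.
 * unset_safe_mode issues the same request with MESSAGE = 0 and does not
 * wait.
 */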
4484 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4485 {
4486 uint32_t data;
4487 unsigned i;
4488
4489 data = RLC_SAFE_MODE__CMD_MASK;
4490 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4491 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4492
4493 /* wait for RLC_SAFE_MODE */
4494 for (i = 0; i < adev->usec_timeout; i++) {
4495 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4496 break;
4497 udelay(1);
4498 }
4499 }
4500
4501 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4502 {
4503 uint32_t data;
4504
4505 data = RLC_SAFE_MODE__CMD_MASK;
4506 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4507 }
4508
4509 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4510 bool enable)
4511 {
4512 amdgpu_gfx_rlc_enter_safe_mode(adev);
4513
4514 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4515 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4516 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4517 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4518 } else {
4519 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4520 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4521 }
4522
4523 amdgpu_gfx_rlc_exit_safe_mode(adev);
4524 }
4525
4526 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4527 bool enable)
4528 {
4529 	/* TODO: double check whether this needs to run under RLC safe mode */
4530 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4531
4532 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4533 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4534 else
4535 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4536
4537 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4538 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4539 else
4540 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4541
4542 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4543 }
4544
4545 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4546 bool enable)
4547 {
4548 uint32_t data, def;
4549
4550 amdgpu_gfx_rlc_enter_safe_mode(adev);
4551
4552 /* It is disabled by HW by default */
4553 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4554 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4555 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4556
4557 if (adev->asic_type != CHIP_VEGA12)
4558 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4559
4560 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4561 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4562 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4563
4564 /* only for Vega10 & Raven1 */
4565 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4566
4567 if (def != data)
4568 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4569
4570 /* MGLS is a global flag to control all MGLS in GFX */
4571 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4572 /* 2 - RLC memory Light sleep */
4573 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4574 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4575 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4576 if (def != data)
4577 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4578 }
4579 /* 3 - CP memory Light sleep */
4580 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4581 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4582 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4583 if (def != data)
4584 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4585 }
4586 }
4587 } else {
4588 /* 1 - MGCG_OVERRIDE */
4589 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4590
4591 if (adev->asic_type != CHIP_VEGA12)
4592 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4593
4594 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4595 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4596 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4597 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4598
4599 if (def != data)
4600 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4601
4602 /* 2 - disable MGLS in RLC */
4603 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4604 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4605 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4606 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4607 }
4608
4609 /* 3 - disable MGLS in CP */
4610 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4611 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4612 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4613 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4614 }
4615 }
4616
4617 amdgpu_gfx_rlc_exit_safe_mode(adev);
4618 }
4619
4620 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4621 bool enable)
4622 {
4623 uint32_t data, def;
4624
4625 amdgpu_gfx_rlc_enter_safe_mode(adev);
4626
4627 /* Enable 3D CGCG/CGLS */
4628 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4629 /* write cmd to clear cgcg/cgls ov */
4630 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4631 /* unset CGCG override */
4632 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4633 /* update CGCG and CGLS override bits */
4634 if (def != data)
4635 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4636
4637 /* enable 3Dcgcg FSM(0x0000363f) */
4638 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4639
4640 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4641 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4642 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4643 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4644 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4645 if (def != data)
4646 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4647
4648 /* set IDLE_POLL_COUNT(0x00900100) */
4649 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4650 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4651 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4652 if (def != data)
4653 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4654 } else {
4655 /* Disable CGCG/CGLS */
4656 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4657 /* disable cgcg, cgls should be disabled */
4658 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4659 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4660 /* disable cgcg and cgls in FSM */
4661 if (def != data)
4662 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4663 }
4664
4665 amdgpu_gfx_rlc_exit_safe_mode(adev);
4666 }
4667
4668 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4669 bool enable)
4670 {
4671 uint32_t def, data;
4672
4673 amdgpu_gfx_rlc_enter_safe_mode(adev);
4674
4675 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4676 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4677 /* unset CGCG override */
4678 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4679 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4680 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4681 else
4682 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4683 /* update CGCG and CGLS override bits */
4684 if (def != data)
4685 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4686
4687 /* enable CGCG FSM (0x0000363F) */
4688 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4689
4690 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4691 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4692 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4693 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4694 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4695 if (def != data)
4696 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4697
4698 /* set IDLE_POLL_COUNT(0x00900100) */
4699 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4700 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4701 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4702 if (def != data)
4703 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4704 } else {
4705 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4706 /* reset CGCG/CGLS bits */
4707 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4708 /* disable cgcg and cgls in FSM */
4709 if (def != data)
4710 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4711 }
4712
4713 amdgpu_gfx_rlc_exit_safe_mode(adev);
4714 }
4715
4716 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4717 bool enable)
4718 {
4719 if (enable) {
4720 /* CGCG/CGLS should be enabled after MGCG/MGLS
4721 * === MGCG + MGLS ===
4722 */
4723 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4724 /* === CGCG /CGLS for GFX 3D Only === */
4725 gfx_v9_0_update_3d_clock_gating(adev, enable);
4726 /* === CGCG + CGLS === */
4727 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4728 } else {
4729 /* CGCG/CGLS should be disabled before MGCG/MGLS
4730 * === CGCG + CGLS ===
4731 */
4732 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4733 /* === CGCG /CGLS for GFX 3D Only === */
4734 gfx_v9_0_update_3d_clock_gating(adev, enable);
4735 /* === MGCG + MGLS === */
4736 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4737 }
4738 return 0;
4739 }
4740
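/*
 * RLC callbacks used by the common amdgpu_gfx_rlc helpers: safe-mode
 * entry/exit, RLC init/resume/stop/reset/start, clear state buffer
 * construction and the CP jump table count.
 */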
4741 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4742 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4743 .set_safe_mode = gfx_v9_0_set_safe_mode,
4744 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4745 .init = gfx_v9_0_rlc_init,
4746 .get_csb_size = gfx_v9_0_get_csb_size,
4747 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4748 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4749 .resume = gfx_v9_0_rlc_resume,
4750 .stop = gfx_v9_0_rlc_stop,
4751 .reset = gfx_v9_0_rlc_reset,
4752 .start = gfx_v9_0_rlc_start
4753 };
4754
4755 static int gfx_v9_0_set_powergating_state(void *handle,
4756 enum amd_powergating_state state)
4757 {
4758 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4759 bool enable = (state == AMD_PG_STATE_GATE);
4760
4761 switch (adev->asic_type) {
4762 case CHIP_RAVEN:
4763 if (!enable) {
4764 amdgpu_gfx_off_ctrl(adev, false);
4765 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4766 }
4767 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4768 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4769 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4770 } else {
4771 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4772 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4773 }
4774
4775 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4776 gfx_v9_0_enable_cp_power_gating(adev, true);
4777 else
4778 gfx_v9_0_enable_cp_power_gating(adev, false);
4779
4780 /* update gfx cgpg state */
4781 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4782
4783 /* update mgcg state */
4784 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4785
4786 if (enable)
4787 amdgpu_gfx_off_ctrl(adev, true);
4788 break;
4789 case CHIP_VEGA12:
4790 if (!enable) {
4791 amdgpu_gfx_off_ctrl(adev, false);
4792 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4793 } else {
4794 amdgpu_gfx_off_ctrl(adev, true);
4795 }
4796 break;
4797 default:
4798 break;
4799 }
4800
4801 return 0;
4802 }
4803
4804 static int gfx_v9_0_set_clockgating_state(void *handle,
4805 enum amd_clockgating_state state)
4806 {
4807 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4808
4809 if (amdgpu_sriov_vf(adev))
4810 return 0;
4811
4812 switch (adev->asic_type) {
4813 case CHIP_VEGA10:
4814 case CHIP_VEGA12:
4815 case CHIP_VEGA20:
4816 case CHIP_RAVEN:
4817 gfx_v9_0_update_gfx_clock_gating(adev,
4818 state == AMD_CG_STATE_GATE);
4819 break;
4820 default:
4821 break;
4822 }
4823 return 0;
4824 }
4825
4826 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4827 {
4828 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4829 int data;
4830
4831 if (amdgpu_sriov_vf(adev))
4832 *flags = 0;
4833
4834 /* AMD_CG_SUPPORT_GFX_MGCG */
4835 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4836 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4837 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4838
4839 /* AMD_CG_SUPPORT_GFX_CGCG */
4840 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4841 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4842 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4843
4844 /* AMD_CG_SUPPORT_GFX_CGLS */
4845 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4846 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4847
4848 /* AMD_CG_SUPPORT_GFX_RLC_LS */
4849 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4850 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4851 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4852
4853 /* AMD_CG_SUPPORT_GFX_CP_LS */
4854 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4855 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4856 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4857
4858 if (adev->asic_type != CHIP_ARCTURUS) {
4859 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4860 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4861 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4862 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4863
4864 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4865 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4866 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4867 }
4868 }
4869
4870 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4871 {
4872 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4873 }
4874
4875 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4876 {
4877 struct amdgpu_device *adev = ring->adev;
4878 u64 wptr;
4879
4880 /* XXX check if swapping is necessary on BE */
4881 if (ring->use_doorbell) {
4882 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4883 } else {
4884 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4885 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4886 }
4887
4888 return wptr;
4889 }
4890
4891 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4892 {
4893 struct amdgpu_device *adev = ring->adev;
4894
4895 if (ring->use_doorbell) {
4896 /* XXX check if swapping is necessary on BE */
4897 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4898 WDOORBELL64(ring->doorbell_index, ring->wptr);
4899 } else {
4900 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4901 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4902 }
4903 }
4904
4905 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4906 {
4907 struct amdgpu_device *adev = ring->adev;
4908 u32 ref_and_mask, reg_mem_engine;
4909 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4910
4911 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4912 switch (ring->me) {
4913 case 1:
4914 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4915 break;
4916 case 2:
4917 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4918 break;
4919 default:
4920 return;
4921 }
4922 reg_mem_engine = 0;
4923 } else {
4924 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4925 reg_mem_engine = 1; /* pfp */
4926 }
4927
4928 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4929 adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4930 adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4931 ref_and_mask, ref_and_mask, 0x20);
4932 }
4933
4934 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4935 struct amdgpu_job *job,
4936 struct amdgpu_ib *ib,
4937 uint32_t flags)
4938 {
4939 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4940 u32 header, control = 0;
4941
4942 if (ib->flags & AMDGPU_IB_FLAG_CE)
4943 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4944 else
4945 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4946
4947 control |= ib->length_dw | (vmid << 24);
4948
4949 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4950 control |= INDIRECT_BUFFER_PRE_ENB(1);
4951
4952 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4953 gfx_v9_0_ring_emit_de_meta(ring);
4954 }
4955
4956 amdgpu_ring_write(ring, header);
4957 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4958 amdgpu_ring_write(ring,
4959 #ifdef __BIG_ENDIAN
4960 (2 << 0) |
4961 #endif
4962 lower_32_bits(ib->gpu_addr));
4963 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4964 amdgpu_ring_write(ring, control);
4965 }
4966
4967 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4968 struct amdgpu_job *job,
4969 struct amdgpu_ib *ib,
4970 uint32_t flags)
4971 {
4972 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4973 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4974
4975 /* Currently, there is a high possibility to get wave ID mismatch
4976 * between ME and GDS, leading to a hw deadlock, because ME generates
4977 * different wave IDs than the GDS expects. This situation happens
4978 * randomly when at least 5 compute pipes use GDS ordered append.
4979 * The wave IDs generated by ME are also wrong after suspend/resume.
4980 * Those are probably bugs somewhere else in the kernel driver.
4981 *
4982 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4983 * GDS to 0 for this ring (me/pipe).
4984 */
4985 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4986 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4987 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4988 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4989 }
4990
4991 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4992 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4993 amdgpu_ring_write(ring,
4994 #ifdef __BIG_ENDIAN
4995 (2 << 0) |
4996 #endif
4997 lower_32_bits(ib->gpu_addr));
4998 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4999 amdgpu_ring_write(ring, control);
5000 }
5001
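/*
 * Emit an end-of-pipe fence using a RELEASE_MEM packet: flush/write back the
 * relevant caches, write the fence sequence number to "addr" (32- or 64-bit
 * depending on AMDGPU_FENCE_FLAG_64BIT) and optionally raise an interrupt
 * (AMDGPU_FENCE_FLAG_INT). AMDGPU_FENCE_FLAG_TC_WB_ONLY limits the cache
 * action to a TC writeback instead of a full flush/invalidate.
 */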
5002 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5003 u64 seq, unsigned flags)
5004 {
5005 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5006 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5007 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5008
5009 /* RELEASE_MEM - flush caches, send int */
5010 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5011 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5012 EOP_TC_NC_ACTION_EN) :
5013 (EOP_TCL1_ACTION_EN |
5014 EOP_TC_ACTION_EN |
5015 EOP_TC_WB_ACTION_EN |
5016 EOP_TC_MD_ACTION_EN)) |
5017 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5018 EVENT_INDEX(5)));
5019 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5020
5021 /*
5022 * the address must be Qword aligned for a 64-bit write and Dword
5023 * aligned when only the low 32 bits of data are sent (data high is discarded)
5024 */
5025 if (write64bit)
5026 BUG_ON(addr & 0x7);
5027 else
5028 BUG_ON(addr & 0x3);
5029 amdgpu_ring_write(ring, lower_32_bits(addr));
5030 amdgpu_ring_write(ring, upper_32_bits(addr));
5031 amdgpu_ring_write(ring, lower_32_bits(seq));
5032 amdgpu_ring_write(ring, upper_32_bits(seq));
5033 amdgpu_ring_write(ring, 0);
5034 }
5035
5036 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5037 {
5038 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5039 uint32_t seq = ring->fence_drv.sync_seq;
5040 uint64_t addr = ring->fence_drv.gpu_addr;
5041
5042 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5043 lower_32_bits(addr), upper_32_bits(addr),
5044 seq, 0xffffffff, 4);
5045 }
5046
5047 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5048 unsigned vmid, uint64_t pd_addr)
5049 {
5050 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5051
5052 /* compute doesn't have PFP */
5053 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5054 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5055 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5056 amdgpu_ring_write(ring, 0x0);
5057 }
5058 }
5059
5060 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5061 {
5062 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5063 }
5064
5065 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5066 {
5067 u64 wptr;
5068
5069 /* XXX check if swapping is necessary on BE */
5070 if (ring->use_doorbell)
5071 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5072 else
5073 BUG();
5074 return wptr;
5075 }
5076
5077 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5078 bool acquire)
5079 {
5080 struct amdgpu_device *adev = ring->adev;
5081 int pipe_num, tmp, reg;
5082 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5083
5084 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5085
5086 /* first me only has 2 entries, GFX and HP3D */
5087 if (ring->me > 0)
5088 pipe_num -= 2;
5089
5090 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5091 tmp = RREG32(reg);
5092 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5093 WREG32(reg, tmp);
5094 }
5095
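/*
 * Track which pipes currently host a high-priority queue in
 * adev->gfx.pipe_reserve_bitmap. While at least one reservation is active,
 * pipes without a reservation are dropped to a minimal SPI_WCL_PIPE_PERCENT
 * allocation; once the bitmap is empty every pipe gets its full allocation
 * back.
 */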
5096 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5097 struct amdgpu_ring *ring,
5098 bool acquire)
5099 {
5100 int i, pipe;
5101 bool reserve;
5102 struct amdgpu_ring *iring;
5103
5104 mutex_lock(&adev->gfx.pipe_reserve_mutex);
5105 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5106 if (acquire)
5107 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5108 else
5109 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5110
5111 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5112 /* Clear all reservations - everyone reacquires all resources */
5113 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5114 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5115 true);
5116
5117 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5118 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5119 true);
5120 } else {
5121 /* Lower all pipes without a current reservation */
5122 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5123 iring = &adev->gfx.gfx_ring[i];
5124 pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5125 iring->me,
5126 iring->pipe,
5127 0);
5128 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5129 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5130 }
5131
5132 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5133 iring = &adev->gfx.compute_ring[i];
5134 pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5135 iring->me,
5136 iring->pipe,
5137 0);
5138 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5139 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5140 }
5141 }
5142
5143 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5144 }
5145
5146 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5147 struct amdgpu_ring *ring,
5148 bool acquire)
5149 {
5150 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5151 uint32_t queue_priority = acquire ? 0xf : 0x0;
5152
5153 mutex_lock(&adev->srbm_mutex);
5154 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5155
5156 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5157 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5158
5159 soc15_grbm_select(adev, 0, 0, 0, 0);
5160 mutex_unlock(&adev->srbm_mutex);
5161 }
5162
5163 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5164 enum drm_sched_priority priority)
5165 {
5166 struct amdgpu_device *adev = ring->adev;
5167 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5168
5169 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5170 return;
5171
5172 gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5173 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5174 }
5175
5176 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5177 {
5178 struct amdgpu_device *adev = ring->adev;
5179
5180 /* XXX check if swapping is necessary on BE */
5181 if (ring->use_doorbell) {
5182 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5183 WDOORBELL64(ring->doorbell_index, ring->wptr);
5184 } else {
5185 BUG(); /* only DOORBELL method supported on gfx9 now */
5186 }
5187 }
5188
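/*
 * KIQ fences are written with a WRITE_DATA packet rather than RELEASE_MEM:
 * only the low 32 bits of the sequence are stored, and when
 * AMDGPU_FENCE_FLAG_INT is set a second WRITE_DATA pokes CPC_INT_STATUS to
 * raise the interrupt.
 */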
5189 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5190 u64 seq, unsigned int flags)
5191 {
5192 struct amdgpu_device *adev = ring->adev;
5193
5194 /* we only allocate 32bit for each seq wb address */
5195 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5196
5197 /* write fence seq to the "addr" */
5198 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5199 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5200 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5201 amdgpu_ring_write(ring, lower_32_bits(addr));
5202 amdgpu_ring_write(ring, upper_32_bits(addr));
5203 amdgpu_ring_write(ring, lower_32_bits(seq));
5204
5205 if (flags & AMDGPU_FENCE_FLAG_INT) {
5206 /* set register to trigger INT */
5207 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5208 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5209 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5210 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5211 amdgpu_ring_write(ring, 0);
5212 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5213 }
5214 }
5215
5216 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5217 {
5218 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5219 amdgpu_ring_write(ring, 0);
5220 }
5221
5222 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5223 {
5224 struct v9_ce_ib_state ce_payload = {0};
5225 uint64_t csa_addr;
5226 int cnt;
5227
5228 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5229 csa_addr = amdgpu_csa_vaddr(ring->adev);
5230
5231 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5232 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5233 WRITE_DATA_DST_SEL(8) |
5234 WR_CONFIRM) |
5235 WRITE_DATA_CACHE_POLICY(0));
5236 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5237 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5238 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5239 }
5240
5241 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5242 {
5243 struct v9_de_ib_state de_payload = {0};
5244 uint64_t csa_addr, gds_addr;
5245 int cnt;
5246
5247 csa_addr = amdgpu_csa_vaddr(ring->adev);
5248 gds_addr = csa_addr + 4096;
5249 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5250 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5251
5252 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5253 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5254 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5255 WRITE_DATA_DST_SEL(8) |
5256 WR_CONFIRM) |
5257 WRITE_DATA_CACHE_POLICY(0));
5258 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5259 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5260 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5261 }
5262
5263 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5264 {
5265 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5266 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5267 }
5268
5269 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5270 {
5271 uint32_t dw2 = 0;
5272
5273 if (amdgpu_sriov_vf(ring->adev))
5274 gfx_v9_0_ring_emit_ce_meta(ring);
5275
5276 gfx_v9_0_ring_emit_tmz(ring, true);
5277
5278 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5279 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5280 /* set load_global_config & load_global_uconfig */
5281 dw2 |= 0x8001;
5282 /* set load_cs_sh_regs */
5283 dw2 |= 0x01000000;
5284 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5285 dw2 |= 0x10002;
5286
5287 /* set load_ce_ram if preamble presented */
5288 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5289 dw2 |= 0x10000000;
5290 } else {
5291 /* still load_ce_ram if this is the first time a preamble is presented,
5292 * even though no context switch happens.
5293 */
5294 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5295 dw2 |= 0x10000000;
5296 }
5297
5298 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5299 amdgpu_ring_write(ring, dw2);
5300 amdgpu_ring_write(ring, 0);
5301 }
5302
5303 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5304 {
5305 unsigned ret;
5306 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5307 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5308 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5309 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5310 ret = ring->wptr & ring->buf_mask;
5311 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5312 return ret;
5313 }
5314
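/*
 * Patch the dummy DW count that gfx_v9_0_ring_emit_init_cond_exec() left in
 * the COND_EXEC packet (0x55aa55aa) with the actual number of DWs to skip
 * when the condition at cond_exe_gpu_addr is 0. The second branch handles a
 * write pointer that has wrapped past the end of the ring buffer.
 */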
5315 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5316 {
5317 unsigned cur;
5318 BUG_ON(offset > ring->buf_mask);
5319 BUG_ON(ring->ring[offset] != 0x55aa55aa);
5320
5321 cur = (ring->wptr & ring->buf_mask) - 1;
5322 if (likely(cur > offset))
5323 ring->ring[offset] = cur - offset;
5324 else
5325 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5326 }
5327
5328 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5329 {
5330 struct amdgpu_device *adev = ring->adev;
5331
5332 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5333 amdgpu_ring_write(ring, 0 | /* src: register*/
5334 (5 << 8) | /* dst: memory */
5335 (1 << 20)); /* write confirm */
5336 amdgpu_ring_write(ring, reg);
5337 amdgpu_ring_write(ring, 0);
5338 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5339 adev->virt.reg_val_offs * 4));
5340 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5341 adev->virt.reg_val_offs * 4));
5342 }
5343
5344 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5345 uint32_t val)
5346 {
5347 uint32_t cmd = 0;
5348
5349 switch (ring->funcs->type) {
5350 case AMDGPU_RING_TYPE_GFX:
5351 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5352 break;
5353 case AMDGPU_RING_TYPE_KIQ:
5354 cmd = (1 << 16); /* no inc addr */
5355 break;
5356 default:
5357 cmd = WR_CONFIRM;
5358 break;
5359 }
5360 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5361 amdgpu_ring_write(ring, cmd);
5362 amdgpu_ring_write(ring, reg);
5363 amdgpu_ring_write(ring, 0);
5364 amdgpu_ring_write(ring, val);
5365 }
5366
5367 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5368 uint32_t val, uint32_t mask)
5369 {
5370 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5371 }
5372
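/*
 * Write reg0 and poll reg1 in a single WAIT_REG_MEM packet when the CP
 * firmware supports it (me_fw_write_wait / mec_fw_write_wait); otherwise
 * fall back to the generic helper that emits a separate write and wait.
 */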
5373 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5374 uint32_t reg0, uint32_t reg1,
5375 uint32_t ref, uint32_t mask)
5376 {
5377 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5378 struct amdgpu_device *adev = ring->adev;
5379 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5380 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5381
5382 if (fw_version_ok)
5383 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5384 ref, mask, 0x20);
5385 else
5386 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5387 ref, mask);
5388 }
5389
5390 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5391 {
5392 struct amdgpu_device *adev = ring->adev;
5393 uint32_t value = 0;
5394
5395 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5396 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5397 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5398 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5399 WREG32(mmSQ_CMD, value);
5400 }
5401
5402 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5403 enum amdgpu_interrupt_state state)
5404 {
5405 switch (state) {
5406 case AMDGPU_IRQ_STATE_DISABLE:
5407 case AMDGPU_IRQ_STATE_ENABLE:
5408 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5409 TIME_STAMP_INT_ENABLE,
5410 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5411 break;
5412 default:
5413 break;
5414 }
5415 }
5416
5417 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5418 int me, int pipe,
5419 enum amdgpu_interrupt_state state)
5420 {
5421 u32 mec_int_cntl, mec_int_cntl_reg;
5422
5423 /*
5424 * amdgpu controls only the first MEC. That's why this function only
5425 * handles the setting of interrupts for this specific MEC. All other
5426 * pipes' interrupts are set by amdkfd.
5427 */
5428
5429 if (me == 1) {
5430 switch (pipe) {
5431 case 0:
5432 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5433 break;
5434 case 1:
5435 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5436 break;
5437 case 2:
5438 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5439 break;
5440 case 3:
5441 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5442 break;
5443 default:
5444 DRM_DEBUG("invalid pipe %d\n", pipe);
5445 return;
5446 }
5447 } else {
5448 DRM_DEBUG("invalid me %d\n", me);
5449 return;
5450 }
5451
5452 switch (state) {
5453 case AMDGPU_IRQ_STATE_DISABLE:
5454 mec_int_cntl = RREG32(mec_int_cntl_reg);
5455 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5456 TIME_STAMP_INT_ENABLE, 0);
5457 WREG32(mec_int_cntl_reg, mec_int_cntl);
5458 break;
5459 case AMDGPU_IRQ_STATE_ENABLE:
5460 mec_int_cntl = RREG32(mec_int_cntl_reg);
5461 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5462 TIME_STAMP_INT_ENABLE, 1);
5463 WREG32(mec_int_cntl_reg, mec_int_cntl);
5464 break;
5465 default:
5466 break;
5467 }
5468 }
5469
5470 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5471 struct amdgpu_irq_src *source,
5472 unsigned type,
5473 enum amdgpu_interrupt_state state)
5474 {
5475 switch (state) {
5476 case AMDGPU_IRQ_STATE_DISABLE:
5477 case AMDGPU_IRQ_STATE_ENABLE:
5478 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5479 PRIV_REG_INT_ENABLE,
5480 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5481 break;
5482 default:
5483 break;
5484 }
5485
5486 return 0;
5487 }
5488
5489 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5490 struct amdgpu_irq_src *source,
5491 unsigned type,
5492 enum amdgpu_interrupt_state state)
5493 {
5494 switch (state) {
5495 case AMDGPU_IRQ_STATE_DISABLE:
5496 case AMDGPU_IRQ_STATE_ENABLE:
5497 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5498 PRIV_INSTR_INT_ENABLE,
5499 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
break;
5500 default:
5501 break;
5502 }
5503
5504 return 0;
5505 }
5506
5507 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
5508 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5509 CP_ECC_ERROR_INT_ENABLE, 1)
5510
5511 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
5512 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5513 CP_ECC_ERROR_INT_ENABLE, 0)
5514
5515 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5516 struct amdgpu_irq_src *source,
5517 unsigned type,
5518 enum amdgpu_interrupt_state state)
5519 {
5520 switch (state) {
5521 case AMDGPU_IRQ_STATE_DISABLE:
5522 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5523 CP_ECC_ERROR_INT_ENABLE, 0);
5524 DISABLE_ECC_ON_ME_PIPE(1, 0);
5525 DISABLE_ECC_ON_ME_PIPE(1, 1);
5526 DISABLE_ECC_ON_ME_PIPE(1, 2);
5527 DISABLE_ECC_ON_ME_PIPE(1, 3);
5528 break;
5529
5530 case AMDGPU_IRQ_STATE_ENABLE:
5531 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5532 CP_ECC_ERROR_INT_ENABLE, 1);
5533 ENABLE_ECC_ON_ME_PIPE(1, 0);
5534 ENABLE_ECC_ON_ME_PIPE(1, 1);
5535 ENABLE_ECC_ON_ME_PIPE(1, 2);
5536 ENABLE_ECC_ON_ME_PIPE(1, 3);
5537 break;
5538 default:
5539 break;
5540 }
5541
5542 return 0;
5543 }
5544
5545
5546 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5547 struct amdgpu_irq_src *src,
5548 unsigned type,
5549 enum amdgpu_interrupt_state state)
5550 {
5551 switch (type) {
5552 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5553 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5554 break;
5555 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5556 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5557 break;
5558 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5559 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5560 break;
5561 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5562 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5563 break;
5564 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5565 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5566 break;
5567 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5568 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5569 break;
5570 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5571 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5572 break;
5573 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5574 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5575 break;
5576 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5577 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5578 break;
5579 default:
5580 break;
5581 }
5582 return 0;
5583 }
5584
5585 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5586 struct amdgpu_irq_src *source,
5587 struct amdgpu_iv_entry *entry)
5588 {
5589 int i;
5590 u8 me_id, pipe_id, queue_id;
5591 struct amdgpu_ring *ring;
5592
5593 DRM_DEBUG("IH: CP EOP\n");
5594 me_id = (entry->ring_id & 0x0c) >> 2;
5595 pipe_id = (entry->ring_id & 0x03) >> 0;
5596 queue_id = (entry->ring_id & 0x70) >> 4;
5597
5598 switch (me_id) {
5599 case 0:
5600 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5601 break;
5602 case 1:
5603 case 2:
5604 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5605 ring = &adev->gfx.compute_ring[i];
5606 /* Per-queue interrupt is supported for MEC starting from VI.
5607 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5608 */
5609 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5610 amdgpu_fence_process(ring);
5611 }
5612 break;
5613 }
5614 return 0;
5615 }
5616
5617 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5618 struct amdgpu_iv_entry *entry)
5619 {
5620 u8 me_id, pipe_id, queue_id;
5621 struct amdgpu_ring *ring;
5622 int i;
5623
5624 me_id = (entry->ring_id & 0x0c) >> 2;
5625 pipe_id = (entry->ring_id & 0x03) >> 0;
5626 queue_id = (entry->ring_id & 0x70) >> 4;
5627
5628 switch (me_id) {
5629 case 0:
5630 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5631 break;
5632 case 1:
5633 case 2:
5634 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5635 ring = &adev->gfx.compute_ring[i];
5636 if (ring->me == me_id && ring->pipe == pipe_id &&
5637 ring->queue == queue_id)
5638 drm_sched_fault(&ring->sched);
5639 }
5640 break;
5641 }
5642 }
5643
5644 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5645 struct amdgpu_irq_src *source,
5646 struct amdgpu_iv_entry *entry)
5647 {
5648 DRM_ERROR("Illegal register access in command stream\n");
5649 gfx_v9_0_fault(adev, entry);
5650 return 0;
5651 }
5652
5653 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5654 struct amdgpu_irq_src *source,
5655 struct amdgpu_iv_entry *entry)
5656 {
5657 DRM_ERROR("Illegal instruction in command stream\n");
5658 gfx_v9_0_fault(adev, entry);
5659 return 0;
5660 }
5661
5662 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5663 struct ras_err_data *err_data,
5664 struct amdgpu_iv_entry *entry)
5665 {
5666 /* TODO: a UE (uncorrectable error) will trigger an interrupt. */
5667 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5668 if (adev->gfx.funcs->query_ras_error_count)
5669 adev->gfx.funcs->query_ras_error_count(adev, err_data);
5670 amdgpu_ras_reset_gpu(adev, 0);
5671 return AMDGPU_RAS_SUCCESS;
5672 }
5673
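/*
 * EDC/ECC error counter registers polled by gfx_v9_0_query_ras_error_count().
 * Each entry names the counter, gives its register (via SOC15_REG_ENTRY),
 * whether it is instanced per shader engine, how many instances to iterate
 * with gfx_v9_0_select_se_sh(), and the SEC (correctable) and DED
 * (uncorrectable) count field masks; a zero DED mask means the counter only
 * reports single-error-detect events.
 */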
5674 static const struct {
5675 const char *name;
5676 uint32_t ip;
5677 uint32_t inst;
5678 uint32_t seg;
5679 uint32_t reg_offset;
5680 uint32_t per_se_instance;
5681 int32_t num_instance;
5682 uint32_t sec_count_mask;
5683 uint32_t ded_count_mask;
5684 } gfx_ras_edc_regs[] = {
5685 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
5686 REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5687 REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
5688 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
5689 REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
5690 REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
5691 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5692 REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
5693 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5694 REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
5695 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
5696 REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
5697 REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
5698 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5699 REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
5700 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5701 REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5702 REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
5703 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
5704 REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
5705 REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
5706 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
5707 REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
5708 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
5709 REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
5710 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
5711 REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
5712 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5713 REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
5714 REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
5715 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5716 REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
5717 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5718 0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5719 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
5720 { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5721 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5722 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5723 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
5724 { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5725 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5726 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
5727 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5728 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5729 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5730 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
5731 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5732 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5733 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5734 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
5735 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5736 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5737 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5738 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
5739 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5740 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5741 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5742 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
5743 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
5744 REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
5745 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5746 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5747 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
5748 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5749 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
5750 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5751 REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
5752 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5753 REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
5754 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5755 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
5756 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5757 REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
5758 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5759 REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
5760 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5761 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5762 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
5763 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5764 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5765 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
5766 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5767 REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5768 REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
5769 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5770 REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5771 REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
5772 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5773 REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5774 REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
5775 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5776 REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
5777 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5778 REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
5779 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5780 REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
5781 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5782 REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
5783 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5784 REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
5785 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5786 REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
5787 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5788 REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
5789 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5790 REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
5791 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5792 16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
5793 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5794 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5795 0 },
5796 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5797 16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
5798 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5799 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5800 0 },
5801 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5802 16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
5803 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
5804 REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
5805 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5806 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5807 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
5808 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5809 REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5810 REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
5811 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5812 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
5813 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5814 REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
5815 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5816 REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
5817 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5818 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5819 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
5820 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5821 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5822 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
5823 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5824 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5825 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
5826 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5827 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5828 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
5829 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5830 REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
5831 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5832 REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5833 REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
5834 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5835 REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5836 REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
5837 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5838 REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
5839 REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
5840 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5841 REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5842 REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
5843 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5844 REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5845 REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
5846 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5847 REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5848 REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
5849 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5850 REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5851 REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
5852 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5853 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5854 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
5855 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5856 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5857 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
5858 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5859 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5860 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
5861 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5862 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5863 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
5864 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5865 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5866 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
5867 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5868 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5869 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
5870 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5871 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5872 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
5873 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5874 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5875 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
5876 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5877 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5878 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
5879 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5880 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5881 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
5882 { "SQC_INST_BANKA_UTCL1_MISS_FIFO",
5883 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5884 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5885 0 },
5886 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5887 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
5888 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5889 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
5890 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5891 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
5892 { "SQC_DATA_BANKA_DIRTY_BIT_RAM",
5893 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5894 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
5895 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5896 REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5897 REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
5898 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5899 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5900 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
5901 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5902 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5903 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
5904 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5905 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5906 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
5907 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5908 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5909 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
5910 { "SQC_INST_BANKB_UTCL1_MISS_FIFO",
5911 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5912 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5913 0 },
5914 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5915 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
5916 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5917 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
5918 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5919 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
5920 { "SQC_DATA_BANKB_DIRTY_BIT_RAM",
5921 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5922 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
5923 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5924 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5925 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
5926 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5927 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5928 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
5929 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5930 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5931 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
5932 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5933 REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5934 REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
5935 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5936 REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5937 REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
5938 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5939 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
5940 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5941 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
5942 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5943 REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
5944 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5945 REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
5946 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5947 REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
5948 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5949 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
5950 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
5951 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5952 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
5953 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
5954 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5955 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
5956 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
5957 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5958 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
5959 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5960 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
5961 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5962 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
5963 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5964 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
5965 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5966 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
5967 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5968 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
5969 };
5970
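/*
 * Inject a RAS error into the selected GFX sub-block via the PSP RAS TA.
 * The request is rejected unless both the hardware and the driver support
 * the requested error type for that sub-block; only Vega20 is supported.
 */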
5971 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
5972 void *inject_if)
5973 {
5974 struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
5975 int ret;
5976 struct ta_ras_trigger_error_input block_info = { 0 };
5977
5978 if (adev->asic_type != CHIP_VEGA20)
5979 return -EINVAL;
5980
5981 if (!ras_gfx_subblocks[info->head.sub_block_index].name)
5982 return -EPERM;
5983
5984 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
5985 info->head.type)) {
5986 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
5987 ras_gfx_subblocks[info->head.sub_block_index].name,
5988 info->head.type);
5989 return -EPERM;
5990 }
5991
5992 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
5993 info->head.type)) {
5994 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
5995 ras_gfx_subblocks[info->head.sub_block_index].name,
5996 info->head.type);
5997 return -EPERM;
5998 }
5999
6000 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6001 block_info.sub_block_index =
6002 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6003 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6004 block_info.address = info->address;
6005 block_info.value = info->value;
6006
6007 mutex_lock(&adev->grbm_idx_mutex);
6008 ret = psp_ras_trigger_error(&adev->psp, &block_info);
6009 mutex_unlock(&adev->grbm_idx_mutex);
6010
6011 return ret;
6012 }
6013
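/*
 * Walk every shader engine and instance, read each EDC counter listed in
 * gfx_ras_edc_regs and accumulate the results: SEC counts are reported as
 * correctable errors (ce_count), DED counts as uncorrectable errors
 * (ue_count). Only supported on Vega20.
 */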
6014 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6015 void *ras_error_status)
6016 {
6017 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6018 uint32_t sec_count, ded_count;
6019 uint32_t i;
6020 uint32_t reg_value;
6021 uint32_t se_id, instance_id;
6022
6023 if (adev->asic_type != CHIP_VEGA20)
6024 return -EINVAL;
6025
6026 err_data->ue_count = 0;
6027 err_data->ce_count = 0;
6028
6029 mutex_lock(&adev->grbm_idx_mutex);
6030 for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
6031 for (instance_id = 0; instance_id < 256; instance_id++) {
6032 for (i = 0; i < ARRAY_SIZE(gfx_ras_edc_regs); i++) {
6035 if (se_id != 0 &&
6036 !gfx_ras_edc_regs[i].per_se_instance)
6037 continue;
6038 if (instance_id >= gfx_ras_edc_regs[i].num_instance)
6039 continue;
6040
6041 gfx_v9_0_select_se_sh(adev, se_id, 0,
6042 instance_id);
6043
6044 reg_value = RREG32(
6045 adev->reg_offset[gfx_ras_edc_regs[i].ip]
6046 [gfx_ras_edc_regs[i].inst]
6047 [gfx_ras_edc_regs[i].seg] +
6048 gfx_ras_edc_regs[i].reg_offset);
6049 sec_count = reg_value &
6050 gfx_ras_edc_regs[i].sec_count_mask;
6051 ded_count = reg_value &
6052 gfx_ras_edc_regs[i].ded_count_mask;
6053 if (sec_count) {
6054 DRM_INFO(
6055 "Instance[%d][%d]: SubBlock %s, SEC %d\n",
6056 se_id, instance_id,
6057 gfx_ras_edc_regs[i].name,
6058 sec_count);
6059 err_data->ce_count++;
6060 }
6061
6062 if (ded_count) {
6063 DRM_INFO(
6064 "Instance[%d][%d]: SubBlock %s, DED %d\n",
6065 se_id, instance_id,
6066 gfx_ras_edc_regs[i].name,
6067 ded_count);
6068 err_data->ue_count++;
6069 }
6070 }
6071 }
6072 }
6073 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6074 mutex_unlock(&adev->grbm_idx_mutex);
6075
6076 return 0;
6077 }
6078
6079 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6080 struct amdgpu_irq_src *source,
6081 struct amdgpu_iv_entry *entry)
6082 {
6083 struct ras_common_if *ras_if = adev->gfx.ras_if;
6084 struct ras_dispatch_if ih_data = {
6085 .entry = entry,
6086 };
6087
6088 if (!ras_if)
6089 return 0;
6090
6091 ih_data.head = *ras_if;
6092
6093 DRM_ERROR("CP ECC ERROR IRQ\n");
6094 amdgpu_ras_interrupt_dispatch(adev, &ih_data);
6095 return 0;
6096 }
6097
6098 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6099 .name = "gfx_v9_0",
6100 .early_init = gfx_v9_0_early_init,
6101 .late_init = gfx_v9_0_late_init,
6102 .sw_init = gfx_v9_0_sw_init,
6103 .sw_fini = gfx_v9_0_sw_fini,
6104 .hw_init = gfx_v9_0_hw_init,
6105 .hw_fini = gfx_v9_0_hw_fini,
6106 .suspend = gfx_v9_0_suspend,
6107 .resume = gfx_v9_0_resume,
6108 .is_idle = gfx_v9_0_is_idle,
6109 .wait_for_idle = gfx_v9_0_wait_for_idle,
6110 .soft_reset = gfx_v9_0_soft_reset,
6111 .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6112 .set_powergating_state = gfx_v9_0_set_powergating_state,
6113 .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6114 };
6115
6116 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6117 .type = AMDGPU_RING_TYPE_GFX,
6118 .align_mask = 0xff,
6119 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6120 .support_64bit_ptrs = true,
6121 .vmhub = AMDGPU_GFXHUB_0,
6122 .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6123 .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6124 .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6125 .emit_frame_size = /* totally 242 maximum if 16 IBs */
6126 5 + /* COND_EXEC */
6127 7 + /* PIPELINE_SYNC */
6128 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6129 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6130 2 + /* VM_FLUSH */
6131 8 + /* FENCE for VM_FLUSH */
6132 20 + /* GDS switch */
6133 4 + /* double SWITCH_BUFFER,
6134 the first COND_EXEC jumps to the place just
6135 prior to this double SWITCH_BUFFER */
6136 5 + /* COND_EXEC */
6137 7 + /* HDP_flush */
6138 4 + /* VGT_flush */
6139 14 + /* CE_META */
6140 31 + /* DE_META */
6141 3 + /* CNTX_CTRL */
6142 5 + /* HDP_INVL */
6143 8 + 8 + /* FENCE x2 */
6144 2, /* SWITCH_BUFFER */
6145 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6146 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6147 .emit_fence = gfx_v9_0_ring_emit_fence,
6148 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6149 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6150 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6151 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6152 .test_ring = gfx_v9_0_ring_test_ring,
6153 .test_ib = gfx_v9_0_ring_test_ib,
6154 .insert_nop = amdgpu_ring_insert_nop,
6155 .pad_ib = amdgpu_ring_generic_pad_ib,
6156 .emit_switch_buffer = gfx_v9_ring_emit_sb,
6157 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6158 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6159 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6160 .emit_tmz = gfx_v9_0_ring_emit_tmz,
6161 .emit_wreg = gfx_v9_0_ring_emit_wreg,
6162 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6163 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6164 .soft_recovery = gfx_v9_0_ring_soft_recovery,
6165 };
6166
6167 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6168 .type = AMDGPU_RING_TYPE_COMPUTE,
6169 .align_mask = 0xff,
6170 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6171 .support_64bit_ptrs = true,
6172 .vmhub = AMDGPU_GFXHUB_0,
6173 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6174 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6175 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6176 .emit_frame_size =
6177 20 + /* gfx_v9_0_ring_emit_gds_switch */
6178 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6179 5 + /* hdp invalidate */
6180 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6181 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6182 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6183 2 + /* gfx_v9_0_ring_emit_vm_flush */
6184 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6185 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6186 .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6187 .emit_fence = gfx_v9_0_ring_emit_fence,
6188 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6189 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6190 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6191 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6192 .test_ring = gfx_v9_0_ring_test_ring,
6193 .test_ib = gfx_v9_0_ring_test_ib,
6194 .insert_nop = amdgpu_ring_insert_nop,
6195 .pad_ib = amdgpu_ring_generic_pad_ib,
6196 .set_priority = gfx_v9_0_ring_set_priority_compute,
6197 .emit_wreg = gfx_v9_0_ring_emit_wreg,
6198 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6199 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6200 };
6201
6202 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6203 .type = AMDGPU_RING_TYPE_KIQ,
6204 .align_mask = 0xff,
6205 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6206 .support_64bit_ptrs = true,
6207 .vmhub = AMDGPU_GFXHUB_0,
6208 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6209 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6210 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6211 .emit_frame_size =
6212 20 + /* gfx_v9_0_ring_emit_gds_switch */
6213 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6214 5 + /* hdp invalidate */
6215 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6216 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6217 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6218 2 + /* gfx_v9_0_ring_emit_vm_flush */
6219 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6220 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6221 .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6222 .test_ring = gfx_v9_0_ring_test_ring,
6223 .insert_nop = amdgpu_ring_insert_nop,
6224 .pad_ib = amdgpu_ring_generic_pad_ib,
6225 .emit_rreg = gfx_v9_0_ring_emit_rreg,
6226 .emit_wreg = gfx_v9_0_ring_emit_wreg,
6227 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6228 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6229 };
6230
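/* Attach the ring callback tables above to the KIQ, gfx and compute rings. */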
6231 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6232 {
6233 int i;
6234
6235 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6236
6237 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6238 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6239
6240 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6241 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6242 }
6243
6244 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6245 .set = gfx_v9_0_set_eop_interrupt_state,
6246 .process = gfx_v9_0_eop_irq,
6247 };
6248
6249 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6250 .set = gfx_v9_0_set_priv_reg_fault_state,
6251 .process = gfx_v9_0_priv_reg_irq,
6252 };
6253
6254 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6255 .set = gfx_v9_0_set_priv_inst_fault_state,
6256 .process = gfx_v9_0_priv_inst_irq,
6257 };
6258
6259 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6260 .set = gfx_v9_0_set_cp_ecc_error_state,
6261 .process = gfx_v9_0_cp_ecc_error_irq,
6262 };
6263
6264
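/*
 * Hook up the interrupt sources owned by this block: end-of-pipe,
 * privileged register/instruction faults and CP ECC errors.
 */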
6265 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6266 {
6267 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6268 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6269
6270 adev->gfx.priv_reg_irq.num_types = 1;
6271 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6272
6273 adev->gfx.priv_inst_irq.num_types = 1;
6274 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6275
6276 adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6277 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6278 }
6279
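/* All supported gfx9 ASICs share the same RLC callback table. */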
6280 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6281 {
6282 switch (adev->asic_type) {
6283 case CHIP_VEGA10:
6284 case CHIP_VEGA12:
6285 case CHIP_VEGA20:
6286 case CHIP_RAVEN:
6287 case CHIP_ARCTURUS:
6288 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6289 break;
6290 default:
6291 break;
6292 }
6293 }
6294
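/*
 * Per-ASIC GDS memory size and maximum GDS compute wave id; the GWS and
 * OA sizes are common to all gfx9 parts.
 */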
6295 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6296 {
6297 /* init ASIC GDS info */
6298 switch (adev->asic_type) {
6299 case CHIP_VEGA10:
6300 case CHIP_VEGA12:
6301 case CHIP_VEGA20:
6302 adev->gds.gds_size = 0x10000;
6303 break;
6304 case CHIP_RAVEN:
6305 case CHIP_ARCTURUS:
6306 adev->gds.gds_size = 0x1000;
6307 break;
6308 default:
6309 adev->gds.gds_size = 0x10000;
6310 break;
6311 }
6312
6313 switch (adev->asic_type) {
6314 case CHIP_VEGA10:
6315 case CHIP_VEGA20:
6316 adev->gds.gds_compute_max_wave_id = 0x7ff;
6317 break;
6318 case CHIP_VEGA12:
6319 adev->gds.gds_compute_max_wave_id = 0x27f;
6320 break;
6321 case CHIP_RAVEN:
6322 if (adev->rev_id >= 0x8)
6323 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6324 else
6325 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6326 break;
6327 case CHIP_ARCTURUS:
6328 adev->gds.gds_compute_max_wave_id = 0xfff;
6329 break;
6330 default:
6331 /* this really depends on the chip */
6332 adev->gds.gds_compute_max_wave_id = 0x7ff;
6333 break;
6334 }
6335
6336 adev->gds.gws_size = 64;
6337 adev->gds.oa_size = 16;
6338 }
6339
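/* Program the user-configurable inactive-CU mask for the currently selected SE/SH. */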
6340 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6341 u32 bitmap)
6342 {
6343 u32 data;
6344
6345 if (!bitmap)
6346 return;
6347
6348 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6349 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6350
6351 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6352 }
6353
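/*
 * Combine the fused (CC_*) and user (GC_USER_*) inactive-CU masks and
 * return the active-CU bitmap for the currently selected SE/SH.
 */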
6354 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6355 {
6356 u32 data, mask;
6357
6358 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6359 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6360
6361 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6362 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6363
6364 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6365
6366 return (~data) & mask;
6367 }
6368
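/*
 * Walk every SE/SH instance, record its active-CU bitmap and fill in
 * cu_info: total active CU count, always-on CU mask and SIMDs per CU.
 */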
6369 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6370 struct amdgpu_cu_info *cu_info)
6371 {
6372 int i, j, k, counter, active_cu_number = 0;
6373 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6374 unsigned disable_masks[4 * 4];
6375
6376 if (!adev || !cu_info)
6377 return -EINVAL;
6378
6379 /*
6380 * The limit of 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs.
6381 */
6382 if (adev->gfx.config.max_shader_engines *
6383 adev->gfx.config.max_sh_per_se > 16)
6384 return -EINVAL;
6385
6386 amdgpu_gfx_parse_disable_cu(disable_masks,
6387 adev->gfx.config.max_shader_engines,
6388 adev->gfx.config.max_sh_per_se);
6389
6390 mutex_lock(&adev->grbm_idx_mutex);
6391 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6392 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6393 mask = 1;
6394 ao_bitmap = 0;
6395 counter = 0;
6396 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6397 gfx_v9_0_set_user_cu_inactive_bitmap(
6398 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6399 bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6400
6401 /*
6402 * The bitmap (and ao_cu_bitmap) in the cu_info structure is
6403 * a 4x4 array, which suits Vega ASICs with their 4*2 SE/SH
6404 * layout.
6405 * Arcturus, however, uses an 8*1 SE/SH layout; to minimize
6406 * the impact, remap its extra SEs onto the existing 4x4
6407 * bitmap array as below:
6408 * SE4,SH0 --> bitmap[0][1]
6409 * SE5,SH0 --> bitmap[1][1]
6410 * SE6,SH0 --> bitmap[2][1]
6411 * SE7,SH0 --> bitmap[3][1]
6412 */
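/* e.g. Arcturus SE5/SH0: i = 5, j = 0 -> bitmap[5 % 4][0 + 5 / 4] = bitmap[1][1] */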
6413 cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6414
6415 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6416 if (bitmap & mask) {
6417 if (counter < adev->gfx.config.max_cu_per_sh)
6418 ao_bitmap |= mask;
6419 counter++;
6420 }
6421 mask <<= 1;
6422 }
6423 active_cu_number += counter;
6424 if (i < 2 && j < 2)
6425 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6426 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6427 }
6428 }
6429 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6430 mutex_unlock(&adev->grbm_idx_mutex);
6431
6432 cu_info->number = active_cu_number;
6433 cu_info->ao_cu_mask = ao_cu_mask;
6434 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6435
6436 return 0;
6437 }
6438
6439 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6440 {
6441 .type = AMD_IP_BLOCK_TYPE_GFX,
6442 .major = 9,
6443 .minor = 0,
6444 .rev = 0,
6445 .funcs = &gfx_v9_0_ip_funcs,
6446 };
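/*
 * Illustrative only, not part of this file: the SoC setup code is expected
 * to register this block during early init, roughly
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 *
 * see the per-ASIC set_ip_blocks routines in soc15.c.
 */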