/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
23 #include <linux/firmware.h>
26 #include "amdgpu_gfx.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
44 #include "gca/gfx_8_0_enum.h"
46 #include "uvd/uvd_5_0_d.h"
47 #include "uvd/uvd_5_0_sh_mask.h"
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
52 #define GFX8_NUM_GFX_RINGS 1
53 #define GFX8_NUM_COMPUTE_RINGS 8
55 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
56 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
59 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
60 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
61 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
62 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
63 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
64 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
65 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
66 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
67 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
69 MODULE_FIRMWARE("radeon/carrizo_ce.bin");
70 MODULE_FIRMWARE("radeon/carrizo_pfp.bin");
71 MODULE_FIRMWARE("radeon/carrizo_me.bin");
72 MODULE_FIRMWARE("radeon/carrizo_mec.bin");
73 MODULE_FIRMWARE("radeon/carrizo_mec2.bin");
74 MODULE_FIRMWARE("radeon/carrizo_rlc.bin");
76 MODULE_FIRMWARE("radeon/tonga_ce.bin");
77 MODULE_FIRMWARE("radeon/tonga_pfp.bin");
78 MODULE_FIRMWARE("radeon/tonga_me.bin");
79 MODULE_FIRMWARE("radeon/tonga_mec.bin");
80 MODULE_FIRMWARE("radeon/tonga_mec2.bin");
81 MODULE_FIRMWARE("radeon/tonga_rlc.bin");
83 MODULE_FIRMWARE("radeon/topaz_ce.bin");
84 MODULE_FIRMWARE("radeon/topaz_pfp.bin");
85 MODULE_FIRMWARE("radeon/topaz_me.bin");
86 MODULE_FIRMWARE("radeon/topaz_mec.bin");
87 MODULE_FIRMWARE("radeon/topaz_mec2.bin");
88 MODULE_FIRMWARE("radeon/topaz_rlc.bin");
90 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset
[] =
92 {mmGDS_VMID0_BASE
, mmGDS_VMID0_SIZE
, mmGDS_GWS_VMID0
, mmGDS_OA_VMID0
},
93 {mmGDS_VMID1_BASE
, mmGDS_VMID1_SIZE
, mmGDS_GWS_VMID1
, mmGDS_OA_VMID1
},
94 {mmGDS_VMID2_BASE
, mmGDS_VMID2_SIZE
, mmGDS_GWS_VMID2
, mmGDS_OA_VMID2
},
95 {mmGDS_VMID3_BASE
, mmGDS_VMID3_SIZE
, mmGDS_GWS_VMID3
, mmGDS_OA_VMID3
},
96 {mmGDS_VMID4_BASE
, mmGDS_VMID4_SIZE
, mmGDS_GWS_VMID4
, mmGDS_OA_VMID4
},
97 {mmGDS_VMID5_BASE
, mmGDS_VMID5_SIZE
, mmGDS_GWS_VMID5
, mmGDS_OA_VMID5
},
98 {mmGDS_VMID6_BASE
, mmGDS_VMID6_SIZE
, mmGDS_GWS_VMID6
, mmGDS_OA_VMID6
},
99 {mmGDS_VMID7_BASE
, mmGDS_VMID7_SIZE
, mmGDS_GWS_VMID7
, mmGDS_OA_VMID7
},
100 {mmGDS_VMID8_BASE
, mmGDS_VMID8_SIZE
, mmGDS_GWS_VMID8
, mmGDS_OA_VMID8
},
101 {mmGDS_VMID9_BASE
, mmGDS_VMID9_SIZE
, mmGDS_GWS_VMID9
, mmGDS_OA_VMID9
},
102 {mmGDS_VMID10_BASE
, mmGDS_VMID10_SIZE
, mmGDS_GWS_VMID10
, mmGDS_OA_VMID10
},
103 {mmGDS_VMID11_BASE
, mmGDS_VMID11_SIZE
, mmGDS_GWS_VMID11
, mmGDS_OA_VMID11
},
104 {mmGDS_VMID12_BASE
, mmGDS_VMID12_SIZE
, mmGDS_GWS_VMID12
, mmGDS_OA_VMID12
},
105 {mmGDS_VMID13_BASE
, mmGDS_VMID13_SIZE
, mmGDS_GWS_VMID13
, mmGDS_OA_VMID13
},
106 {mmGDS_VMID14_BASE
, mmGDS_VMID14_SIZE
, mmGDS_GWS_VMID14
, mmGDS_OA_VMID14
},
107 {mmGDS_VMID15_BASE
, mmGDS_VMID15_SIZE
, mmGDS_GWS_VMID15
, mmGDS_OA_VMID15
}
110 static const u32 golden_settings_tonga_a11
[] =
112 mmCB_HW_CONTROL
, 0xfffdf3cf, 0x00007208,
113 mmCB_HW_CONTROL_3
, 0x00000040, 0x00000040,
114 mmDB_DEBUG2
, 0xf00fffff, 0x00000400,
115 mmGB_GPU_ID
, 0x0000000f, 0x00000000,
116 mmPA_SC_ENHANCE
, 0xffffffff, 0x20000001,
117 mmPA_SC_FIFO_DEPTH_CNTL
, 0x000003ff, 0x000000fc,
118 mmPA_SC_LINE_STIPPLE_STATE
, 0x0000ff0f, 0x00000000,
119 mmTA_CNTL_AUX
, 0x000f000f, 0x000b0000,
120 mmTCC_CTRL
, 0x00100000, 0xf31fff7f,
121 mmTCP_ADDR_CONFIG
, 0x000003ff, 0x000002fb,
122 mmTCP_CHAN_STEER_HI
, 0xffffffff, 0x0000543b,
123 mmTCP_CHAN_STEER_LO
, 0xffffffff, 0xa9210876,
126 static const u32 tonga_golden_common_all
[] =
128 mmGRBM_GFX_INDEX
, 0xffffffff, 0xe0000000,
129 mmPA_SC_RASTER_CONFIG
, 0xffffffff, 0x16000012,
130 mmPA_SC_RASTER_CONFIG_1
, 0xffffffff, 0x0000002A,
131 mmGB_ADDR_CONFIG
, 0xffffffff, 0x22011003,
132 mmSPI_RESOURCE_RESERVE_CU_0
, 0xffffffff, 0x00000800,
133 mmSPI_RESOURCE_RESERVE_CU_1
, 0xffffffff, 0x00000800,
134 mmSPI_RESOURCE_RESERVE_EN_CU_0
, 0xffffffff, 0x00007FBF,
135 mmSPI_RESOURCE_RESERVE_EN_CU_1
, 0xffffffff, 0x00007FAF
138 static const u32 tonga_mgcg_cgcg_init
[] =
140 mmRLC_CGTT_MGCG_OVERRIDE
, 0xffffffff, 0xffffffff,
141 mmGRBM_GFX_INDEX
, 0xffffffff, 0xe0000000,
142 mmCB_CGTT_SCLK_CTRL
, 0xffffffff, 0x00000100,
143 mmCGTT_BCI_CLK_CTRL
, 0xffffffff, 0x00000100,
144 mmCGTT_CP_CLK_CTRL
, 0xffffffff, 0x00000100,
145 mmCGTT_CPC_CLK_CTRL
, 0xffffffff, 0x00000100,
146 mmCGTT_CPF_CLK_CTRL
, 0xffffffff, 0x40000100,
147 mmCGTT_GDS_CLK_CTRL
, 0xffffffff, 0x00000100,
148 mmCGTT_IA_CLK_CTRL
, 0xffffffff, 0x06000100,
149 mmCGTT_PA_CLK_CTRL
, 0xffffffff, 0x00000100,
150 mmCGTT_WD_CLK_CTRL
, 0xffffffff, 0x06000100,
151 mmCGTT_PC_CLK_CTRL
, 0xffffffff, 0x00000100,
152 mmCGTT_RLC_CLK_CTRL
, 0xffffffff, 0x00000100,
153 mmCGTT_SC_CLK_CTRL
, 0xffffffff, 0x00000100,
154 mmCGTT_SPI_CLK_CTRL
, 0xffffffff, 0x00000100,
155 mmCGTT_SQ_CLK_CTRL
, 0xffffffff, 0x00000100,
156 mmCGTT_SQG_CLK_CTRL
, 0xffffffff, 0x00000100,
157 mmCGTT_SX_CLK_CTRL0
, 0xffffffff, 0x00000100,
158 mmCGTT_SX_CLK_CTRL1
, 0xffffffff, 0x00000100,
159 mmCGTT_SX_CLK_CTRL2
, 0xffffffff, 0x00000100,
160 mmCGTT_SX_CLK_CTRL3
, 0xffffffff, 0x00000100,
161 mmCGTT_SX_CLK_CTRL4
, 0xffffffff, 0x00000100,
162 mmCGTT_TCI_CLK_CTRL
, 0xffffffff, 0x00000100,
163 mmCGTT_TCP_CLK_CTRL
, 0xffffffff, 0x00000100,
164 mmCGTT_VGT_CLK_CTRL
, 0xffffffff, 0x06000100,
165 mmDB_CGTT_CLK_CTRL_0
, 0xffffffff, 0x00000100,
166 mmTA_CGTT_CTRL
, 0xffffffff, 0x00000100,
167 mmTCA_CGTT_SCLK_CTRL
, 0xffffffff, 0x00000100,
168 mmTCC_CGTT_SCLK_CTRL
, 0xffffffff, 0x00000100,
169 mmTD_CGTT_CTRL
, 0xffffffff, 0x00000100,
170 mmGRBM_GFX_INDEX
, 0xffffffff, 0xe0000000,
171 mmCGTS_CU0_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
172 mmCGTS_CU0_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
173 mmCGTS_CU0_TA_SQC_CTRL_REG
, 0xffffffff, 0x00040007,
174 mmCGTS_CU0_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
175 mmCGTS_CU0_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
176 mmCGTS_CU1_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
177 mmCGTS_CU1_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
178 mmCGTS_CU1_TA_CTRL_REG
, 0xffffffff, 0x00040007,
179 mmCGTS_CU1_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
180 mmCGTS_CU1_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
181 mmCGTS_CU2_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
182 mmCGTS_CU2_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
183 mmCGTS_CU2_TA_CTRL_REG
, 0xffffffff, 0x00040007,
184 mmCGTS_CU2_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
185 mmCGTS_CU2_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
186 mmCGTS_CU3_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
187 mmCGTS_CU3_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
188 mmCGTS_CU3_TA_CTRL_REG
, 0xffffffff, 0x00040007,
189 mmCGTS_CU3_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
190 mmCGTS_CU3_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
191 mmCGTS_CU4_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
192 mmCGTS_CU4_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
193 mmCGTS_CU4_TA_SQC_CTRL_REG
, 0xffffffff, 0x00040007,
194 mmCGTS_CU4_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
195 mmCGTS_CU4_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
196 mmCGTS_CU5_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
197 mmCGTS_CU5_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
198 mmCGTS_CU5_TA_CTRL_REG
, 0xffffffff, 0x00040007,
199 mmCGTS_CU5_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
200 mmCGTS_CU5_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
201 mmCGTS_CU6_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
202 mmCGTS_CU6_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
203 mmCGTS_CU6_TA_CTRL_REG
, 0xffffffff, 0x00040007,
204 mmCGTS_CU6_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
205 mmCGTS_CU6_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
206 mmCGTS_CU7_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
207 mmCGTS_CU7_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
208 mmCGTS_CU7_TA_CTRL_REG
, 0xffffffff, 0x00040007,
209 mmCGTS_CU7_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
210 mmCGTS_CU7_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
211 mmCGTS_SM_CTRL_REG
, 0xffffffff, 0x96e00200,
212 mmCP_RB_WPTR_POLL_CNTL
, 0xffffffff, 0x00900100,
213 mmRLC_CGCG_CGLS_CTRL
, 0xffffffff, 0x0020003c,
214 mmCP_MEM_SLP_CNTL
, 0x00000001, 0x00000001,
217 static const u32 golden_settings_iceland_a11
[] =
219 mmCB_HW_CONTROL_3
, 0x00000040, 0x00000040,
220 mmDB_DEBUG2
, 0xf00fffff, 0x00000400,
221 mmDB_DEBUG3
, 0xc0000000, 0xc0000000,
222 mmGB_GPU_ID
, 0x0000000f, 0x00000000,
223 mmPA_SC_ENHANCE
, 0xffffffff, 0x20000001,
224 mmPA_SC_LINE_STIPPLE_STATE
, 0x0000ff0f, 0x00000000,
225 mmPA_SC_RASTER_CONFIG
, 0x3f3fffff, 0x00000002,
226 mmPA_SC_RASTER_CONFIG_1
, 0x0000003f, 0x00000000,
227 mmTA_CNTL_AUX
, 0x000f000f, 0x000b0000,
228 mmTCC_CTRL
, 0x00100000, 0xf31fff7f,
229 mmTCP_ADDR_CONFIG
, 0x000003ff, 0x000000f1,
230 mmTCP_CHAN_STEER_HI
, 0xffffffff, 0x00000000,
231 mmTCP_CHAN_STEER_LO
, 0xffffffff, 0x00000010,
234 static const u32 iceland_golden_common_all
[] =
236 mmGRBM_GFX_INDEX
, 0xffffffff, 0xe0000000,
237 mmPA_SC_RASTER_CONFIG
, 0xffffffff, 0x00000002,
238 mmPA_SC_RASTER_CONFIG_1
, 0xffffffff, 0x00000000,
239 mmGB_ADDR_CONFIG
, 0xffffffff, 0x22010001,
240 mmSPI_RESOURCE_RESERVE_CU_0
, 0xffffffff, 0x00000800,
241 mmSPI_RESOURCE_RESERVE_CU_1
, 0xffffffff, 0x00000800,
242 mmSPI_RESOURCE_RESERVE_EN_CU_0
, 0xffffffff, 0x00007FBF,
243 mmSPI_RESOURCE_RESERVE_EN_CU_1
, 0xffffffff, 0x00007FAF
246 static const u32 iceland_mgcg_cgcg_init
[] =
248 mmRLC_CGTT_MGCG_OVERRIDE
, 0xffffffff, 0xffffffff,
249 mmGRBM_GFX_INDEX
, 0xffffffff, 0xe0000000,
250 mmCB_CGTT_SCLK_CTRL
, 0xffffffff, 0x00000100,
251 mmCGTT_BCI_CLK_CTRL
, 0xffffffff, 0x00000100,
252 mmCGTT_CP_CLK_CTRL
, 0xffffffff, 0xc0000100,
253 mmCGTT_CPC_CLK_CTRL
, 0xffffffff, 0xc0000100,
254 mmCGTT_CPF_CLK_CTRL
, 0xffffffff, 0xc0000100,
255 mmCGTT_GDS_CLK_CTRL
, 0xffffffff, 0x00000100,
256 mmCGTT_IA_CLK_CTRL
, 0xffffffff, 0x06000100,
257 mmCGTT_PA_CLK_CTRL
, 0xffffffff, 0x00000100,
258 mmCGTT_WD_CLK_CTRL
, 0xffffffff, 0x06000100,
259 mmCGTT_PC_CLK_CTRL
, 0xffffffff, 0x00000100,
260 mmCGTT_RLC_CLK_CTRL
, 0xffffffff, 0x00000100,
261 mmCGTT_SC_CLK_CTRL
, 0xffffffff, 0x00000100,
262 mmCGTT_SPI_CLK_CTRL
, 0xffffffff, 0x00000100,
263 mmCGTT_SQ_CLK_CTRL
, 0xffffffff, 0x00000100,
264 mmCGTT_SQG_CLK_CTRL
, 0xffffffff, 0x00000100,
265 mmCGTT_SX_CLK_CTRL0
, 0xffffffff, 0x00000100,
266 mmCGTT_SX_CLK_CTRL1
, 0xffffffff, 0x00000100,
267 mmCGTT_SX_CLK_CTRL2
, 0xffffffff, 0x00000100,
268 mmCGTT_SX_CLK_CTRL3
, 0xffffffff, 0x00000100,
269 mmCGTT_SX_CLK_CTRL4
, 0xffffffff, 0x00000100,
270 mmCGTT_TCI_CLK_CTRL
, 0xffffffff, 0xff000100,
271 mmCGTT_TCP_CLK_CTRL
, 0xffffffff, 0x00000100,
272 mmCGTT_VGT_CLK_CTRL
, 0xffffffff, 0x06000100,
273 mmDB_CGTT_CLK_CTRL_0
, 0xffffffff, 0x00000100,
274 mmTA_CGTT_CTRL
, 0xffffffff, 0x00000100,
275 mmTCA_CGTT_SCLK_CTRL
, 0xffffffff, 0x00000100,
276 mmTCC_CGTT_SCLK_CTRL
, 0xffffffff, 0x00000100,
277 mmTD_CGTT_CTRL
, 0xffffffff, 0x00000100,
278 mmGRBM_GFX_INDEX
, 0xffffffff, 0xe0000000,
279 mmCGTS_CU0_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
280 mmCGTS_CU0_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
281 mmCGTS_CU0_TA_SQC_CTRL_REG
, 0xffffffff, 0x0f840f87,
282 mmCGTS_CU0_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
283 mmCGTS_CU0_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
284 mmCGTS_CU1_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
285 mmCGTS_CU1_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
286 mmCGTS_CU1_TA_CTRL_REG
, 0xffffffff, 0x00040007,
287 mmCGTS_CU1_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
288 mmCGTS_CU1_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
289 mmCGTS_CU2_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
290 mmCGTS_CU2_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
291 mmCGTS_CU2_TA_CTRL_REG
, 0xffffffff, 0x00040007,
292 mmCGTS_CU2_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
293 mmCGTS_CU2_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
294 mmCGTS_CU3_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
295 mmCGTS_CU3_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
296 mmCGTS_CU3_TA_CTRL_REG
, 0xffffffff, 0x00040007,
297 mmCGTS_CU3_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
298 mmCGTS_CU3_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
299 mmCGTS_CU4_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
300 mmCGTS_CU4_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
301 mmCGTS_CU4_TA_SQC_CTRL_REG
, 0xffffffff, 0x0f840f87,
302 mmCGTS_CU4_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
303 mmCGTS_CU4_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
304 mmCGTS_CU5_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
305 mmCGTS_CU5_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
306 mmCGTS_CU5_TA_CTRL_REG
, 0xffffffff, 0x00040007,
307 mmCGTS_CU5_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
308 mmCGTS_CU5_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
309 mmCGTS_SM_CTRL_REG
, 0xffffffff, 0x96e00200,
310 mmCP_RB_WPTR_POLL_CNTL
, 0xffffffff, 0x00900100,
311 mmRLC_CGCG_CGLS_CTRL
, 0xffffffff, 0x0020003c,
314 static const u32 cz_golden_settings_a11
[] =
316 mmCB_HW_CONTROL_3
, 0x00000040, 0x00000040,
317 mmDB_DEBUG2
, 0xf00fffff, 0x00000400,
318 mmGB_GPU_ID
, 0x0000000f, 0x00000000,
319 mmPA_SC_ENHANCE
, 0xffffffff, 0x00000001,
320 mmPA_SC_LINE_STIPPLE_STATE
, 0x0000ff0f, 0x00000000,
321 mmTA_CNTL_AUX
, 0x000f000f, 0x00010000,
322 mmTCP_ADDR_CONFIG
, 0x0000000f, 0x000000f3,
323 mmTCP_CHAN_STEER_LO
, 0xffffffff, 0x00001302
326 static const u32 cz_golden_common_all
[] =
328 mmGRBM_GFX_INDEX
, 0xffffffff, 0xe0000000,
329 mmPA_SC_RASTER_CONFIG
, 0xffffffff, 0x00000002,
330 mmPA_SC_RASTER_CONFIG_1
, 0xffffffff, 0x00000000,
331 mmGB_ADDR_CONFIG
, 0xffffffff, 0x22010001,
332 mmSPI_RESOURCE_RESERVE_CU_0
, 0xffffffff, 0x00000800,
333 mmSPI_RESOURCE_RESERVE_CU_1
, 0xffffffff, 0x00000800,
334 mmSPI_RESOURCE_RESERVE_EN_CU_0
, 0xffffffff, 0x00007FBF,
335 mmSPI_RESOURCE_RESERVE_EN_CU_1
, 0xffffffff, 0x00007FAF
338 static const u32 cz_mgcg_cgcg_init
[] =
340 mmRLC_CGTT_MGCG_OVERRIDE
, 0xffffffff, 0xffffffff,
341 mmGRBM_GFX_INDEX
, 0xffffffff, 0xe0000000,
342 mmCB_CGTT_SCLK_CTRL
, 0xffffffff, 0x00000100,
343 mmCGTT_BCI_CLK_CTRL
, 0xffffffff, 0x00000100,
344 mmCGTT_CP_CLK_CTRL
, 0xffffffff, 0x00000100,
345 mmCGTT_CPC_CLK_CTRL
, 0xffffffff, 0x00000100,
346 mmCGTT_CPF_CLK_CTRL
, 0xffffffff, 0x00000100,
347 mmCGTT_GDS_CLK_CTRL
, 0xffffffff, 0x00000100,
348 mmCGTT_IA_CLK_CTRL
, 0xffffffff, 0x06000100,
349 mmCGTT_PA_CLK_CTRL
, 0xffffffff, 0x00000100,
350 mmCGTT_WD_CLK_CTRL
, 0xffffffff, 0x06000100,
351 mmCGTT_PC_CLK_CTRL
, 0xffffffff, 0x00000100,
352 mmCGTT_RLC_CLK_CTRL
, 0xffffffff, 0x00000100,
353 mmCGTT_SC_CLK_CTRL
, 0xffffffff, 0x00000100,
354 mmCGTT_SPI_CLK_CTRL
, 0xffffffff, 0x00000100,
355 mmCGTT_SQ_CLK_CTRL
, 0xffffffff, 0x00000100,
356 mmCGTT_SQG_CLK_CTRL
, 0xffffffff, 0x00000100,
357 mmCGTT_SX_CLK_CTRL0
, 0xffffffff, 0x00000100,
358 mmCGTT_SX_CLK_CTRL1
, 0xffffffff, 0x00000100,
359 mmCGTT_SX_CLK_CTRL2
, 0xffffffff, 0x00000100,
360 mmCGTT_SX_CLK_CTRL3
, 0xffffffff, 0x00000100,
361 mmCGTT_SX_CLK_CTRL4
, 0xffffffff, 0x00000100,
362 mmCGTT_TCI_CLK_CTRL
, 0xffffffff, 0x00000100,
363 mmCGTT_TCP_CLK_CTRL
, 0xffffffff, 0x00000100,
364 mmCGTT_VGT_CLK_CTRL
, 0xffffffff, 0x06000100,
365 mmDB_CGTT_CLK_CTRL_0
, 0xffffffff, 0x00000100,
366 mmTA_CGTT_CTRL
, 0xffffffff, 0x00000100,
367 mmTCA_CGTT_SCLK_CTRL
, 0xffffffff, 0x00000100,
368 mmTCC_CGTT_SCLK_CTRL
, 0xffffffff, 0x00000100,
369 mmTD_CGTT_CTRL
, 0xffffffff, 0x00000100,
370 mmGRBM_GFX_INDEX
, 0xffffffff, 0xe0000000,
371 mmCGTS_CU0_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
372 mmCGTS_CU0_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
373 mmCGTS_CU0_TA_SQC_CTRL_REG
, 0xffffffff, 0x00040007,
374 mmCGTS_CU0_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
375 mmCGTS_CU0_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
376 mmCGTS_CU1_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
377 mmCGTS_CU1_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
378 mmCGTS_CU1_TA_CTRL_REG
, 0xffffffff, 0x00040007,
379 mmCGTS_CU1_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
380 mmCGTS_CU1_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
381 mmCGTS_CU2_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
382 mmCGTS_CU2_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
383 mmCGTS_CU2_TA_CTRL_REG
, 0xffffffff, 0x00040007,
384 mmCGTS_CU2_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
385 mmCGTS_CU2_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
386 mmCGTS_CU3_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
387 mmCGTS_CU3_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
388 mmCGTS_CU3_TA_CTRL_REG
, 0xffffffff, 0x00040007,
389 mmCGTS_CU3_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
390 mmCGTS_CU3_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
391 mmCGTS_CU4_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
392 mmCGTS_CU4_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
393 mmCGTS_CU4_TA_SQC_CTRL_REG
, 0xffffffff, 0x00040007,
394 mmCGTS_CU4_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
395 mmCGTS_CU4_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
396 mmCGTS_CU5_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
397 mmCGTS_CU5_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
398 mmCGTS_CU5_TA_CTRL_REG
, 0xffffffff, 0x00040007,
399 mmCGTS_CU5_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
400 mmCGTS_CU5_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
401 mmCGTS_CU6_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
402 mmCGTS_CU6_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
403 mmCGTS_CU6_TA_CTRL_REG
, 0xffffffff, 0x00040007,
404 mmCGTS_CU6_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
405 mmCGTS_CU6_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
406 mmCGTS_CU7_SP0_CTRL_REG
, 0xffffffff, 0x00010000,
407 mmCGTS_CU7_LDS_SQ_CTRL_REG
, 0xffffffff, 0x00030002,
408 mmCGTS_CU7_TA_CTRL_REG
, 0xffffffff, 0x00040007,
409 mmCGTS_CU7_SP1_CTRL_REG
, 0xffffffff, 0x00060005,
410 mmCGTS_CU7_TD_TCP_CTRL_REG
, 0xffffffff, 0x00090008,
411 mmCGTS_SM_CTRL_REG
, 0xffffffff, 0x96e00200,
412 mmCP_RB_WPTR_POLL_CNTL
, 0xffffffff, 0x00900100,
413 mmRLC_CGCG_CGLS_CTRL
, 0xffffffff, 0x0020003f,
414 mmCP_MEM_SLP_CNTL
, 0x00000001, 0x00000001,
/* Forward declarations; the definitions are not in this part of the file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);

421 static void gfx_v8_0_init_golden_registers(struct amdgpu_device
*adev
)
423 switch (adev
->asic_type
) {
425 amdgpu_program_register_sequence(adev
,
426 iceland_mgcg_cgcg_init
,
427 (const u32
)ARRAY_SIZE(iceland_mgcg_cgcg_init
));
428 amdgpu_program_register_sequence(adev
,
429 golden_settings_iceland_a11
,
430 (const u32
)ARRAY_SIZE(golden_settings_iceland_a11
));
431 amdgpu_program_register_sequence(adev
,
432 iceland_golden_common_all
,
433 (const u32
)ARRAY_SIZE(iceland_golden_common_all
));
436 amdgpu_program_register_sequence(adev
,
437 tonga_mgcg_cgcg_init
,
438 (const u32
)ARRAY_SIZE(tonga_mgcg_cgcg_init
));
439 amdgpu_program_register_sequence(adev
,
440 golden_settings_tonga_a11
,
441 (const u32
)ARRAY_SIZE(golden_settings_tonga_a11
));
442 amdgpu_program_register_sequence(adev
,
443 tonga_golden_common_all
,
444 (const u32
)ARRAY_SIZE(tonga_golden_common_all
));
447 amdgpu_program_register_sequence(adev
,
449 (const u32
)ARRAY_SIZE(cz_mgcg_cgcg_init
));
450 amdgpu_program_register_sequence(adev
,
451 cz_golden_settings_a11
,
452 (const u32
)ARRAY_SIZE(cz_golden_settings_a11
));
453 amdgpu_program_register_sequence(adev
,
454 cz_golden_common_all
,
455 (const u32
)ARRAY_SIZE(cz_golden_common_all
));
462 static void gfx_v8_0_scratch_init(struct amdgpu_device
*adev
)
466 adev
->gfx
.scratch
.num_reg
= 7;
467 adev
->gfx
.scratch
.reg_base
= mmSCRATCH_REG0
;
468 for (i
= 0; i
< adev
->gfx
.scratch
.num_reg
; i
++) {
469 adev
->gfx
.scratch
.free
[i
] = true;
470 adev
->gfx
.scratch
.reg
[i
] = adev
->gfx
.scratch
.reg_base
+ i
;
474 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring
*ring
)
476 struct amdgpu_device
*adev
= ring
->adev
;
482 r
= amdgpu_gfx_scratch_get(adev
, &scratch
);
484 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r
);
487 WREG32(scratch
, 0xCAFEDEAD);
488 r
= amdgpu_ring_lock(ring
, 3);
490 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
492 amdgpu_gfx_scratch_free(adev
, scratch
);
495 amdgpu_ring_write(ring
, PACKET3(PACKET3_SET_UCONFIG_REG
, 1));
496 amdgpu_ring_write(ring
, (scratch
- PACKET3_SET_UCONFIG_REG_START
));
497 amdgpu_ring_write(ring
, 0xDEADBEEF);
498 amdgpu_ring_unlock_commit(ring
);
500 for (i
= 0; i
< adev
->usec_timeout
; i
++) {
501 tmp
= RREG32(scratch
);
502 if (tmp
== 0xDEADBEEF)
506 if (i
< adev
->usec_timeout
) {
507 DRM_INFO("ring test on %d succeeded in %d usecs\n",
510 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
511 ring
->idx
, scratch
, tmp
);
514 amdgpu_gfx_scratch_free(adev
, scratch
);
518 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring
*ring
)
520 struct amdgpu_device
*adev
= ring
->adev
;
527 r
= amdgpu_gfx_scratch_get(adev
, &scratch
);
529 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r
);
532 WREG32(scratch
, 0xCAFEDEAD);
533 r
= amdgpu_ib_get(ring
, NULL
, 256, &ib
);
535 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r
);
536 amdgpu_gfx_scratch_free(adev
, scratch
);
539 ib
.ptr
[0] = PACKET3(PACKET3_SET_UCONFIG_REG
, 1);
540 ib
.ptr
[1] = ((scratch
- PACKET3_SET_UCONFIG_REG_START
));
541 ib
.ptr
[2] = 0xDEADBEEF;
543 r
= amdgpu_ib_schedule(adev
, 1, &ib
, AMDGPU_FENCE_OWNER_UNDEFINED
);
545 amdgpu_gfx_scratch_free(adev
, scratch
);
546 amdgpu_ib_free(adev
, &ib
);
547 DRM_ERROR("amdgpu: failed to schedule ib (%d).\n", r
);
550 r
= amdgpu_fence_wait(ib
.fence
, false);
552 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r
);
553 amdgpu_gfx_scratch_free(adev
, scratch
);
554 amdgpu_ib_free(adev
, &ib
);
557 for (i
= 0; i
< adev
->usec_timeout
; i
++) {
558 tmp
= RREG32(scratch
);
559 if (tmp
== 0xDEADBEEF)
563 if (i
< adev
->usec_timeout
) {
564 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
565 ib
.fence
->ring
->idx
, i
);
567 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
571 amdgpu_gfx_scratch_free(adev
, scratch
);
572 amdgpu_ib_free(adev
, &ib
);
576 static int gfx_v8_0_init_microcode(struct amdgpu_device
*adev
)
578 const char *chip_name
;
581 struct amdgpu_firmware_info
*info
= NULL
;
582 const struct common_firmware_header
*header
= NULL
;
586 switch (adev
->asic_type
) {
594 chip_name
= "carrizo";
600 snprintf(fw_name
, sizeof(fw_name
), "radeon/%s_pfp.bin", chip_name
);
601 err
= request_firmware(&adev
->gfx
.pfp_fw
, fw_name
, adev
->dev
);
604 err
= amdgpu_ucode_validate(adev
->gfx
.pfp_fw
);
608 snprintf(fw_name
, sizeof(fw_name
), "radeon/%s_me.bin", chip_name
);
609 err
= request_firmware(&adev
->gfx
.me_fw
, fw_name
, adev
->dev
);
612 err
= amdgpu_ucode_validate(adev
->gfx
.me_fw
);
616 snprintf(fw_name
, sizeof(fw_name
), "radeon/%s_ce.bin", chip_name
);
617 err
= request_firmware(&adev
->gfx
.ce_fw
, fw_name
, adev
->dev
);
620 err
= amdgpu_ucode_validate(adev
->gfx
.ce_fw
);
624 snprintf(fw_name
, sizeof(fw_name
), "radeon/%s_rlc.bin", chip_name
);
625 err
= request_firmware(&adev
->gfx
.rlc_fw
, fw_name
, adev
->dev
);
628 err
= amdgpu_ucode_validate(adev
->gfx
.rlc_fw
);
630 snprintf(fw_name
, sizeof(fw_name
), "radeon/%s_mec.bin", chip_name
);
631 err
= request_firmware(&adev
->gfx
.mec_fw
, fw_name
, adev
->dev
);
634 err
= amdgpu_ucode_validate(adev
->gfx
.mec_fw
);
638 snprintf(fw_name
, sizeof(fw_name
), "radeon/%s_mec2.bin", chip_name
);
639 err
= request_firmware(&adev
->gfx
.mec2_fw
, fw_name
, adev
->dev
);
641 err
= amdgpu_ucode_validate(adev
->gfx
.mec2_fw
);
646 adev
->gfx
.mec2_fw
= NULL
;
649 if (adev
->firmware
.smu_load
) {
650 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_CP_PFP
];
651 info
->ucode_id
= AMDGPU_UCODE_ID_CP_PFP
;
652 info
->fw
= adev
->gfx
.pfp_fw
;
653 header
= (const struct common_firmware_header
*)info
->fw
->data
;
654 adev
->firmware
.fw_size
+=
655 ALIGN(le32_to_cpu(header
->ucode_size_bytes
), PAGE_SIZE
);
657 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_CP_ME
];
658 info
->ucode_id
= AMDGPU_UCODE_ID_CP_ME
;
659 info
->fw
= adev
->gfx
.me_fw
;
660 header
= (const struct common_firmware_header
*)info
->fw
->data
;
661 adev
->firmware
.fw_size
+=
662 ALIGN(le32_to_cpu(header
->ucode_size_bytes
), PAGE_SIZE
);
664 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_CP_CE
];
665 info
->ucode_id
= AMDGPU_UCODE_ID_CP_CE
;
666 info
->fw
= adev
->gfx
.ce_fw
;
667 header
= (const struct common_firmware_header
*)info
->fw
->data
;
668 adev
->firmware
.fw_size
+=
669 ALIGN(le32_to_cpu(header
->ucode_size_bytes
), PAGE_SIZE
);
671 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_RLC_G
];
672 info
->ucode_id
= AMDGPU_UCODE_ID_RLC_G
;
673 info
->fw
= adev
->gfx
.rlc_fw
;
674 header
= (const struct common_firmware_header
*)info
->fw
->data
;
675 adev
->firmware
.fw_size
+=
676 ALIGN(le32_to_cpu(header
->ucode_size_bytes
), PAGE_SIZE
);
678 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_CP_MEC1
];
679 info
->ucode_id
= AMDGPU_UCODE_ID_CP_MEC1
;
680 info
->fw
= adev
->gfx
.mec_fw
;
681 header
= (const struct common_firmware_header
*)info
->fw
->data
;
682 adev
->firmware
.fw_size
+=
683 ALIGN(le32_to_cpu(header
->ucode_size_bytes
), PAGE_SIZE
);
685 if (adev
->gfx
.mec2_fw
) {
686 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_CP_MEC2
];
687 info
->ucode_id
= AMDGPU_UCODE_ID_CP_MEC2
;
688 info
->fw
= adev
->gfx
.mec2_fw
;
689 header
= (const struct common_firmware_header
*)info
->fw
->data
;
690 adev
->firmware
.fw_size
+=
691 ALIGN(le32_to_cpu(header
->ucode_size_bytes
), PAGE_SIZE
);
699 "gfx8: Failed to load firmware \"%s\"\n",
701 release_firmware(adev
->gfx
.pfp_fw
);
702 adev
->gfx
.pfp_fw
= NULL
;
703 release_firmware(adev
->gfx
.me_fw
);
704 adev
->gfx
.me_fw
= NULL
;
705 release_firmware(adev
->gfx
.ce_fw
);
706 adev
->gfx
.ce_fw
= NULL
;
707 release_firmware(adev
->gfx
.rlc_fw
);
708 adev
->gfx
.rlc_fw
= NULL
;
709 release_firmware(adev
->gfx
.mec_fw
);
710 adev
->gfx
.mec_fw
= NULL
;
711 release_firmware(adev
->gfx
.mec2_fw
);
712 adev
->gfx
.mec2_fw
= NULL
;
717 static void gfx_v8_0_mec_fini(struct amdgpu_device
*adev
)
721 if (adev
->gfx
.mec
.hpd_eop_obj
) {
722 r
= amdgpu_bo_reserve(adev
->gfx
.mec
.hpd_eop_obj
, false);
723 if (unlikely(r
!= 0))
724 dev_warn(adev
->dev
, "(%d) reserve HPD EOP bo failed\n", r
);
725 amdgpu_bo_unpin(adev
->gfx
.mec
.hpd_eop_obj
);
726 amdgpu_bo_unreserve(adev
->gfx
.mec
.hpd_eop_obj
);
728 amdgpu_bo_unref(&adev
->gfx
.mec
.hpd_eop_obj
);
729 adev
->gfx
.mec
.hpd_eop_obj
= NULL
;
733 #define MEC_HPD_SIZE 2048
735 static int gfx_v8_0_mec_init(struct amdgpu_device
*adev
)
741 * we assign only 1 pipe because all other pipes will
744 adev
->gfx
.mec
.num_mec
= 1;
745 adev
->gfx
.mec
.num_pipe
= 1;
746 adev
->gfx
.mec
.num_queue
= adev
->gfx
.mec
.num_mec
* adev
->gfx
.mec
.num_pipe
* 8;
748 if (adev
->gfx
.mec
.hpd_eop_obj
== NULL
) {
749 r
= amdgpu_bo_create(adev
,
750 adev
->gfx
.mec
.num_mec
*adev
->gfx
.mec
.num_pipe
* MEC_HPD_SIZE
* 2,
752 AMDGPU_GEM_DOMAIN_GTT
, 0, NULL
,
753 &adev
->gfx
.mec
.hpd_eop_obj
);
755 dev_warn(adev
->dev
, "(%d) create HDP EOP bo failed\n", r
);
760 r
= amdgpu_bo_reserve(adev
->gfx
.mec
.hpd_eop_obj
, false);
761 if (unlikely(r
!= 0)) {
762 gfx_v8_0_mec_fini(adev
);
765 r
= amdgpu_bo_pin(adev
->gfx
.mec
.hpd_eop_obj
, AMDGPU_GEM_DOMAIN_GTT
,
766 &adev
->gfx
.mec
.hpd_eop_gpu_addr
);
768 dev_warn(adev
->dev
, "(%d) pin HDP EOP bo failed\n", r
);
769 gfx_v8_0_mec_fini(adev
);
772 r
= amdgpu_bo_kmap(adev
->gfx
.mec
.hpd_eop_obj
, (void **)&hpd
);
774 dev_warn(adev
->dev
, "(%d) map HDP EOP bo failed\n", r
);
775 gfx_v8_0_mec_fini(adev
);
779 memset(hpd
, 0, adev
->gfx
.mec
.num_mec
*adev
->gfx
.mec
.num_pipe
* MEC_HPD_SIZE
* 2);
781 amdgpu_bo_kunmap(adev
->gfx
.mec
.hpd_eop_obj
);
782 amdgpu_bo_unreserve(adev
->gfx
.mec
.hpd_eop_obj
);
787 static int gfx_v8_0_sw_init(struct amdgpu_device
*adev
)
790 struct amdgpu_ring
*ring
;
793 r
= amdgpu_irq_add_id(adev
, 181, &adev
->gfx
.eop_irq
);
798 r
= amdgpu_irq_add_id(adev
, 184, &adev
->gfx
.priv_reg_irq
);
802 /* Privileged inst */
803 r
= amdgpu_irq_add_id(adev
, 185, &adev
->gfx
.priv_inst_irq
);
807 adev
->gfx
.gfx_current_status
= AMDGPU_GFX_NORMAL_MODE
;
809 gfx_v8_0_scratch_init(adev
);
811 r
= gfx_v8_0_init_microcode(adev
);
813 DRM_ERROR("Failed to load gfx firmware!\n");
817 r
= gfx_v8_0_mec_init(adev
);
819 DRM_ERROR("Failed to init MEC BOs!\n");
823 r
= amdgpu_wb_get(adev
, &adev
->gfx
.ce_sync_offs
);
825 DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r
);
829 /* set up the gfx ring */
830 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++) {
831 ring
= &adev
->gfx
.gfx_ring
[i
];
832 ring
->ring_obj
= NULL
;
833 sprintf(ring
->name
, "gfx");
834 /* no gfx doorbells on iceland */
835 if (adev
->asic_type
!= CHIP_TOPAZ
) {
836 ring
->use_doorbell
= true;
837 ring
->doorbell_index
= AMDGPU_DOORBELL_GFX_RING0
;
840 r
= amdgpu_ring_init(adev
, ring
, 1024 * 1024,
841 PACKET3(PACKET3_NOP
, 0x3FFF), 0xf,
842 &adev
->gfx
.eop_irq
, AMDGPU_CP_IRQ_GFX_EOP
,
843 AMDGPU_RING_TYPE_GFX
);
848 /* set up the compute queues */
849 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
852 /* max 32 queues per MEC */
853 if ((i
>= 32) || (i
>= AMDGPU_MAX_COMPUTE_RINGS
)) {
854 DRM_ERROR("Too many (%d) compute rings!\n", i
);
857 ring
= &adev
->gfx
.compute_ring
[i
];
858 ring
->ring_obj
= NULL
;
859 ring
->use_doorbell
= true;
860 ring
->doorbell_index
= AMDGPU_DOORBELL_MEC_RING0
+ i
;
861 ring
->me
= 1; /* first MEC */
864 sprintf(ring
->name
, "comp %d.%d.%d", ring
->me
, ring
->pipe
, ring
->queue
);
865 irq_type
= AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ ring
->pipe
;
866 /* type-2 packets are deprecated on MEC, use type-3 instead */
867 r
= amdgpu_ring_init(adev
, ring
, 1024 * 1024,
868 PACKET3(PACKET3_NOP
, 0x3FFF), 0xf,
869 &adev
->gfx
.eop_irq
, irq_type
,
870 AMDGPU_RING_TYPE_COMPUTE
);
875 /* reserve GDS, GWS and OA resource for gfx */
876 r
= amdgpu_bo_create(adev
, adev
->gds
.mem
.gfx_partition_size
,
878 AMDGPU_GEM_DOMAIN_GDS
, 0,
879 NULL
, &adev
->gds
.gds_gfx_bo
);
883 r
= amdgpu_bo_create(adev
, adev
->gds
.gws
.gfx_partition_size
,
885 AMDGPU_GEM_DOMAIN_GWS
, 0,
886 NULL
, &adev
->gds
.gws_gfx_bo
);
890 r
= amdgpu_bo_create(adev
, adev
->gds
.oa
.gfx_partition_size
,
892 AMDGPU_GEM_DOMAIN_OA
, 0,
893 NULL
, &adev
->gds
.oa_gfx_bo
);
/*
 * gfx_v8_0_sw_fini - software-side teardown of the GFX v8 block.
 *
 * @adev: device whose gfx/gds state is being torn down.
 *
 * Calls amdgpu_bo_unref() on the OA, GWS and GDS buffer objects (the reverse
 * of the order in which the amdgpu_bo_create() calls earlier in this file
 * set them up), runs amdgpu_ring_fini() on every gfx and compute ring, hands
 * adev->gfx.ce_sync_offs to amdgpu_wb_free() and finally calls
 * gfx_v8_0_mec_fini().
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
900 static int gfx_v8_0_sw_fini(struct amdgpu_device
*adev
)
/* Release the OA, GWS and GDS buffer objects. */
904 amdgpu_bo_unref(&adev
->gds
.oa_gfx_bo
);
905 amdgpu_bo_unref(&adev
->gds
.gws_gfx_bo
);
906 amdgpu_bo_unref(&adev
->gds
.gds_gfx_bo
);
/* Finalize all rings: gfx first, then compute. */
908 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++)
909 amdgpu_ring_fini(&adev
->gfx
.gfx_ring
[i
]);
910 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++)
911 amdgpu_ring_fini(&adev
->gfx
.compute_ring
[i
]);
913 amdgpu_wb_free(adev
, adev
->gfx
.ce_sync_offs
);
915 gfx_v8_0_mec_fini(adev
);
/*
 * gfx_v8_0_tiling_mode_table_init - program the tile-mode register tables.
 *
 * @adev: device whose GB_TILE_MODEn / GB_MACROTILE_MODEn registers are set.
 *
 * Inside a switch on adev->asic_type, the function runs pairs of loops.
 * The first loop of each pair covers num_tile_mode_states (32) indices and
 * the second covers num_secondary_tile_mode_states (16) indices.  For every
 * index a value is assembled in gb_tile_moden, cached in
 * adev->gfx.config.tile_mode_array[] or macrotile_mode_array[], and written
 * to mmGB_TILE_MODE0 + index or mmGB_MACROTILE_MODE0 + index.
 *
 * NOTE(review): the case labels (both the per-ASIC ones and the per-index
 * ones) are not visible in this excerpt, so which table belongs to which
 * ASIC and which value lands at which index must be confirmed against the
 * complete file.
 */
920 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device
*adev
)
922 const u32 num_tile_mode_states
= 32;
923 const u32 num_secondary_tile_mode_states
= 16;
924 u32 reg_offset
, gb_tile_moden
, split_equal_to_row_size
;
/*
 * Choose a tile-split encoding (1KB / 2KB / 4KB) from the memory row size.
 * NOTE(review): no later use of split_equal_to_row_size is visible in this
 * excerpt.
 */
926 switch (adev
->gfx
.config
.mem_row_size_in_kb
) {
928 split_equal_to_row_size
= ADDR_SURF_TILE_SPLIT_1KB
;
932 split_equal_to_row_size
= ADDR_SURF_TILE_SPLIT_2KB
;
935 split_equal_to_row_size
= ADDR_SURF_TILE_SPLIT_4KB
;
/* Per-ASIC tables follow. */
939 switch (adev
->asic_type
) {
/* First tile-mode table: every entry uses PIPE_CONFIG(ADDR_SURF_P2). */
941 for (reg_offset
= 0; reg_offset
< num_tile_mode_states
; reg_offset
++) {
942 switch (reg_offset
) {
944 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
945 PIPE_CONFIG(ADDR_SURF_P2
) |
946 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B
) |
947 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
950 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
951 PIPE_CONFIG(ADDR_SURF_P2
) |
952 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B
) |
953 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
956 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
957 PIPE_CONFIG(ADDR_SURF_P2
) |
958 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
) |
959 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
962 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
963 PIPE_CONFIG(ADDR_SURF_P2
) |
964 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B
) |
965 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
968 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
969 PIPE_CONFIG(ADDR_SURF_P2
) |
970 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB
) |
971 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
974 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
975 PIPE_CONFIG(ADDR_SURF_P2
) |
976 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB
) |
977 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
980 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
981 PIPE_CONFIG(ADDR_SURF_P2
) |
982 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB
) |
983 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
986 gb_tile_moden
= (ARRAY_MODE(ARRAY_LINEAR_ALIGNED
) |
987 PIPE_CONFIG(ADDR_SURF_P2
));
990 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
991 PIPE_CONFIG(ADDR_SURF_P2
) |
992 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
993 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
996 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
997 PIPE_CONFIG(ADDR_SURF_P2
) |
998 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
999 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1002 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1003 PIPE_CONFIG(ADDR_SURF_P2
) |
1004 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1005 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8
));
1008 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1009 PIPE_CONFIG(ADDR_SURF_P2
) |
1010 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1011 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1014 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1015 PIPE_CONFIG(ADDR_SURF_P2
) |
1016 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1017 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1020 gb_tile_moden
= (ARRAY_MODE(ARRAY_3D_TILED_THIN1
) |
1021 PIPE_CONFIG(ADDR_SURF_P2
) |
1022 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1023 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1026 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1027 PIPE_CONFIG(ADDR_SURF_P2
) |
1028 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1029 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8
));
1032 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THICK
) |
1033 PIPE_CONFIG(ADDR_SURF_P2
) |
1034 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1035 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1038 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THICK
) |
1039 PIPE_CONFIG(ADDR_SURF_P2
) |
1040 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1041 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1044 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THICK
) |
1045 PIPE_CONFIG(ADDR_SURF_P2
) |
1046 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1047 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1050 gb_tile_moden
= (ARRAY_MODE(ARRAY_3D_TILED_THICK
) |
1051 PIPE_CONFIG(ADDR_SURF_P2
) |
1052 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1053 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1056 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THICK
) |
1057 PIPE_CONFIG(ADDR_SURF_P2
) |
1058 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1059 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1062 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THICK
) |
1063 PIPE_CONFIG(ADDR_SURF_P2
) |
1064 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1065 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1068 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_XTHICK
) |
1069 PIPE_CONFIG(ADDR_SURF_P2
) |
1070 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1071 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1074 gb_tile_moden
= (ARRAY_MODE(ARRAY_3D_TILED_XTHICK
) |
1075 PIPE_CONFIG(ADDR_SURF_P2
) |
1076 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1077 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1080 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1081 PIPE_CONFIG(ADDR_SURF_P2
) |
1082 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1083 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1086 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1087 PIPE_CONFIG(ADDR_SURF_P2
) |
1088 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1089 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1092 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1093 PIPE_CONFIG(ADDR_SURF_P2
) |
1094 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1095 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8
));
1107 adev
->gfx
.config
.tile_mode_array
[reg_offset
] = gb_tile_moden
;
1108 WREG32(mmGB_TILE_MODE0
+ reg_offset
, gb_tile_moden
);
/* First macrotile table (bank width/height, macro aspect, bank count). */
1110 for (reg_offset
= 0; reg_offset
< num_secondary_tile_mode_states
; reg_offset
++) {
1111 switch (reg_offset
) {
1113 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4
) |
1114 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1115 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1116 NUM_BANKS(ADDR_SURF_8_BANK
));
1119 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4
) |
1120 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1121 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1122 NUM_BANKS(ADDR_SURF_8_BANK
));
1125 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2
) |
1126 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1127 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1128 NUM_BANKS(ADDR_SURF_8_BANK
));
1131 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1132 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1133 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1134 NUM_BANKS(ADDR_SURF_8_BANK
));
1137 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1138 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1139 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1140 NUM_BANKS(ADDR_SURF_8_BANK
));
1143 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1144 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1145 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1146 NUM_BANKS(ADDR_SURF_8_BANK
));
1149 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1150 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1151 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1152 NUM_BANKS(ADDR_SURF_8_BANK
));
1155 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4
) |
1156 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8
) |
1157 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1158 NUM_BANKS(ADDR_SURF_16_BANK
));
1161 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4
) |
1162 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1163 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1164 NUM_BANKS(ADDR_SURF_16_BANK
));
1167 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2
) |
1168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1170 NUM_BANKS(ADDR_SURF_16_BANK
));
1173 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2
) |
1174 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1175 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1176 NUM_BANKS(ADDR_SURF_16_BANK
));
1179 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1182 NUM_BANKS(ADDR_SURF_16_BANK
));
1185 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1186 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1187 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1188 NUM_BANKS(ADDR_SURF_16_BANK
));
1191 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1192 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1193 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1194 NUM_BANKS(ADDR_SURF_8_BANK
));
1203 adev
->gfx
.config
.macrotile_mode_array
[reg_offset
] = gb_tile_moden
;
1204 WREG32(mmGB_MACROTILE_MODE0
+ reg_offset
, gb_tile_moden
);
/*
 * Second tile-mode table: entries use PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16),
 * with some PRT entries using ADDR_SURF_P4_16x16 instead.
 */
1207 for (reg_offset
= 0; reg_offset
< num_tile_mode_states
; reg_offset
++) {
1208 switch (reg_offset
) {
1210 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1211 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1212 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B
) |
1213 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1216 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1217 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1218 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B
) |
1219 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1222 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1223 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1224 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
) |
1225 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1228 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1229 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1230 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B
) |
1231 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1234 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1235 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1236 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB
) |
1237 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1240 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1241 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1242 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB
) |
1243 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1246 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1247 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1248 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB
) |
1249 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1252 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1253 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1254 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB
) |
1255 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1258 gb_tile_moden
= (ARRAY_MODE(ARRAY_LINEAR_ALIGNED
) |
1259 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
));
1262 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1263 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1264 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1268 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1269 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1270 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1271 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1274 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1275 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1276 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8
));
1280 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1281 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1282 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8
));
1286 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1287 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1288 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1292 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1293 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1294 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1295 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1298 gb_tile_moden
= (ARRAY_MODE(ARRAY_3D_TILED_THIN1
) |
1299 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1300 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1304 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1305 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1306 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1307 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8
));
1310 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1311 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1312 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8
));
1316 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THICK
) |
1317 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1318 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1319 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1322 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THICK
) |
1323 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1324 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1328 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THICK
) |
1329 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1330 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1334 gb_tile_moden
= (ARRAY_MODE(ARRAY_3D_TILED_THICK
) |
1335 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1336 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1337 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1340 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THICK
) |
1341 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1342 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1346 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THICK
) |
1347 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1348 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1349 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1352 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THICK
) |
1353 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1354 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1355 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1358 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_XTHICK
) |
1359 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1360 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1361 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1364 gb_tile_moden
= (ARRAY_MODE(ARRAY_3D_TILED_XTHICK
) |
1365 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1366 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1367 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1370 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1371 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1372 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1373 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1376 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1377 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1378 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1382 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1383 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1384 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1385 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8
));
1388 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1389 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1390 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1391 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8
));
1397 adev
->gfx
.config
.tile_mode_array
[reg_offset
] = gb_tile_moden
;
1398 WREG32(mmGB_TILE_MODE0
+ reg_offset
, gb_tile_moden
);
/* Second macrotile table: every entry uses ADDR_SURF_BANK_WIDTH_1. */
1400 for (reg_offset
= 0; reg_offset
< num_secondary_tile_mode_states
; reg_offset
++) {
1401 switch (reg_offset
) {
1403 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1406 NUM_BANKS(ADDR_SURF_16_BANK
));
1409 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1410 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1411 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1412 NUM_BANKS(ADDR_SURF_16_BANK
));
1415 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1416 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1417 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1418 NUM_BANKS(ADDR_SURF_16_BANK
));
1421 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1422 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1423 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1424 NUM_BANKS(ADDR_SURF_16_BANK
));
1427 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1428 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1429 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1430 NUM_BANKS(ADDR_SURF_16_BANK
));
1433 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1434 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1435 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1436 NUM_BANKS(ADDR_SURF_16_BANK
));
1439 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1440 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1441 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1442 NUM_BANKS(ADDR_SURF_16_BANK
));
1445 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1446 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8
) |
1447 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1448 NUM_BANKS(ADDR_SURF_16_BANK
));
1451 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1452 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1453 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1454 NUM_BANKS(ADDR_SURF_16_BANK
));
1457 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1460 NUM_BANKS(ADDR_SURF_16_BANK
));
1463 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1466 NUM_BANKS(ADDR_SURF_16_BANK
));
1469 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1472 NUM_BANKS(ADDR_SURF_8_BANK
));
1475 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1478 NUM_BANKS(ADDR_SURF_4_BANK
));
1481 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1484 NUM_BANKS(ADDR_SURF_4_BANK
));
1493 adev
->gfx
.config
.macrotile_mode_array
[reg_offset
] = gb_tile_moden
;
1494 WREG32(mmGB_MACROTILE_MODE0
+ reg_offset
, gb_tile_moden
);
/* Third tile-mode table: every entry uses PIPE_CONFIG(ADDR_SURF_P2). */
1499 for (reg_offset
= 0; reg_offset
< num_tile_mode_states
; reg_offset
++) {
1500 switch (reg_offset
) {
1502 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1503 PIPE_CONFIG(ADDR_SURF_P2
) |
1504 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B
) |
1505 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1508 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1509 PIPE_CONFIG(ADDR_SURF_P2
) |
1510 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B
) |
1511 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1514 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1515 PIPE_CONFIG(ADDR_SURF_P2
) |
1516 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
) |
1517 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1520 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1521 PIPE_CONFIG(ADDR_SURF_P2
) |
1522 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B
) |
1523 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1526 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1527 PIPE_CONFIG(ADDR_SURF_P2
) |
1528 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB
) |
1529 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1532 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1533 PIPE_CONFIG(ADDR_SURF_P2
) |
1534 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB
) |
1535 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1538 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1539 PIPE_CONFIG(ADDR_SURF_P2
) |
1540 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB
) |
1541 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1544 gb_tile_moden
= (ARRAY_MODE(ARRAY_LINEAR_ALIGNED
) |
1545 PIPE_CONFIG(ADDR_SURF_P2
));
1548 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1549 PIPE_CONFIG(ADDR_SURF_P2
) |
1550 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1554 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1555 PIPE_CONFIG(ADDR_SURF_P2
) |
1556 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1557 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1560 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1561 PIPE_CONFIG(ADDR_SURF_P2
) |
1562 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1563 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8
));
1566 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1567 PIPE_CONFIG(ADDR_SURF_P2
) |
1568 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1569 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1572 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1573 PIPE_CONFIG(ADDR_SURF_P2
) |
1574 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1575 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1578 gb_tile_moden
= (ARRAY_MODE(ARRAY_3D_TILED_THIN1
) |
1579 PIPE_CONFIG(ADDR_SURF_P2
) |
1580 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1581 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1584 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1585 PIPE_CONFIG(ADDR_SURF_P2
) |
1586 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1587 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8
));
1590 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THICK
) |
1591 PIPE_CONFIG(ADDR_SURF_P2
) |
1592 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1593 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1596 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THICK
) |
1597 PIPE_CONFIG(ADDR_SURF_P2
) |
1598 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1599 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1602 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THICK
) |
1603 PIPE_CONFIG(ADDR_SURF_P2
) |
1604 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1605 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1608 gb_tile_moden
= (ARRAY_MODE(ARRAY_3D_TILED_THICK
) |
1609 PIPE_CONFIG(ADDR_SURF_P2
) |
1610 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1611 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1614 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THICK
) |
1615 PIPE_CONFIG(ADDR_SURF_P2
) |
1616 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1617 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1620 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THICK
) |
1621 PIPE_CONFIG(ADDR_SURF_P2
) |
1622 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1623 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1626 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_XTHICK
) |
1627 PIPE_CONFIG(ADDR_SURF_P2
) |
1628 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1629 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1632 gb_tile_moden
= (ARRAY_MODE(ARRAY_3D_TILED_XTHICK
) |
1633 PIPE_CONFIG(ADDR_SURF_P2
) |
1634 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING
) |
1635 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1
));
1638 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1639 PIPE_CONFIG(ADDR_SURF_P2
) |
1640 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1641 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1644 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1645 PIPE_CONFIG(ADDR_SURF_P2
) |
1646 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1647 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1650 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1651 PIPE_CONFIG(ADDR_SURF_P2
) |
1652 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1653 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8
));
1665 adev
->gfx
.config
.tile_mode_array
[reg_offset
] = gb_tile_moden
;
1666 WREG32(mmGB_TILE_MODE0
+ reg_offset
, gb_tile_moden
);
/* Third macrotile table. */
1668 for (reg_offset
= 0; reg_offset
< num_secondary_tile_mode_states
; reg_offset
++) {
1669 switch (reg_offset
) {
1671 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1672 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1673 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1674 NUM_BANKS(ADDR_SURF_8_BANK
));
1677 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1678 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1679 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1680 NUM_BANKS(ADDR_SURF_8_BANK
));
1683 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1684 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1685 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1686 NUM_BANKS(ADDR_SURF_8_BANK
));
1689 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1690 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1691 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1692 NUM_BANKS(ADDR_SURF_8_BANK
));
1695 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1696 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1697 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1698 NUM_BANKS(ADDR_SURF_8_BANK
));
1701 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1702 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1703 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1704 NUM_BANKS(ADDR_SURF_8_BANK
));
1707 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1708 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1709 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1710 NUM_BANKS(ADDR_SURF_8_BANK
));
1713 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4
) |
1714 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8
) |
1715 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1716 NUM_BANKS(ADDR_SURF_16_BANK
));
1719 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4
) |
1720 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1721 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1722 NUM_BANKS(ADDR_SURF_16_BANK
));
1725 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2
) |
1726 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1727 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1728 NUM_BANKS(ADDR_SURF_16_BANK
));
1731 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2
) |
1732 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1733 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1734 NUM_BANKS(ADDR_SURF_16_BANK
));
1737 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1738 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1739 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1740 NUM_BANKS(ADDR_SURF_16_BANK
));
1743 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1744 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1745 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1746 NUM_BANKS(ADDR_SURF_16_BANK
));
1749 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1750 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1751 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1752 NUM_BANKS(ADDR_SURF_8_BANK
));
1761 adev
->gfx
.config
.macrotile_mode_array
[reg_offset
] = gb_tile_moden
;
1762 WREG32(mmGB_MACROTILE_MODE0
+ reg_offset
, gb_tile_moden
);
/*
 * gfx_v8_0_create_bitmask - build a u32 bitmask from a bit count.
 *
 * @bit_width: number of loop iterations used to build the mask.
 *
 * NOTE(review): the loop body and the return statement are not visible in
 * this excerpt.  The caller gfx_v8_0_get_rb_disabled() stores the result in
 * a variable named mask, so this presumably returns the low bit_width bits
 * set -- confirm against the complete file.
 */
1767 static u32
gfx_v8_0_create_bitmask(u32 bit_width
)
1771 for (i
= 0; i
< bit_width
; i
++) {
1778 void gfx_v8_0_select_se_sh(struct amdgpu_device
*adev
, u32 se_num
, u32 sh_num
)
1780 u32 data
= REG_SET_FIELD(0, GRBM_GFX_INDEX
, INSTANCE_BROADCAST_WRITES
, 1);
1782 if ((se_num
== 0xffffffff) && (sh_num
== 0xffffffff)) {
1783 data
= REG_SET_FIELD(data
, GRBM_GFX_INDEX
, SH_BROADCAST_WRITES
, 1);
1784 data
= REG_SET_FIELD(data
, GRBM_GFX_INDEX
, SE_BROADCAST_WRITES
, 1);
1785 } else if (se_num
== 0xffffffff) {
1786 data
= REG_SET_FIELD(data
, GRBM_GFX_INDEX
, SH_INDEX
, sh_num
);
1787 data
= REG_SET_FIELD(data
, GRBM_GFX_INDEX
, SE_BROADCAST_WRITES
, 1);
1788 } else if (sh_num
== 0xffffffff) {
1789 data
= REG_SET_FIELD(data
, GRBM_GFX_INDEX
, SH_BROADCAST_WRITES
, 1);
1790 data
= REG_SET_FIELD(data
, GRBM_GFX_INDEX
, SE_INDEX
, se_num
);
1792 data
= REG_SET_FIELD(data
, GRBM_GFX_INDEX
, SH_INDEX
, sh_num
);
1793 data
= REG_SET_FIELD(data
, GRBM_GFX_INDEX
, SE_INDEX
, se_num
);
1795 WREG32(mmGRBM_GFX_INDEX
, data
);
/*
 * gfx_v8_0_get_rb_disabled - read the disabled render-backend bits.
 *
 * @adev: device to read from.
 * @max_rb_num_per_se: dividend of the mask width computation below.
 *
 * Reads mmCC_RB_BACKEND_DISABLE and keeps only the BACKEND_DISABLE field,
 * ORs in mmGC_USER_RB_BACKEND_DISABLE, shifts the result down by the
 * BACKEND_DISABLE field shift, and builds a mask that is
 * max_rb_num_per_se / sh_per_se bits wide.  sh_per_se is used as a divisor,
 * so it must be non-zero.
 *
 * NOTE(review): the remaining parameter line, any conditionals around these
 * statements and the return expression are not visible in this excerpt.
 */
1798 static u32
gfx_v8_0_get_rb_disabled(struct amdgpu_device
*adev
,
1799 u32 max_rb_num_per_se
,
1804 data
= RREG32(mmCC_RB_BACKEND_DISABLE
);
1806 data
&= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK
;
1810 data
|= RREG32(mmGC_USER_RB_BACKEND_DISABLE
);
1812 data
>>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT
;
1814 mask
= gfx_v8_0_create_bitmask(max_rb_num_per_se
/ sh_per_se
);
/*
 * gfx_v8_0_setup_rb - discover enabled render backends and program the
 * raster configuration.
 *
 * @adev: device being configured.
 * @se_num: number of shader engines to iterate over.
 * @sh_per_se: number of shader arrays iterated per shader engine.
 * @max_rb_num_per_se: with se_num, bounds the enabled-RB scan.
 *
 * Works in three steps:
 *  1. With grbm_idx_mutex held, select each SE/SH pair in turn and collect
 *     gfx_v8_0_get_rb_disabled() results into disabled_rbs.
 *  2. Derive enabled_rbs from disabled_rbs and store it in
 *     adev->gfx.config.backend_enable_mask.
 *  3. With grbm_idx_mutex held again, build a value per SE from enabled_rbs
 *     and write it to mmPA_SC_RASTER_CONFIG.
 * After steps 1 and 3 the SE/SH selection is returned to full broadcast
 * before the mutex is released.
 */
1819 static void gfx_v8_0_setup_rb(struct amdgpu_device
*adev
,
1820 u32 se_num
, u32 sh_per_se
,
1821 u32 max_rb_num_per_se
)
1825 u32 disabled_rbs
= 0;
1826 u32 enabled_rbs
= 0;
/*
 * Step 1: each SE/SH pair contributes its disabled bits at bit offset
 * (i * sh_per_se + j) * RB_BITMAP_WIDTH_PER_SH.
 */
1828 mutex_lock(&adev
->grbm_idx_mutex
);
1829 for (i
= 0; i
< se_num
; i
++) {
1830 for (j
= 0; j
< sh_per_se
; j
++) {
1831 gfx_v8_0_select_se_sh(adev
, i
, j
);
1832 data
= gfx_v8_0_get_rb_disabled(adev
,
1833 max_rb_num_per_se
, sh_per_se
);
1834 disabled_rbs
|= data
<< ((i
* sh_per_se
+ j
) *
1835 RB_BITMAP_WIDTH_PER_SH
);
1838 gfx_v8_0_select_se_sh(adev
, 0xffffffff, 0xffffffff);
1839 mutex_unlock(&adev
->grbm_idx_mutex
);
/*
 * Step 2: every mask position that is clear in disabled_rbs is set in
 * enabled_rbs.  NOTE(review): how mask is initialized and advanced between
 * iterations is not visible in this excerpt.
 */
1842 for (i
= 0; i
< max_rb_num_per_se
* se_num
; i
++) {
1843 if (!(disabled_rbs
& mask
))
1844 enabled_rbs
|= mask
;
1848 adev
->gfx
.config
.backend_enable_mask
= enabled_rbs
;
/*
 * Step 3: per SE (SH broadcast), choose RB map values based on
 * enabled_rbs & 3.  NOTE(review): the case labels, the reset of data and
 * any shifting of enabled_rbs are not visible in this excerpt.
 */
1850 mutex_lock(&adev
->grbm_idx_mutex
);
1851 for (i
= 0; i
< se_num
; i
++) {
1852 gfx_v8_0_select_se_sh(adev
, i
, 0xffffffff);
1854 for (j
= 0; j
< sh_per_se
; j
++) {
1855 switch (enabled_rbs
& 3) {
1858 data
|= (RASTER_CONFIG_RB_MAP_3
<<
1859 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT
);
1861 data
|= (RASTER_CONFIG_RB_MAP_0
<<
1862 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT
);
1865 data
|= (RASTER_CONFIG_RB_MAP_0
<<
1866 (i
* sh_per_se
+ j
) * 2);
1869 data
|= (RASTER_CONFIG_RB_MAP_3
<<
1870 (i
* sh_per_se
+ j
) * 2);
1874 data
|= (RASTER_CONFIG_RB_MAP_2
<<
1875 (i
* sh_per_se
+ j
) * 2);
1880 WREG32(mmPA_SC_RASTER_CONFIG
, data
);
1882 gfx_v8_0_select_se_sh(adev
, 0xffffffff, 0xffffffff);
1883 mutex_unlock(&adev
->grbm_idx_mutex
);
1886 static void gfx_v8_0_gpu_init(struct amdgpu_device
*adev
)
1889 u32 mc_shared_chmap
, mc_arb_ramcfg
;
1890 u32 dimm00_addr_map
, dimm01_addr_map
, dimm10_addr_map
, dimm11_addr_map
;
1894 switch (adev
->asic_type
) {
1896 adev
->gfx
.config
.max_shader_engines
= 1;
1897 adev
->gfx
.config
.max_tile_pipes
= 2;
1898 adev
->gfx
.config
.max_cu_per_sh
= 6;
1899 adev
->gfx
.config
.max_sh_per_se
= 1;
1900 adev
->gfx
.config
.max_backends_per_se
= 2;
1901 adev
->gfx
.config
.max_texture_channel_caches
= 2;
1902 adev
->gfx
.config
.max_gprs
= 256;
1903 adev
->gfx
.config
.max_gs_threads
= 32;
1904 adev
->gfx
.config
.max_hw_contexts
= 8;
1906 adev
->gfx
.config
.sc_prim_fifo_size_frontend
= 0x20;
1907 adev
->gfx
.config
.sc_prim_fifo_size_backend
= 0x100;
1908 adev
->gfx
.config
.sc_hiz_tile_fifo_size
= 0x30;
1909 adev
->gfx
.config
.sc_earlyz_tile_fifo_size
= 0x130;
1910 gb_addr_config
= TOPAZ_GB_ADDR_CONFIG_GOLDEN
;
1913 adev
->gfx
.config
.max_shader_engines
= 4;
1914 adev
->gfx
.config
.max_tile_pipes
= 8;
1915 adev
->gfx
.config
.max_cu_per_sh
= 8;
1916 adev
->gfx
.config
.max_sh_per_se
= 1;
1917 adev
->gfx
.config
.max_backends_per_se
= 2;
1918 adev
->gfx
.config
.max_texture_channel_caches
= 8;
1919 adev
->gfx
.config
.max_gprs
= 256;
1920 adev
->gfx
.config
.max_gs_threads
= 32;
1921 adev
->gfx
.config
.max_hw_contexts
= 8;
1923 adev
->gfx
.config
.sc_prim_fifo_size_frontend
= 0x20;
1924 adev
->gfx
.config
.sc_prim_fifo_size_backend
= 0x100;
1925 adev
->gfx
.config
.sc_hiz_tile_fifo_size
= 0x30;
1926 adev
->gfx
.config
.sc_earlyz_tile_fifo_size
= 0x130;
1927 gb_addr_config
= TONGA_GB_ADDR_CONFIG_GOLDEN
;
1930 adev
->gfx
.config
.max_shader_engines
= 1;
1931 adev
->gfx
.config
.max_tile_pipes
= 2;
1932 adev
->gfx
.config
.max_cu_per_sh
= 8;
1933 adev
->gfx
.config
.max_sh_per_se
= 1;
1934 adev
->gfx
.config
.max_backends_per_se
= 2;
1935 adev
->gfx
.config
.max_texture_channel_caches
= 2;
1936 adev
->gfx
.config
.max_gprs
= 256;
1937 adev
->gfx
.config
.max_gs_threads
= 32;
1938 adev
->gfx
.config
.max_hw_contexts
= 8;
1940 adev
->gfx
.config
.sc_prim_fifo_size_frontend
= 0x20;
1941 adev
->gfx
.config
.sc_prim_fifo_size_backend
= 0x100;
1942 adev
->gfx
.config
.sc_hiz_tile_fifo_size
= 0x30;
1943 adev
->gfx
.config
.sc_earlyz_tile_fifo_size
= 0x130;
1944 gb_addr_config
= CARRIZO_GB_ADDR_CONFIG_GOLDEN
;
1947 adev
->gfx
.config
.max_shader_engines
= 2;
1948 adev
->gfx
.config
.max_tile_pipes
= 4;
1949 adev
->gfx
.config
.max_cu_per_sh
= 2;
1950 adev
->gfx
.config
.max_sh_per_se
= 1;
1951 adev
->gfx
.config
.max_backends_per_se
= 2;
1952 adev
->gfx
.config
.max_texture_channel_caches
= 4;
1953 adev
->gfx
.config
.max_gprs
= 256;
1954 adev
->gfx
.config
.max_gs_threads
= 32;
1955 adev
->gfx
.config
.max_hw_contexts
= 8;
1957 adev
->gfx
.config
.sc_prim_fifo_size_frontend
= 0x20;
1958 adev
->gfx
.config
.sc_prim_fifo_size_backend
= 0x100;
1959 adev
->gfx
.config
.sc_hiz_tile_fifo_size
= 0x30;
1960 adev
->gfx
.config
.sc_earlyz_tile_fifo_size
= 0x130;
1961 gb_addr_config
= TONGA_GB_ADDR_CONFIG_GOLDEN
;
1965 tmp
= RREG32(mmGRBM_CNTL
);
1966 tmp
= REG_SET_FIELD(tmp
, GRBM_CNTL
, READ_TIMEOUT
, 0xff);
1967 WREG32(mmGRBM_CNTL
, tmp
);
1969 mc_shared_chmap
= RREG32(mmMC_SHARED_CHMAP
);
1970 adev
->gfx
.config
.mc_arb_ramcfg
= RREG32(mmMC_ARB_RAMCFG
);
1971 mc_arb_ramcfg
= adev
->gfx
.config
.mc_arb_ramcfg
;
1973 adev
->gfx
.config
.num_tile_pipes
= adev
->gfx
.config
.max_tile_pipes
;
1974 adev
->gfx
.config
.mem_max_burst_length_bytes
= 256;
1975 if (adev
->flags
& AMDGPU_IS_APU
) {
1976 /* Get memory bank mapping mode. */
1977 tmp
= RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING
);
1978 dimm00_addr_map
= REG_GET_FIELD(tmp
, MC_FUS_DRAM0_BANK_ADDR_MAPPING
, DIMM0ADDRMAP
);
1979 dimm01_addr_map
= REG_GET_FIELD(tmp
, MC_FUS_DRAM0_BANK_ADDR_MAPPING
, DIMM1ADDRMAP
);
1981 tmp
= RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING
);
1982 dimm10_addr_map
= REG_GET_FIELD(tmp
, MC_FUS_DRAM1_BANK_ADDR_MAPPING
, DIMM0ADDRMAP
);
1983 dimm11_addr_map
= REG_GET_FIELD(tmp
, MC_FUS_DRAM1_BANK_ADDR_MAPPING
, DIMM1ADDRMAP
);
1985 /* Validate settings in case only one DIMM installed. */
1986 if ((dimm00_addr_map
== 0) || (dimm00_addr_map
== 3) || (dimm00_addr_map
== 4) || (dimm00_addr_map
> 12))
1987 dimm00_addr_map
= 0;
1988 if ((dimm01_addr_map
== 0) || (dimm01_addr_map
== 3) || (dimm01_addr_map
== 4) || (dimm01_addr_map
> 12))
1989 dimm01_addr_map
= 0;
1990 if ((dimm10_addr_map
== 0) || (dimm10_addr_map
== 3) || (dimm10_addr_map
== 4) || (dimm10_addr_map
> 12))
1991 dimm10_addr_map
= 0;
1992 if ((dimm11_addr_map
== 0) || (dimm11_addr_map
== 3) || (dimm11_addr_map
== 4) || (dimm11_addr_map
> 12))
1993 dimm11_addr_map
= 0;
1995 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1996 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1997 if ((dimm00_addr_map
== 11) || (dimm01_addr_map
== 11) || (dimm10_addr_map
== 11) || (dimm11_addr_map
== 11))
1998 adev
->gfx
.config
.mem_row_size_in_kb
= 2;
2000 adev
->gfx
.config
.mem_row_size_in_kb
= 1;
2002 tmp
= REG_GET_FIELD(mc_arb_ramcfg
, MC_ARB_RAMCFG
, NOOFCOLS
);
2003 adev
->gfx
.config
.mem_row_size_in_kb
= (4 * (1 << (8 + tmp
))) / 1024;
2004 if (adev
->gfx
.config
.mem_row_size_in_kb
> 4)
2005 adev
->gfx
.config
.mem_row_size_in_kb
= 4;
2008 adev
->gfx
.config
.shader_engine_tile_size
= 32;
2009 adev
->gfx
.config
.num_gpus
= 1;
2010 adev
->gfx
.config
.multi_gpu_tile_size
= 64;
2012 /* fix up row size */
2013 switch (adev
->gfx
.config
.mem_row_size_in_kb
) {
2016 gb_addr_config
= REG_SET_FIELD(gb_addr_config
, GB_ADDR_CONFIG
, ROW_SIZE
, 0);
2019 gb_addr_config
= REG_SET_FIELD(gb_addr_config
, GB_ADDR_CONFIG
, ROW_SIZE
, 1);
2022 gb_addr_config
= REG_SET_FIELD(gb_addr_config
, GB_ADDR_CONFIG
, ROW_SIZE
, 2);
2025 adev
->gfx
.config
.gb_addr_config
= gb_addr_config
;
2027 WREG32(mmGB_ADDR_CONFIG
, gb_addr_config
);
2028 WREG32(mmHDP_ADDR_CONFIG
, gb_addr_config
);
2029 WREG32(mmDMIF_ADDR_CALC
, gb_addr_config
);
2030 WREG32(mmSDMA0_TILING_CONFIG
+ SDMA0_REGISTER_OFFSET
,
2031 gb_addr_config
& 0x70);
2032 WREG32(mmSDMA0_TILING_CONFIG
+ SDMA1_REGISTER_OFFSET
,
2033 gb_addr_config
& 0x70);
2034 WREG32(mmUVD_UDEC_ADDR_CONFIG
, gb_addr_config
);
2035 WREG32(mmUVD_UDEC_DB_ADDR_CONFIG
, gb_addr_config
);
2036 WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG
, gb_addr_config
);
2038 gfx_v8_0_tiling_mode_table_init(adev
);
2040 gfx_v8_0_setup_rb(adev
, adev
->gfx
.config
.max_shader_engines
,
2041 adev
->gfx
.config
.max_sh_per_se
,
2042 adev
->gfx
.config
.max_backends_per_se
);
2044 /* XXX SH_MEM regs */
2045 /* where to put LDS, scratch, GPUVM in FSA64 space */
2046 mutex_lock(&adev
->srbm_mutex
);
2047 for (i
= 0; i
< 16; i
++) {
2048 vi_srbm_select(adev
, 0, 0, 0, i
);
2049 /* CP and shaders */
2051 tmp
= REG_SET_FIELD(0, SH_MEM_CONFIG
, DEFAULT_MTYPE
, MTYPE_UC
);
2052 tmp
= REG_SET_FIELD(tmp
, SH_MEM_CONFIG
, APE1_MTYPE
, MTYPE_UC
);
2053 WREG32(mmSH_MEM_CONFIG
, tmp
);
2055 tmp
= REG_SET_FIELD(0, SH_MEM_CONFIG
, DEFAULT_MTYPE
, MTYPE_NC
);
2056 tmp
= REG_SET_FIELD(tmp
, SH_MEM_CONFIG
, APE1_MTYPE
, MTYPE_NC
);
2057 WREG32(mmSH_MEM_CONFIG
, tmp
);
2060 WREG32(mmSH_MEM_APE1_BASE
, 1);
2061 WREG32(mmSH_MEM_APE1_LIMIT
, 0);
2062 WREG32(mmSH_MEM_BASES
, 0);
2064 vi_srbm_select(adev
, 0, 0, 0, 0);
2065 mutex_unlock(&adev
->srbm_mutex
);
2067 mutex_lock(&adev
->grbm_idx_mutex
);
2069 * making sure that the following register writes will be broadcasted
2070 * to all the shaders
2072 gfx_v8_0_select_se_sh(adev
, 0xffffffff, 0xffffffff);
2074 WREG32(mmPA_SC_FIFO_SIZE
,
2075 (adev
->gfx
.config
.sc_prim_fifo_size_frontend
<<
2076 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT
) |
2077 (adev
->gfx
.config
.sc_prim_fifo_size_backend
<<
2078 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT
) |
2079 (adev
->gfx
.config
.sc_hiz_tile_fifo_size
<<
2080 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT
) |
2081 (adev
->gfx
.config
.sc_earlyz_tile_fifo_size
<<
2082 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT
));
2083 mutex_unlock(&adev
->grbm_idx_mutex
);
/*
 * gfx_v8_0_wait_for_rlc_serdes - poll the RLC serdes busy registers
 *
 * @adev: amdgpu device
 *
 * While holding adev->grbm_idx_mutex, selects every shader engine /
 * shader array pair (bounded by config.max_shader_engines and
 * config.max_sh_per_se) and reads mmRLC_SERDES_CU_MASTER_BUSY, checking
 * for zero, at most adev->usec_timeout times per pair.  Afterwards reads
 * mmRLC_SERDES_NONCU_MASTER_BUSY against the SE/GC/TC0/TC1 busy bits with
 * the same iteration bound.
 *
 * NOTE(review): the local declarations and the bodies of the polling
 * loops (what happens on a zero read, any per-iteration delay) are not
 * visible in this chunk - confirm against the full file.
 */
2087 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device
*adev
)
/* the SE/SH selection is guarded by grbm_idx_mutex */
2092 mutex_lock(&adev
->grbm_idx_mutex
);
2093 for (i
= 0; i
< adev
->gfx
.config
.max_shader_engines
; i
++) {
2094 for (j
= 0; j
< adev
->gfx
.config
.max_sh_per_se
; j
++) {
2095 gfx_v8_0_select_se_sh(adev
, i
, j
);
/* bounded poll of the CU master busy register for the selected SE/SH */
2096 for (k
= 0; k
< adev
->usec_timeout
; k
++) {
2097 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY
) == 0)
/*
 * select 0xffffffff/0xffffffff (the broadcast selection used elsewhere in
 * this file) before dropping the lock
 */
2103 gfx_v8_0_select_se_sh(adev
, 0xffffffff, 0xffffffff);
2104 mutex_unlock(&adev
->grbm_idx_mutex
);
/* busy bits of the non-CU masters that are checked below */
2106 mask
= RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK
|
2107 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK
|
2108 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK
|
2109 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK
;
/* bounded poll until none of the masked busy bits is set */
2110 for (k
= 0; k
< adev
->usec_timeout
; k
++) {
2111 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY
) & mask
) == 0)
/*
 * gfx_v8_0_enable_gui_idle_interrupt - update interrupt enables in
 * CP_INT_CNTL_RING0
 *
 * @adev: amdgpu device
 *
 * Read-modify-write of mmCP_INT_CNTL_RING0 touching four fields:
 * CNTX_BUSY_INT_ENABLE, CNTX_EMPTY_INT_ENABLE, CMP_BUSY_INT_ENABLE and
 * GFX_IDLE_INT_ENABLE.  The first group of assignments sets all four to 1,
 * the second group sets all four to 0.
 *
 * NOTE(review): the second parameter and the branch that chooses between
 * the two groups are not visible in this chunk; callers here pass
 * true/false, so presumably true selects the "set to 1" group - confirm.
 */
2117 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device
*adev
,
2120 u32 tmp
= RREG32(mmCP_INT_CNTL_RING0
);
/* group 1: all four enables set to 1 */
2123 tmp
= REG_SET_FIELD(tmp
, CP_INT_CNTL_RING0
, CNTX_BUSY_INT_ENABLE
, 1);
2124 tmp
= REG_SET_FIELD(tmp
, CP_INT_CNTL_RING0
, CNTX_EMPTY_INT_ENABLE
, 1);
2125 tmp
= REG_SET_FIELD(tmp
, CP_INT_CNTL_RING0
, CMP_BUSY_INT_ENABLE
, 1);
2126 tmp
= REG_SET_FIELD(tmp
, CP_INT_CNTL_RING0
, GFX_IDLE_INT_ENABLE
, 1);
/* group 2: all four enables set to 0 */
2128 tmp
= REG_SET_FIELD(tmp
, CP_INT_CNTL_RING0
, CNTX_BUSY_INT_ENABLE
, 0);
2129 tmp
= REG_SET_FIELD(tmp
, CP_INT_CNTL_RING0
, CNTX_EMPTY_INT_ENABLE
, 0);
2130 tmp
= REG_SET_FIELD(tmp
, CP_INT_CNTL_RING0
, CMP_BUSY_INT_ENABLE
, 0);
2131 tmp
= REG_SET_FIELD(tmp
, CP_INT_CNTL_RING0
, GFX_IDLE_INT_ENABLE
, 0);
/* write the updated value back */
2133 WREG32(mmCP_INT_CNTL_RING0
, tmp
);
/*
 * gfx_v8_0_rlc_stop - clear the RLC enable bit and wait for the serdes
 *
 * @adev: amdgpu device
 *
 * Clears RLC_ENABLE_F32 in mmRLC_CNTL, calls
 * gfx_v8_0_enable_gui_idle_interrupt() with false, then calls
 * gfx_v8_0_wait_for_rlc_serdes().  Not declared static, unlike the
 * neighbouring RLC helpers.
 */
2136 void gfx_v8_0_rlc_stop(struct amdgpu_device
*adev
)
2138 u32 tmp
= RREG32(mmRLC_CNTL
);
/* read-modify-write: only RLC_ENABLE_F32 is changed */
2140 tmp
= REG_SET_FIELD(tmp
, RLC_CNTL
, RLC_ENABLE_F32
, 0);
2141 WREG32(mmRLC_CNTL
, tmp
);
2143 gfx_v8_0_enable_gui_idle_interrupt(adev
, false);
2145 gfx_v8_0_wait_for_rlc_serdes(adev
);
/*
 * gfx_v8_0_rlc_reset - pulse the RLC soft reset bit
 *
 * @adev: amdgpu device
 *
 * Sets SOFT_RESET_RLC in mmGRBM_SOFT_RESET to 1 and writes it, then sets
 * it back to 0 and writes it again.
 *
 * NOTE(review): any delay between or after the two writes is not visible
 * in this chunk - confirm against the full file.
 */
2148 static void gfx_v8_0_rlc_reset(struct amdgpu_device
*adev
)
2150 u32 tmp
= RREG32(mmGRBM_SOFT_RESET
);
/* assert the RLC soft reset */
2152 tmp
= REG_SET_FIELD(tmp
, GRBM_SOFT_RESET
, SOFT_RESET_RLC
, 1);
2153 WREG32(mmGRBM_SOFT_RESET
, tmp
);
/* de-assert the RLC soft reset */
2155 tmp
= REG_SET_FIELD(tmp
, GRBM_SOFT_RESET
, SOFT_RESET_RLC
, 0);
2156 WREG32(mmGRBM_SOFT_RESET
, tmp
);
/*
 * gfx_v8_0_rlc_start - set the RLC enable bit
 *
 * @adev: amdgpu device
 *
 * Sets RLC_ENABLE_F32 in mmRLC_CNTL.  On every ASIC except CHIP_CARRIZO it
 * also calls gfx_v8_0_enable_gui_idle_interrupt() with true.
 */
2160 static void gfx_v8_0_rlc_start(struct amdgpu_device
*adev
)
2162 u32 tmp
= RREG32(mmRLC_CNTL
);
/* read-modify-write: only RLC_ENABLE_F32 is changed */
2164 tmp
= REG_SET_FIELD(tmp
, RLC_CNTL
, RLC_ENABLE_F32
, 1);
2165 WREG32(mmRLC_CNTL
, tmp
);
2167 /* Carrizo enables the CP interrupt only after the CP has been initialized */
2168 if (adev
->asic_type
!= CHIP_CARRIZO
)
2169 gfx_v8_0_enable_gui_idle_interrupt(adev
, true);
/*
 * gfx_v8_0_rlc_load_microcode - write the RLC firmware image to the GPU
 *
 * @adev: amdgpu device
 *
 * Interprets adev->gfx.rlc_fw->data as a struct rlc_firmware_header_v2_0,
 * records the ucode version in adev->gfx.rlc_fw_version, then streams the
 * payload one dword at a time into mmRLC_GPM_UCODE_DATA after resetting
 * mmRLC_GPM_UCODE_ADDR to 0.  Finally the firmware version is written to
 * mmRLC_GPM_UCODE_ADDR.
 *
 * NOTE(review): the statement executed when rlc_fw is NULL and the return
 * statements are not visible in this chunk.  No visible check that
 * ucode_array_offset_bytes / ucode_size_bytes stay inside the firmware
 * blob - confirm validation happens where the firmware is requested.
 */
2174 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device
*adev
)
2176 const struct rlc_firmware_header_v2_0
*hdr
;
2177 const __le32
*fw_data
;
2178 unsigned i
, fw_size
;
/* firmware must have been fetched beforehand */
2180 if (!adev
->gfx
.rlc_fw
)
2183 hdr
= (const struct rlc_firmware_header_v2_0
*)adev
->gfx
.rlc_fw
->data
;
2184 amdgpu_ucode_print_rlc_hdr(&hdr
->header
);
2185 adev
->gfx
.rlc_fw_version
= le32_to_cpu(hdr
->header
.ucode_version
);
/* payload starts ucode_array_offset_bytes into the blob */
2187 fw_data
= (const __le32
*)(adev
->gfx
.rlc_fw
->data
+
2188 le32_to_cpu(hdr
->header
.ucode_array_offset_bytes
));
/* size is stored in bytes; the upload loop counts dwords */
2189 fw_size
= le32_to_cpu(hdr
->header
.ucode_size_bytes
) / 4;
2191 WREG32(mmRLC_GPM_UCODE_ADDR
, 0);
2192 for (i
= 0; i
< fw_size
; i
++)
2193 WREG32(mmRLC_GPM_UCODE_DATA
, le32_to_cpup(fw_data
++));
/* the address register is left holding the firmware version */
2194 WREG32(mmRLC_GPM_UCODE_ADDR
, adev
->gfx
.rlc_fw_version
);
/*
 * gfx_v8_0_rlc_resume - stop, reset, (re)load and start the RLC
 *
 * @adev: amdgpu device
 *
 * Sequence visible here: gfx_v8_0_rlc_stop(), write 0 to
 * mmRLC_CGCG_CGLS_CTRL and mmRLC_PG_CNTL, gfx_v8_0_rlc_reset(), obtain the
 * firmware status in r, gfx_v8_0_rlc_start().
 *
 * When adev->firmware.smu_load is not set the microcode is written by the
 * driver via gfx_v8_0_rlc_load_microcode(); the other visible assignment
 * to r asks the SMU manager whether AMDGPU_UCODE_ID_RLC_G finished
 * loading.
 *
 * NOTE(review): the else branch, the checks on r and the return
 * statements are not visible in this chunk - confirm error handling
 * against the full file.
 */
2199 static int gfx_v8_0_rlc_resume(struct amdgpu_device
*adev
)
2203 gfx_v8_0_rlc_stop(adev
);
/* presumably disables clock gating (register name suggests CGCG/CGLS) - TODO confirm */
2206 WREG32(mmRLC_CGCG_CGLS_CTRL
, 0);
/* presumably disables power gating - TODO confirm */
2209 WREG32(mmRLC_PG_CNTL
, 0);
2211 gfx_v8_0_rlc_reset(adev
);
2213 if (!adev
->firmware
.smu_load
) {
2214 /* legacy rlc firmware loading */
2215 r
= gfx_v8_0_rlc_load_microcode(adev
);
/* SMU path: query whether the RLC_G ucode load has finished */
2219 r
= adev
->smu
.smumgr_funcs
->check_fw_load_finish(adev
,
2220 AMDGPU_UCODE_ID_RLC_G
);
2225 gfx_v8_0_rlc_start(adev
);
/*
 * gfx_v8_0_cp_gfx_enable - update the halt bits of the gfx CP engines
 *
 * @adev: amdgpu device
 * @enable: selects which of the two groups below is applied
 *
 * Read-modify-write of mmCP_ME_CNTL.  The first group clears ME_HALT,
 * PFP_HALT and CE_HALT; the second group sets all three and marks every
 * gfx ring (adev->gfx.num_gfx_rings of them) as not ready.
 *
 * NOTE(review): the branch on @enable is not visible in this chunk;
 * presumably enable == true takes the "clear halt" group - confirm.
 */
2230 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device
*adev
, bool enable
)
2233 u32 tmp
= RREG32(mmCP_ME_CNTL
);
/* group 1: release ME, PFP and CE from halt */
2236 tmp
= REG_SET_FIELD(tmp
, CP_ME_CNTL
, ME_HALT
, 0);
2237 tmp
= REG_SET_FIELD(tmp
, CP_ME_CNTL
, PFP_HALT
, 0);
2238 tmp
= REG_SET_FIELD(tmp
, CP_ME_CNTL
, CE_HALT
, 0);
/* group 2: halt ME, PFP and CE */
2240 tmp
= REG_SET_FIELD(tmp
, CP_ME_CNTL
, ME_HALT
, 1);
2241 tmp
= REG_SET_FIELD(tmp
, CP_ME_CNTL
, PFP_HALT
, 1);
2242 tmp
= REG_SET_FIELD(tmp
, CP_ME_CNTL
, CE_HALT
, 1);
/* rings are flagged not ready as part of group 2 */
2243 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++)
2244 adev
->gfx
.gfx_ring
[i
].ready
= false;
2246 WREG32(mmCP_ME_CNTL
, tmp
);
/*
 * gfx_v8_0_cp_gfx_load_microcode - write PFP, CE and ME firmware to the CP
 *
 * @adev: amdgpu device
 *
 * Needs adev->gfx.me_fw, pfp_fw and ce_fw.  Each blob is interpreted as a
 * struct gfx_firmware_header_v1_0; the ucode versions are stored in
 * adev->gfx.{pfp,ce,me}_fw_version.  gfx_v8_0_cp_gfx_enable(adev, false)
 * is called before the upload.  For each engine the payload is located via
 * ucode_array_offset_bytes, the address/write-address register is reset to
 * 0, the payload is streamed dword by dword into the data register, and
 * finally the firmware version is written to the address register.
 *
 * NOTE(review): the statement executed when a firmware pointer is NULL
 * and the return statements are not visible in this chunk.  No visible
 * bounds check of the header offsets/sizes against the blob size.
 */
2250 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device
*adev
)
2252 const struct gfx_firmware_header_v1_0
*pfp_hdr
;
2253 const struct gfx_firmware_header_v1_0
*ce_hdr
;
2254 const struct gfx_firmware_header_v1_0
*me_hdr
;
2255 const __le32
*fw_data
;
2256 unsigned i
, fw_size
;
/* all three firmware images are required */
2258 if (!adev
->gfx
.me_fw
|| !adev
->gfx
.pfp_fw
|| !adev
->gfx
.ce_fw
)
2261 pfp_hdr
= (const struct gfx_firmware_header_v1_0
*)
2262 adev
->gfx
.pfp_fw
->data
;
2263 ce_hdr
= (const struct gfx_firmware_header_v1_0
*)
2264 adev
->gfx
.ce_fw
->data
;
2265 me_hdr
= (const struct gfx_firmware_header_v1_0
*)
2266 adev
->gfx
.me_fw
->data
;
2268 amdgpu_ucode_print_gfx_hdr(&pfp_hdr
->header
);
2269 amdgpu_ucode_print_gfx_hdr(&ce_hdr
->header
);
2270 amdgpu_ucode_print_gfx_hdr(&me_hdr
->header
);
/* remember the versions; they are also written to the address registers below */
2271 adev
->gfx
.pfp_fw_version
= le32_to_cpu(pfp_hdr
->header
.ucode_version
);
2272 adev
->gfx
.ce_fw_version
= le32_to_cpu(ce_hdr
->header
.ucode_version
);
2273 adev
->gfx
.me_fw_version
= le32_to_cpu(me_hdr
->header
.ucode_version
);
2275 gfx_v8_0_cp_gfx_enable(adev
, false);
/* PFP: payload pointer, dword count, then upload */
2278 fw_data
= (const __le32
*)
2279 (adev
->gfx
.pfp_fw
->data
+
2280 le32_to_cpu(pfp_hdr
->header
.ucode_array_offset_bytes
));
2281 fw_size
= le32_to_cpu(pfp_hdr
->header
.ucode_size_bytes
) / 4;
2282 WREG32(mmCP_PFP_UCODE_ADDR
, 0);
2283 for (i
= 0; i
< fw_size
; i
++)
2284 WREG32(mmCP_PFP_UCODE_DATA
, le32_to_cpup(fw_data
++));
2285 WREG32(mmCP_PFP_UCODE_ADDR
, adev
->gfx
.pfp_fw_version
);
/* CE: payload pointer, dword count, then upload */
2288 fw_data
= (const __le32
*)
2289 (adev
->gfx
.ce_fw
->data
+
2290 le32_to_cpu(ce_hdr
->header
.ucode_array_offset_bytes
));
2291 fw_size
= le32_to_cpu(ce_hdr
->header
.ucode_size_bytes
) / 4;
2292 WREG32(mmCP_CE_UCODE_ADDR
, 0);
2293 for (i
= 0; i
< fw_size
; i
++)
2294 WREG32(mmCP_CE_UCODE_DATA
, le32_to_cpup(fw_data
++));
2295 WREG32(mmCP_CE_UCODE_ADDR
, adev
->gfx
.ce_fw_version
);
/* ME: uses the RAM write-address / RAM data register pair */
2298 fw_data
= (const __le32
*)
2299 (adev
->gfx
.me_fw
->data
+
2300 le32_to_cpu(me_hdr
->header
.ucode_array_offset_bytes
));
2301 fw_size
= le32_to_cpu(me_hdr
->header
.ucode_size_bytes
) / 4;
2302 WREG32(mmCP_ME_RAM_WADDR
, 0);
2303 for (i
= 0; i
< fw_size
; i
++)
2304 WREG32(mmCP_ME_RAM_DATA
, le32_to_cpup(fw_data
++));
2305 WREG32(mmCP_ME_RAM_WADDR
, adev
->gfx
.me_fw_version
);
/*
 * gfx_v8_0_get_csb_size - size of the clear state buffer
 *
 * @adev: amdgpu device
 *
 * Walks the vi_cs_data section table (terminated by a NULL section) and,
 * for every extent of a SECT_CONTEXT section, adds 2 + reg_count to the
 * running count.
 *
 * NOTE(review): the counter declaration, the fixed-size contributions
 * announced by the comments below (begin/end clear state, context
 * control, raster config) and the return statement are not visible in
 * this chunk.  The result is used by gfx_v8_0_cp_gfx_start() as a ring
 * reservation size, so the unit is presumably dwords - confirm.
 */
2310 static u32
gfx_v8_0_get_csb_size(struct amdgpu_device
*adev
)
2313 const struct cs_section_def
*sect
= NULL
;
2314 const struct cs_extent_def
*ext
= NULL
;
2316 /* begin clear state */
2318 /* context control state */
/* one contribution per extent of each context section */
2321 for (sect
= vi_cs_data
; sect
->section
!= NULL
; ++sect
) {
2322 for (ext
= sect
->section
; ext
->extent
!= NULL
; ++ext
) {
2323 if (sect
->id
== SECT_CONTEXT
)
2324 count
+= 2 + ext
->reg_count
;
2329 /* pa_sc_raster_config/pa_sc_raster_config1 */
2331 /* end clear state */
/*
 * gfx_v8_0_cp_gfx_start - enable the gfx CP and emit the clear state
 *
 * @adev: amdgpu device
 *
 * Programs mmCP_MAX_CONTEXT (max_hw_contexts - 1), mmCP_ENDIAN_SWAP (0)
 * and mmCP_DEVICE_ID (1), calls gfx_v8_0_cp_gfx_enable(adev, true), then
 * reserves gfx_v8_0_get_csb_size() + 4 entries on gfx ring 0 and writes:
 *   - PREAMBLE_CNTL / BEGIN_CLEAR_STATE
 *   - CONTEXT_CONTROL with two 0x80000000 payload dwords
 *   - one SET_CONTEXT_REG packet per extent of every SECT_CONTEXT section
 *     in vi_cs_data
 *   - SET_CONTEXT_REG for mmPA_SC_RASTER_CONFIG with two ASIC-specific
 *     values
 *   - PREAMBLE_CNTL / END_CLEAR_STATE, CLEAR_STATE
 *   - SET_BASE for CE_PARTITION_BASE with 0x8000, 0x8000
 * and commits the ring.
 *
 * NOTE(review): the case labels of the asic_type switch, the error check
 * around amdgpu_ring_lock() and the return statements are not visible in
 * this chunk.
 */
2339 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device
*adev
)
2341 struct amdgpu_ring
*ring
= &adev
->gfx
.gfx_ring
[0];
2342 const struct cs_section_def
*sect
= NULL
;
2343 const struct cs_extent_def
*ext
= NULL
;
/* basic CP setup before the engine is enabled */
2347 WREG32(mmCP_MAX_CONTEXT
, adev
->gfx
.config
.max_hw_contexts
- 1);
2348 WREG32(mmCP_ENDIAN_SWAP
, 0);
2349 WREG32(mmCP_DEVICE_ID
, 1);
2351 gfx_v8_0_cp_gfx_enable(adev
, true);
/* clear state size plus 4 for the SET_BASE packet written at the end */
2353 r
= amdgpu_ring_lock(ring
, gfx_v8_0_get_csb_size(adev
) + 4);
2355 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r
);
2359 /* clear state buffer */
2360 amdgpu_ring_write(ring
, PACKET3(PACKET3_PREAMBLE_CNTL
, 0));
2361 amdgpu_ring_write(ring
, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE
);
2363 amdgpu_ring_write(ring
, PACKET3(PACKET3_CONTEXT_CONTROL
, 1));
2364 amdgpu_ring_write(ring
, 0x80000000);
2365 amdgpu_ring_write(ring
, 0x80000000);
/* emit every context-register extent from the clear state tables */
2367 for (sect
= vi_cs_data
; sect
->section
!= NULL
; ++sect
) {
2368 for (ext
= sect
->section
; ext
->extent
!= NULL
; ++ext
) {
2369 if (sect
->id
== SECT_CONTEXT
) {
2370 amdgpu_ring_write(ring
,
2371 PACKET3(PACKET3_SET_CONTEXT_REG
,
/* register offset is relative to the context register base */
2373 amdgpu_ring_write(ring
,
2374 ext
->reg_index
- PACKET3_SET_CONTEXT_REG_START
);
2375 for (i
= 0; i
< ext
->reg_count
; i
++)
2376 amdgpu_ring_write(ring
, ext
->extent
[i
]);
/* raster config: two values chosen per ASIC (case labels not visible here) */
2381 amdgpu_ring_write(ring
, PACKET3(PACKET3_SET_CONTEXT_REG
, 2));
2382 amdgpu_ring_write(ring
, mmPA_SC_RASTER_CONFIG
- PACKET3_SET_CONTEXT_REG_START
);
2383 switch (adev
->asic_type
) {
2385 amdgpu_ring_write(ring
, 0x16000012);
2386 amdgpu_ring_write(ring
, 0x0000002A);
2390 amdgpu_ring_write(ring
, 0x00000002);
2391 amdgpu_ring_write(ring
, 0x00000000);
2397 amdgpu_ring_write(ring
, PACKET3(PACKET3_PREAMBLE_CNTL
, 0));
2398 amdgpu_ring_write(ring
, PACKET3_PREAMBLE_END_CLEAR_STATE
);
2400 amdgpu_ring_write(ring
, PACKET3(PACKET3_CLEAR_STATE
, 0));
2401 amdgpu_ring_write(ring
, 0);
2403 /* init the CE partitions */
2404 amdgpu_ring_write(ring
, PACKET3(PACKET3_SET_BASE
, 2));
2405 amdgpu_ring_write(ring
, PACKET3_BASE_INDEX(CE_PARTITION_BASE
));
2406 amdgpu_ring_write(ring
, 0x8000);
2407 amdgpu_ring_write(ring
, 0x8000);
2409 amdgpu_ring_unlock_commit(ring
);
/*
 * gfx_v8_0_cp_gfx_resume - program gfx ring buffer 0 and start it
 *
 * @adev: amdgpu device
 *
 * Programs the write pointer delay and VMID (both 0), builds CP_RB0_CNTL
 * from the ring size, points the CP at the read-pointer write-back slot
 * (adev->wb.gpu_addr + rptr_offs * 4) and at the ring buffer base
 * (ring->gpu_addr >> 8), configures the doorbell on every ASIC except
 * CHIP_TOPAZ, then calls gfx_v8_0_cp_gfx_start() and tests the ring with
 * amdgpu_ring_test_ring().
 *
 * NOTE(review): several statements are not visible in this chunk (local
 * declarations, any delays, the conditions around BUF_SWAP and the
 * doorbell enable field, the check on r, the return statements).  On the
 * visible line the return value of gfx_v8_0_cp_gfx_start() is not
 * captured - confirm whether that is intentional.
 */
2414 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device
*adev
)
2416 struct amdgpu_ring
*ring
;
2419 u64 rb_addr
, rptr_addr
;
2422 /* Set the write pointer delay */
2423 WREG32(mmCP_RB_WPTR_DELAY
, 0);
2425 /* set the RB to use vmid 0 */
2426 WREG32(mmCP_RB_VMID
, 0);
2428 /* Set ring buffer size */
2429 ring
= &adev
->gfx
.gfx_ring
[0];
/* RB_BUFSZ is log2 of ring_size / 8; RB_BLKSZ is derived from it */
2430 rb_bufsz
= order_base_2(ring
->ring_size
/ 8);
2431 tmp
= REG_SET_FIELD(0, CP_RB0_CNTL
, RB_BUFSZ
, rb_bufsz
);
2432 tmp
= REG_SET_FIELD(tmp
, CP_RB0_CNTL
, RB_BLKSZ
, rb_bufsz
- 2);
2433 tmp
= REG_SET_FIELD(tmp
, CP_RB0_CNTL
, MTYPE
, 3);
2434 tmp
= REG_SET_FIELD(tmp
, CP_RB0_CNTL
, MIN_IB_AVAILSZ
, 1);
/* NOTE(review): presumably guarded by a big-endian #ifdef that is not visible here */
2436 tmp
= REG_SET_FIELD(tmp
, CP_RB0_CNTL
, BUF_SWAP
, 1);
2438 WREG32(mmCP_RB0_CNTL
, tmp
);
2440 /* Initialize the ring buffer's read and write pointers */
2441 WREG32(mmCP_RB0_CNTL
, tmp
| CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK
);
2443 WREG32(mmCP_RB0_WPTR
, ring
->wptr
);
2445 /* set the wb address whether it's enabled or not */
2446 rptr_addr
= adev
->wb
.gpu_addr
+ (ring
->rptr_offs
* 4);
2447 WREG32(mmCP_RB0_RPTR_ADDR
, lower_32_bits(rptr_addr
));
/* only the low 8 bits of the upper half are written */
2448 WREG32(mmCP_RB0_RPTR_ADDR_HI
, upper_32_bits(rptr_addr
) & 0xFF);
/* rewrite CP_RB0_CNTL without RB_RPTR_WR_ENA */
2451 WREG32(mmCP_RB0_CNTL
, tmp
);
/* ring base is programmed in units of 256 bytes (address >> 8) */
2453 rb_addr
= ring
->gpu_addr
>> 8;
2454 WREG32(mmCP_RB0_BASE
, rb_addr
);
2455 WREG32(mmCP_RB0_BASE_HI
, upper_32_bits(rb_addr
));
2457 /* no gfx doorbells on iceland */
2458 if (adev
->asic_type
!= CHIP_TOPAZ
) {
2459 tmp
= RREG32(mmCP_RB_DOORBELL_CONTROL
);
2460 if (ring
->use_doorbell
) {
2461 tmp
= REG_SET_FIELD(tmp
, CP_RB_DOORBELL_CONTROL
,
2462 DOORBELL_OFFSET
, ring
->doorbell_index
);
/* the field names/values of the next two updates are not visible in this chunk */
2463 tmp
= REG_SET_FIELD(tmp
, CP_RB_DOORBELL_CONTROL
,
2466 tmp
= REG_SET_FIELD(tmp
, CP_RB_DOORBELL_CONTROL
,
2469 WREG32(mmCP_RB_DOORBELL_CONTROL
, tmp
);
/* Tonga additionally programs the doorbell range registers */
2471 if (adev
->asic_type
== CHIP_TONGA
) {
2472 tmp
= REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER
,
2473 DOORBELL_RANGE_LOWER
,
2474 AMDGPU_DOORBELL_GFX_RING0
);
2475 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER
, tmp
);
2477 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER
,
2478 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK
);
2483 /* start the ring */
2484 gfx_v8_0_cp_gfx_start(adev
);
2486 r
= amdgpu_ring_test_ring(ring
);
/* presumably executed only when the ring test fails - TODO confirm */
2488 ring
->ready
= false;
/*
 * gfx_v8_0_cp_compute_enable - update the halt bits of the compute MEs
 *
 * @adev: amdgpu device
 * @enable: selects which of the two register writes below is performed
 *
 * One path writes 0 to mmCP_MEC_CNTL; the other writes the ME1 and ME2
 * halt masks and marks every compute ring (adev->gfx.num_compute_rings of
 * them) as not ready.
 *
 * NOTE(review): the branch on @enable is not visible in this chunk;
 * presumably enable == true takes the "write 0" path - confirm.
 */
2495 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device
*adev
, bool enable
)
/* no halt bits set */
2500 WREG32(mmCP_MEC_CNTL
, 0);
/* halt both MEC micro engines and flag the rings as unusable */
2502 WREG32(mmCP_MEC_CNTL
, (CP_MEC_CNTL__MEC_ME1_HALT_MASK
| CP_MEC_CNTL__MEC_ME2_HALT_MASK
));
2503 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++)
2504 adev
->gfx
.compute_ring
[i
].ready
= false;
/*
 * gfx_v8_0_cp_compute_start - start the compute CP
 *
 * @adev: amdgpu device
 *
 * Calls gfx_v8_0_cp_compute_enable() with enable = true.  The return
 * statement is not visible in this chunk.
 */
2509 static int gfx_v8_0_cp_compute_start(struct amdgpu_device
*adev
)
2511 gfx_v8_0_cp_compute_enable(adev
, true);
/*
 * gfx_v8_0_cp_compute_load_microcode - write MEC firmware to the compute CP
 *
 * @adev: amdgpu device
 *
 * Needs adev->gfx.mec_fw.  Calls gfx_v8_0_cp_compute_enable(adev, false),
 * records the MEC ucode version, then uploads the payload to ME1: the
 * ucode address register is reset to 0, the payload is written dword by
 * dword to the data register, and the firmware version is written to the
 * address register afterwards.  If adev->gfx.mec2_fw is present the same
 * procedure is repeated for ME2 with that image.
 *
 * NOTE(review): the statement executed when mec_fw is NULL and the return
 * statements are not visible in this chunk.  No visible bounds check of
 * the header offsets/sizes against the blob size.
 */
2516 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device
*adev
)
2518 const struct gfx_firmware_header_v1_0
*mec_hdr
;
2519 const __le32
*fw_data
;
2520 unsigned i
, fw_size
;
/* MEC firmware is mandatory */
2522 if (!adev
->gfx
.mec_fw
)
2525 gfx_v8_0_cp_compute_enable(adev
, false);
2527 mec_hdr
= (const struct gfx_firmware_header_v1_0
*)adev
->gfx
.mec_fw
->data
;
2528 amdgpu_ucode_print_gfx_hdr(&mec_hdr
->header
);
2529 adev
->gfx
.mec_fw_version
= le32_to_cpu(mec_hdr
->header
.ucode_version
);
/* payload pointer and dword count for MEC1 */
2531 fw_data
= (const __le32
*)
2532 (adev
->gfx
.mec_fw
->data
+
2533 le32_to_cpu(mec_hdr
->header
.ucode_array_offset_bytes
));
2534 fw_size
= le32_to_cpu(mec_hdr
->header
.ucode_size_bytes
) / 4;
/* upload to ME1; fw_data is indexed here rather than advanced */
2537 WREG32(mmCP_MEC_ME1_UCODE_ADDR
, 0);
2538 for (i
= 0; i
< fw_size
; i
++)
2539 WREG32(mmCP_MEC_ME1_UCODE_DATA
, le32_to_cpup(fw_data
+i
));
2540 WREG32(mmCP_MEC_ME1_UCODE_ADDR
, adev
->gfx
.mec_fw_version
);
2542 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2543 if (adev
->gfx
.mec2_fw
) {
2544 const struct gfx_firmware_header_v1_0
*mec2_hdr
;
2546 mec2_hdr
= (const struct gfx_firmware_header_v1_0
*)adev
->gfx
.mec2_fw
->data
;
2547 amdgpu_ucode_print_gfx_hdr(&mec2_hdr
->header
);
2548 adev
->gfx
.mec2_fw_version
= le32_to_cpu(mec2_hdr
->header
.ucode_version
);
/* payload pointer and dword count for MEC2 */
2550 fw_data
= (const __le32
*)
2551 (adev
->gfx
.mec2_fw
->data
+
2552 le32_to_cpu(mec2_hdr
->header
.ucode_array_offset_bytes
));
2553 fw_size
= le32_to_cpu(mec2_hdr
->header
.ucode_size_bytes
) / 4;
/* upload to ME2 */
2555 WREG32(mmCP_MEC_ME2_UCODE_ADDR
, 0);
2556 for (i
= 0; i
< fw_size
; i
++)
2557 WREG32(mmCP_MEC_ME2_UCODE_DATA
, le32_to_cpup(fw_data
+i
));
2558 WREG32(mmCP_MEC_ME2_UCODE_ADDR
, adev
->gfx
.mec2_fw_version
);
/*
 * Member list of a descriptor made of 32-bit fields: 256 fields annotated
 * ordinal0..ordinal255, followed by a 256-dword array reserved for the
 * microcode.
 *
 * NOTE(review): the declaration line that opens this aggregate is not
 * visible in this chunk; presumably this is the compute memory queue
 * descriptor (struct vi_mqd in the upstream driver) - confirm.  The
 * ordinal comments give each field's dword index, so fields must not be
 * reordered, inserted or removed.
 */
2565 uint32_t header
; /* ordinal0 */
2566 uint32_t compute_dispatch_initiator
; /* ordinal1 */
2567 uint32_t compute_dim_x
; /* ordinal2 */
2568 uint32_t compute_dim_y
; /* ordinal3 */
2569 uint32_t compute_dim_z
; /* ordinal4 */
2570 uint32_t compute_start_x
; /* ordinal5 */
2571 uint32_t compute_start_y
; /* ordinal6 */
2572 uint32_t compute_start_z
; /* ordinal7 */
2573 uint32_t compute_num_thread_x
; /* ordinal8 */
2574 uint32_t compute_num_thread_y
; /* ordinal9 */
2575 uint32_t compute_num_thread_z
; /* ordinal10 */
2576 uint32_t compute_pipelinestat_enable
; /* ordinal11 */
2577 uint32_t compute_perfcount_enable
; /* ordinal12 */
2578 uint32_t compute_pgm_lo
; /* ordinal13 */
2579 uint32_t compute_pgm_hi
; /* ordinal14 */
2580 uint32_t compute_tba_lo
; /* ordinal15 */
2581 uint32_t compute_tba_hi
; /* ordinal16 */
2582 uint32_t compute_tma_lo
; /* ordinal17 */
2583 uint32_t compute_tma_hi
; /* ordinal18 */
2584 uint32_t compute_pgm_rsrc1
; /* ordinal19 */
2585 uint32_t compute_pgm_rsrc2
; /* ordinal20 */
2586 uint32_t compute_vmid
; /* ordinal21 */
2587 uint32_t compute_resource_limits
; /* ordinal22 */
2588 uint32_t compute_static_thread_mgmt_se0
; /* ordinal23 */
2589 uint32_t compute_static_thread_mgmt_se1
; /* ordinal24 */
2590 uint32_t compute_tmpring_size
; /* ordinal25 */
2591 uint32_t compute_static_thread_mgmt_se2
; /* ordinal26 */
2592 uint32_t compute_static_thread_mgmt_se3
; /* ordinal27 */
2593 uint32_t compute_restart_x
; /* ordinal28 */
2594 uint32_t compute_restart_y
; /* ordinal29 */
2595 uint32_t compute_restart_z
; /* ordinal30 */
2596 uint32_t compute_thread_trace_enable
; /* ordinal31 */
2597 uint32_t compute_misc_reserved
; /* ordinal32 */
2598 uint32_t compute_dispatch_id
; /* ordinal33 */
2599 uint32_t compute_threadgroup_id
; /* ordinal34 */
2600 uint32_t compute_relaunch
; /* ordinal35 */
2601 uint32_t compute_wave_restore_addr_lo
; /* ordinal36 */
2602 uint32_t compute_wave_restore_addr_hi
; /* ordinal37 */
2603 uint32_t compute_wave_restore_control
; /* ordinal38 */
2604 uint32_t reserved9
; /* ordinal39 */
2605 uint32_t reserved10
; /* ordinal40 */
2606 uint32_t reserved11
; /* ordinal41 */
2607 uint32_t reserved12
; /* ordinal42 */
2608 uint32_t reserved13
; /* ordinal43 */
2609 uint32_t reserved14
; /* ordinal44 */
2610 uint32_t reserved15
; /* ordinal45 */
2611 uint32_t reserved16
; /* ordinal46 */
2612 uint32_t reserved17
; /* ordinal47 */
2613 uint32_t reserved18
; /* ordinal48 */
2614 uint32_t reserved19
; /* ordinal49 */
2615 uint32_t reserved20
; /* ordinal50 */
2616 uint32_t reserved21
; /* ordinal51 */
2617 uint32_t reserved22
; /* ordinal52 */
2618 uint32_t reserved23
; /* ordinal53 */
2619 uint32_t reserved24
; /* ordinal54 */
2620 uint32_t reserved25
; /* ordinal55 */
2621 uint32_t reserved26
; /* ordinal56 */
2622 uint32_t reserved27
; /* ordinal57 */
2623 uint32_t reserved28
; /* ordinal58 */
2624 uint32_t reserved29
; /* ordinal59 */
2625 uint32_t reserved30
; /* ordinal60 */
2626 uint32_t reserved31
; /* ordinal61 */
2627 uint32_t reserved32
; /* ordinal62 */
2628 uint32_t reserved33
; /* ordinal63 */
2629 uint32_t reserved34
; /* ordinal64 */
2630 uint32_t compute_user_data_0
; /* ordinal65 */
2631 uint32_t compute_user_data_1
; /* ordinal66 */
2632 uint32_t compute_user_data_2
; /* ordinal67 */
2633 uint32_t compute_user_data_3
; /* ordinal68 */
2634 uint32_t compute_user_data_4
; /* ordinal69 */
2635 uint32_t compute_user_data_5
; /* ordinal70 */
2636 uint32_t compute_user_data_6
; /* ordinal71 */
2637 uint32_t compute_user_data_7
; /* ordinal72 */
2638 uint32_t compute_user_data_8
; /* ordinal73 */
2639 uint32_t compute_user_data_9
; /* ordinal74 */
2640 uint32_t compute_user_data_10
; /* ordinal75 */
2641 uint32_t compute_user_data_11
; /* ordinal76 */
2642 uint32_t compute_user_data_12
; /* ordinal77 */
2643 uint32_t compute_user_data_13
; /* ordinal78 */
2644 uint32_t compute_user_data_14
; /* ordinal79 */
2645 uint32_t compute_user_data_15
; /* ordinal80 */
2646 uint32_t cp_compute_csinvoc_count_lo
; /* ordinal81 */
2647 uint32_t cp_compute_csinvoc_count_hi
; /* ordinal82 */
2648 uint32_t reserved35
; /* ordinal83 */
2649 uint32_t reserved36
; /* ordinal84 */
2650 uint32_t reserved37
; /* ordinal85 */
2651 uint32_t cp_mqd_query_time_lo
; /* ordinal86 */
2652 uint32_t cp_mqd_query_time_hi
; /* ordinal87 */
2653 uint32_t cp_mqd_connect_start_time_lo
; /* ordinal88 */
2654 uint32_t cp_mqd_connect_start_time_hi
; /* ordinal89 */
2655 uint32_t cp_mqd_connect_end_time_lo
; /* ordinal90 */
2656 uint32_t cp_mqd_connect_end_time_hi
; /* ordinal91 */
2657 uint32_t cp_mqd_connect_end_wf_count
; /* ordinal92 */
2658 uint32_t cp_mqd_connect_end_pq_rptr
; /* ordinal93 */
2659 uint32_t cp_mqd_connect_end_pq_wptr
; /* ordinal94 */
2660 uint32_t cp_mqd_connect_end_ib_rptr
; /* ordinal95 */
2661 uint32_t reserved38
; /* ordinal96 */
2662 uint32_t reserved39
; /* ordinal97 */
2663 uint32_t cp_mqd_save_start_time_lo
; /* ordinal98 */
2664 uint32_t cp_mqd_save_start_time_hi
; /* ordinal99 */
2665 uint32_t cp_mqd_save_end_time_lo
; /* ordinal100 */
2666 uint32_t cp_mqd_save_end_time_hi
; /* ordinal101 */
2667 uint32_t cp_mqd_restore_start_time_lo
; /* ordinal102 */
2668 uint32_t cp_mqd_restore_start_time_hi
; /* ordinal103 */
2669 uint32_t cp_mqd_restore_end_time_lo
; /* ordinal104 */
2670 uint32_t cp_mqd_restore_end_time_hi
; /* ordinal105 */
2671 uint32_t reserved40
; /* ordinal106 */
2672 uint32_t reserved41
; /* ordinal107 */
2673 uint32_t gds_cs_ctxsw_cnt0
; /* ordinal108 */
2674 uint32_t gds_cs_ctxsw_cnt1
; /* ordinal109 */
2675 uint32_t gds_cs_ctxsw_cnt2
; /* ordinal110 */
2676 uint32_t gds_cs_ctxsw_cnt3
; /* ordinal111 */
2677 uint32_t reserved42
; /* ordinal112 */
2678 uint32_t reserved43
; /* ordinal113 */
2679 uint32_t cp_pq_exe_status_lo
; /* ordinal114 */
2680 uint32_t cp_pq_exe_status_hi
; /* ordinal115 */
2681 uint32_t cp_packet_id_lo
; /* ordinal116 */
2682 uint32_t cp_packet_id_hi
; /* ordinal117 */
2683 uint32_t cp_packet_exe_status_lo
; /* ordinal118 */
2684 uint32_t cp_packet_exe_status_hi
; /* ordinal119 */
2685 uint32_t gds_save_base_addr_lo
; /* ordinal120 */
2686 uint32_t gds_save_base_addr_hi
; /* ordinal121 */
2687 uint32_t gds_save_mask_lo
; /* ordinal122 */
2688 uint32_t gds_save_mask_hi
; /* ordinal123 */
2689 uint32_t ctx_save_base_addr_lo
; /* ordinal124 */
2690 uint32_t ctx_save_base_addr_hi
; /* ordinal125 */
2691 uint32_t reserved44
; /* ordinal126 */
2692 uint32_t reserved45
; /* ordinal127 */
2693 uint32_t cp_mqd_base_addr_lo
; /* ordinal128 */
2694 uint32_t cp_mqd_base_addr_hi
; /* ordinal129 */
2695 uint32_t cp_hqd_active
; /* ordinal130 */
2696 uint32_t cp_hqd_vmid
; /* ordinal131 */
2697 uint32_t cp_hqd_persistent_state
; /* ordinal132 */
2698 uint32_t cp_hqd_pipe_priority
; /* ordinal133 */
2699 uint32_t cp_hqd_queue_priority
; /* ordinal134 */
2700 uint32_t cp_hqd_quantum
; /* ordinal135 */
2701 uint32_t cp_hqd_pq_base_lo
; /* ordinal136 */
2702 uint32_t cp_hqd_pq_base_hi
; /* ordinal137 */
2703 uint32_t cp_hqd_pq_rptr
; /* ordinal138 */
2704 uint32_t cp_hqd_pq_rptr_report_addr_lo
; /* ordinal139 */
2705 uint32_t cp_hqd_pq_rptr_report_addr_hi
; /* ordinal140 */
2706 uint32_t cp_hqd_pq_wptr_poll_addr
; /* ordinal141 */
2707 uint32_t cp_hqd_pq_wptr_poll_addr_hi
; /* ordinal142 */
2708 uint32_t cp_hqd_pq_doorbell_control
; /* ordinal143 */
2709 uint32_t cp_hqd_pq_wptr
; /* ordinal144 */
2710 uint32_t cp_hqd_pq_control
; /* ordinal145 */
2711 uint32_t cp_hqd_ib_base_addr_lo
; /* ordinal146 */
2712 uint32_t cp_hqd_ib_base_addr_hi
; /* ordinal147 */
2713 uint32_t cp_hqd_ib_rptr
; /* ordinal148 */
2714 uint32_t cp_hqd_ib_control
; /* ordinal149 */
2715 uint32_t cp_hqd_iq_timer
; /* ordinal150 */
2716 uint32_t cp_hqd_iq_rptr
; /* ordinal151 */
2717 uint32_t cp_hqd_dequeue_request
; /* ordinal152 */
2718 uint32_t cp_hqd_dma_offload
; /* ordinal153 */
2719 uint32_t cp_hqd_sema_cmd
; /* ordinal154 */
2720 uint32_t cp_hqd_msg_type
; /* ordinal155 */
2721 uint32_t cp_hqd_atomic0_preop_lo
; /* ordinal156 */
2722 uint32_t cp_hqd_atomic0_preop_hi
; /* ordinal157 */
2723 uint32_t cp_hqd_atomic1_preop_lo
; /* ordinal158 */
2724 uint32_t cp_hqd_atomic1_preop_hi
; /* ordinal159 */
2725 uint32_t cp_hqd_hq_status0
; /* ordinal160 */
2726 uint32_t cp_hqd_hq_control0
; /* ordinal161 */
2727 uint32_t cp_mqd_control
; /* ordinal162 */
2728 uint32_t cp_hqd_hq_status1
; /* ordinal163 */
2729 uint32_t cp_hqd_hq_control1
; /* ordinal164 */
2730 uint32_t cp_hqd_eop_base_addr_lo
; /* ordinal165 */
2731 uint32_t cp_hqd_eop_base_addr_hi
; /* ordinal166 */
2732 uint32_t cp_hqd_eop_control
; /* ordinal167 */
2733 uint32_t cp_hqd_eop_rptr
; /* ordinal168 */
2734 uint32_t cp_hqd_eop_wptr
; /* ordinal169 */
2735 uint32_t cp_hqd_eop_done_events
; /* ordinal170 */
2736 uint32_t cp_hqd_ctx_save_base_addr_lo
; /* ordinal171 */
2737 uint32_t cp_hqd_ctx_save_base_addr_hi
; /* ordinal172 */
2738 uint32_t cp_hqd_ctx_save_control
; /* ordinal173 */
2739 uint32_t cp_hqd_cntl_stack_offset
; /* ordinal174 */
2740 uint32_t cp_hqd_cntl_stack_size
; /* ordinal175 */
2741 uint32_t cp_hqd_wg_state_offset
; /* ordinal176 */
2742 uint32_t cp_hqd_ctx_save_size
; /* ordinal177 */
2743 uint32_t cp_hqd_gds_resource_state
; /* ordinal178 */
2744 uint32_t cp_hqd_error
; /* ordinal179 */
2745 uint32_t cp_hqd_eop_wptr_mem
; /* ordinal180 */
2746 uint32_t cp_hqd_eop_dones
; /* ordinal181 */
2747 uint32_t reserved46
; /* ordinal182 */
2748 uint32_t reserved47
; /* ordinal183 */
2749 uint32_t reserved48
; /* ordinal184 */
2750 uint32_t reserved49
; /* ordinal185 */
2751 uint32_t reserved50
; /* ordinal186 */
2752 uint32_t reserved51
; /* ordinal187 */
2753 uint32_t reserved52
; /* ordinal188 */
2754 uint32_t reserved53
; /* ordinal189 */
2755 uint32_t reserved54
; /* ordinal190 */
2756 uint32_t reserved55
; /* ordinal191 */
2757 uint32_t iqtimer_pkt_header
; /* ordinal192 */
2758 uint32_t iqtimer_pkt_dw0
; /* ordinal193 */
2759 uint32_t iqtimer_pkt_dw1
; /* ordinal194 */
2760 uint32_t iqtimer_pkt_dw2
; /* ordinal195 */
2761 uint32_t iqtimer_pkt_dw3
; /* ordinal196 */
2762 uint32_t iqtimer_pkt_dw4
; /* ordinal197 */
2763 uint32_t iqtimer_pkt_dw5
; /* ordinal198 */
2764 uint32_t iqtimer_pkt_dw6
; /* ordinal199 */
2765 uint32_t iqtimer_pkt_dw7
; /* ordinal200 */
2766 uint32_t iqtimer_pkt_dw8
; /* ordinal201 */
2767 uint32_t iqtimer_pkt_dw9
; /* ordinal202 */
2768 uint32_t iqtimer_pkt_dw10
; /* ordinal203 */
2769 uint32_t iqtimer_pkt_dw11
; /* ordinal204 */
2770 uint32_t iqtimer_pkt_dw12
; /* ordinal205 */
2771 uint32_t iqtimer_pkt_dw13
; /* ordinal206 */
2772 uint32_t iqtimer_pkt_dw14
; /* ordinal207 */
2773 uint32_t iqtimer_pkt_dw15
; /* ordinal208 */
2774 uint32_t iqtimer_pkt_dw16
; /* ordinal209 */
2775 uint32_t iqtimer_pkt_dw17
; /* ordinal210 */
2776 uint32_t iqtimer_pkt_dw18
; /* ordinal211 */
2777 uint32_t iqtimer_pkt_dw19
; /* ordinal212 */
2778 uint32_t iqtimer_pkt_dw20
; /* ordinal213 */
2779 uint32_t iqtimer_pkt_dw21
; /* ordinal214 */
2780 uint32_t iqtimer_pkt_dw22
; /* ordinal215 */
2781 uint32_t iqtimer_pkt_dw23
; /* ordinal216 */
2782 uint32_t iqtimer_pkt_dw24
; /* ordinal217 */
2783 uint32_t iqtimer_pkt_dw25
; /* ordinal218 */
2784 uint32_t iqtimer_pkt_dw26
; /* ordinal219 */
2785 uint32_t iqtimer_pkt_dw27
; /* ordinal220 */
2786 uint32_t iqtimer_pkt_dw28
; /* ordinal221 */
2787 uint32_t iqtimer_pkt_dw29
; /* ordinal222 */
2788 uint32_t iqtimer_pkt_dw30
; /* ordinal223 */
2789 uint32_t iqtimer_pkt_dw31
; /* ordinal224 */
2790 uint32_t reserved56
; /* ordinal225 */
2791 uint32_t reserved57
; /* ordinal226 */
2792 uint32_t reserved58
; /* ordinal227 */
2793 uint32_t set_resources_header
; /* ordinal228 */
2794 uint32_t set_resources_dw1
; /* ordinal229 */
2795 uint32_t set_resources_dw2
; /* ordinal230 */
2796 uint32_t set_resources_dw3
; /* ordinal231 */
2797 uint32_t set_resources_dw4
; /* ordinal232 */
2798 uint32_t set_resources_dw5
; /* ordinal233 */
2799 uint32_t set_resources_dw6
; /* ordinal234 */
2800 uint32_t set_resources_dw7
; /* ordinal235 */
2801 uint32_t reserved59
; /* ordinal236 */
2802 uint32_t reserved60
; /* ordinal237 */
2803 uint32_t reserved61
; /* ordinal238 */
2804 uint32_t reserved62
; /* ordinal239 */
2805 uint32_t reserved63
; /* ordinal240 */
2806 uint32_t reserved64
; /* ordinal241 */
2807 uint32_t reserved65
; /* ordinal242 */
2808 uint32_t reserved66
; /* ordinal243 */
2809 uint32_t reserved67
; /* ordinal244 */
2810 uint32_t reserved68
; /* ordinal245 */
2811 uint32_t reserved69
; /* ordinal246 */
2812 uint32_t reserved70
; /* ordinal247 */
2813 uint32_t reserved71
; /* ordinal248 */
2814 uint32_t reserved72
; /* ordinal249 */
2815 uint32_t reserved73
; /* ordinal250 */
2816 uint32_t reserved74
; /* ordinal251 */
2817 uint32_t reserved75
; /* ordinal252 */
2818 uint32_t reserved76
; /* ordinal253 */
2819 uint32_t reserved77
; /* ordinal254 */
2820 uint32_t reserved78
; /* ordinal255 */
2822 uint32_t reserved_t
[256]; /* Reserve 256 dword buffer used by ucode */
2825 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device
*adev
)
2829 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
2830 struct amdgpu_ring
*ring
= &adev
->gfx
.compute_ring
[i
];
2832 if (ring
->mqd_obj
) {
2833 r
= amdgpu_bo_reserve(ring
->mqd_obj
, false);
2834 if (unlikely(r
!= 0))
2835 dev_warn(adev
->dev
, "(%d) reserve MQD bo failed\n", r
);
2837 amdgpu_bo_unpin(ring
->mqd_obj
);
2838 amdgpu_bo_unreserve(ring
->mqd_obj
);
2840 amdgpu_bo_unref(&ring
->mqd_obj
);
2841 ring
->mqd_obj
= NULL
;
/*
 * gfx_v8_0_cp_compute_resume - program the MEC pipes and compute queues
 *
 * Visible steps, in order:
 *  1. under srbm_mutex, for every MEC pipe: select it, write the EOP buffer
 *     address, set VMID 0 and the EOP size, then restore the SRBM selection;
 *  2. for every compute ring: create/reserve/pin/map its MQD buffer object,
 *     fill the MQD while mirroring each value into the matching CP_HQD_ and
 *     CP_MQD_ registers, and finally mark the queue active;
 *  3. start the compute CP and ring-test every compute ring.
 * On the visible failure paths gfx_v8_0_cp_compute_fini() is called to drop
 * the MQD objects.
 */
2846 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device
*adev
)
2850 bool use_doorbell
= true;
2858 /* init the pipes */
2859 mutex_lock(&adev
->srbm_mutex
);
2860 for (i
= 0; i
< (adev
->gfx
.mec
.num_pipe
* adev
->gfx
.mec
.num_mec
); i
++) {
/* flat index i -> (me, pipe): the first four indices map to ME 1, the rest to ME 2 */
2861 int me
= (i
< 4) ? 1 : 2;
2862 int pipe
= (i
< 4) ? i
: (i
- 4);
/* each pipe gets its own MEC_HPD_SIZE-sized slice of the HPD EOP area */
2864 eop_gpu_addr
= adev
->gfx
.mec
.hpd_eop_gpu_addr
+ (i
* MEC_HPD_SIZE
);
2867 vi_srbm_select(adev
, me
, pipe
, 0, 0);
2869 /* write the EOP addr */
2870 WREG32(mmCP_HQD_EOP_BASE_ADDR
, eop_gpu_addr
);
2871 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI
, upper_32_bits(eop_gpu_addr
));
2873 /* set the VMID assigned */
2874 WREG32(mmCP_HQD_VMID
, 0);
2876 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2877 tmp
= RREG32(mmCP_HQD_EOP_CONTROL
);
2878 tmp
= REG_SET_FIELD(tmp
, CP_HQD_EOP_CONTROL
, EOP_SIZE
,
2879 (order_base_2(MEC_HPD_SIZE
/ 4) - 1));
2880 WREG32(mmCP_HQD_EOP_CONTROL
, tmp
);
/* restore the default SRBM selection before dropping the mutex */
2882 vi_srbm_select(adev
, 0, 0, 0, 0);
2883 mutex_unlock(&adev
->srbm_mutex
);
2885 /* init the queues: one MQD per compute ring */
2886 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
2887 struct amdgpu_ring
*ring
= &adev
->gfx
.compute_ring
[i
];
/* allocate the MQD buffer object only once; later calls reuse it */
2889 if (ring
->mqd_obj
== NULL
) {
2890 r
= amdgpu_bo_create(adev
,
2891 sizeof(struct vi_mqd
),
2893 AMDGPU_GEM_DOMAIN_GTT
, 0, NULL
,
2896 dev_warn(adev
->dev
, "(%d) create MQD bo failed\n", r
);
2901 r
= amdgpu_bo_reserve(ring
->mqd_obj
, false);
2902 if (unlikely(r
!= 0)) {
2903 gfx_v8_0_cp_compute_fini(adev
);
2906 r
= amdgpu_bo_pin(ring
->mqd_obj
, AMDGPU_GEM_DOMAIN_GTT
,
2909 dev_warn(adev
->dev
, "(%d) pin MQD bo failed\n", r
);
2910 gfx_v8_0_cp_compute_fini(adev
);
2913 r
= amdgpu_bo_kmap(ring
->mqd_obj
, (void **)&buf
);
2915 dev_warn(adev
->dev
, "(%d) map MQD bo failed\n", r
);
2916 gfx_v8_0_cp_compute_fini(adev
);
2920 /* init the mqd struct */
2921 memset(buf
, 0, sizeof(struct vi_mqd
));
2923 mqd
= (struct vi_mqd
*)buf
;
2924 mqd
->header
= 0xC0310800;
2925 mqd
->compute_pipelinestat_enable
= 0x00000001;
2926 mqd
->compute_static_thread_mgmt_se0
= 0xffffffff;
2927 mqd
->compute_static_thread_mgmt_se1
= 0xffffffff;
2928 mqd
->compute_static_thread_mgmt_se2
= 0xffffffff;
2929 mqd
->compute_static_thread_mgmt_se3
= 0xffffffff;
2930 mqd
->compute_misc_reserved
= 0x00000003;
/* the CP_HQD_ / CP_MQD_ accesses below run with this ring selected via SRBM, under srbm_mutex */
2932 mutex_lock(&adev
->srbm_mutex
);
2933 vi_srbm_select(adev
, ring
->me
,
2937 /* disable wptr polling */
2938 tmp
= RREG32(mmCP_PQ_WPTR_POLL_CNTL
);
2939 tmp
= REG_SET_FIELD(tmp
, CP_PQ_WPTR_POLL_CNTL
, EN
, 0);
2940 WREG32(mmCP_PQ_WPTR_POLL_CNTL
, tmp
);
/* record the EOP base programmed in the pipe loop above into the MQD */
2942 mqd
->cp_hqd_eop_base_addr_lo
=
2943 RREG32(mmCP_HQD_EOP_BASE_ADDR
);
2944 mqd
->cp_hqd_eop_base_addr_hi
=
2945 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI
);
2947 /* enable doorbell? */
2948 tmp
= RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL
);
/* NOTE(review): the two assignments below look like the branches of a test on use_doorbell - confirm */
2950 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
, DOORBELL_EN
, 1);
2952 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
, DOORBELL_EN
, 0);
2954 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL
, tmp
);
2955 mqd
->cp_hqd_pq_doorbell_control
= tmp
;
2957 /* disable the queue if it's active */
2958 mqd
->cp_hqd_dequeue_request
= 0;
2959 mqd
->cp_hqd_pq_rptr
= 0;
2960 mqd
->cp_hqd_pq_wptr
= 0;
2961 if (RREG32(mmCP_HQD_ACTIVE
) & 1) {
2962 WREG32(mmCP_HQD_DEQUEUE_REQUEST
, 1);
/* poll, bounded by adev->usec_timeout iterations, for the ACTIVE bit to clear */
2963 for (j
= 0; j
< adev
->usec_timeout
; j
++) {
2964 if (!(RREG32(mmCP_HQD_ACTIVE
) & 1))
2968 WREG32(mmCP_HQD_DEQUEUE_REQUEST
, mqd
->cp_hqd_dequeue_request
);
2969 WREG32(mmCP_HQD_PQ_RPTR
, mqd
->cp_hqd_pq_rptr
);
2970 WREG32(mmCP_HQD_PQ_WPTR
, mqd
->cp_hqd_pq_wptr
);
2973 /* set the pointer to the MQD */
2974 mqd
->cp_mqd_base_addr_lo
= mqd_gpu_addr
& 0xfffffffc;
2975 mqd
->cp_mqd_base_addr_hi
= upper_32_bits(mqd_gpu_addr
);
2976 WREG32(mmCP_MQD_BASE_ADDR
, mqd
->cp_mqd_base_addr_lo
);
2977 WREG32(mmCP_MQD_BASE_ADDR_HI
, mqd
->cp_mqd_base_addr_hi
);
2979 /* set MQD vmid to 0 */
2980 tmp
= RREG32(mmCP_MQD_CONTROL
);
2981 tmp
= REG_SET_FIELD(tmp
, CP_MQD_CONTROL
, VMID
, 0);
2982 WREG32(mmCP_MQD_CONTROL
, tmp
);
2983 mqd
->cp_mqd_control
= tmp
;
2985 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2986 hqd_gpu_addr
= ring
->gpu_addr
>> 8;
2987 mqd
->cp_hqd_pq_base_lo
= hqd_gpu_addr
;
2988 mqd
->cp_hqd_pq_base_hi
= upper_32_bits(hqd_gpu_addr
);
2989 WREG32(mmCP_HQD_PQ_BASE
, mqd
->cp_hqd_pq_base_lo
);
2990 WREG32(mmCP_HQD_PQ_BASE_HI
, mqd
->cp_hqd_pq_base_hi
);
2992 /* set up the HQD, this is similar to CP_RB0_CNTL */
2993 tmp
= RREG32(mmCP_HQD_PQ_CONTROL
);
2994 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, QUEUE_SIZE
,
2995 (order_base_2(ring
->ring_size
/ 4) - 1));
/* NOTE(review): the value is pre-shifted by 8 before REG_SET_FIELD applies the field shift - confirm this is intended */
2996 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, RPTR_BLOCK_SIZE
,
2997 ((order_base_2(AMDGPU_GPU_PAGE_SIZE
/ 4) - 1) << 8));
2999 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, ENDIAN_SWAP
, 1);
3001 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, UNORD_DISPATCH
, 0);
3002 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, ROQ_PQ_IB_FLIP
, 0);
3003 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, PRIV_STATE
, 1);
3004 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, KMD_QUEUE
, 1);
3005 WREG32(mmCP_HQD_PQ_CONTROL
, tmp
);
3006 mqd
->cp_hqd_pq_control
= tmp
;
3008 /* set the wb address whether it's enabled or not */
3009 wb_gpu_addr
= adev
->wb
.gpu_addr
+ (ring
->rptr_offs
* 4);
3010 mqd
->cp_hqd_pq_rptr_report_addr_lo
= wb_gpu_addr
& 0xfffffffc;
3011 mqd
->cp_hqd_pq_rptr_report_addr_hi
=
3012 upper_32_bits(wb_gpu_addr
) & 0xffff;
3013 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR
,
3014 mqd
->cp_hqd_pq_rptr_report_addr_lo
);
3015 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI
,
3016 mqd
->cp_hqd_pq_rptr_report_addr_hi
);
3018 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3019 wb_gpu_addr
= adev
->wb
.gpu_addr
+ (ring
->wptr_offs
* 4);
3020 mqd
->cp_hqd_pq_wptr_poll_addr
= wb_gpu_addr
& 0xfffffffc;
3021 mqd
->cp_hqd_pq_wptr_poll_addr_hi
= upper_32_bits(wb_gpu_addr
) & 0xffff;
3022 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR
, mqd
->cp_hqd_pq_wptr_poll_addr
);
3023 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI
,
3024 mqd
->cp_hqd_pq_wptr_poll_addr_hi
);
3026 /* enable the doorbell if requested */
/* only Carrizo programs the MEC doorbell range, spanning the KIQ to MEC_RING7 doorbell indices */
3028 if (adev
->asic_type
== CHIP_CARRIZO
) {
3029 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER
,
3030 AMDGPU_DOORBELL_KIQ
<< 2);
3031 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER
,
3032 AMDGPU_DOORBELL_MEC_RING7
<< 2);
3034 tmp
= RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL
);
3035 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
3036 DOORBELL_OFFSET
, ring
->doorbell_index
);
3037 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
, DOORBELL_EN
, 1);
3038 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
, DOORBELL_SOURCE
, 0);
3039 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
, DOORBELL_HIT
, 0);
3040 mqd
->cp_hqd_pq_doorbell_control
= tmp
;
3043 mqd
->cp_hqd_pq_doorbell_control
= 0;
3045 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL
,
3046 mqd
->cp_hqd_pq_doorbell_control
);
3048 /* set the vmid for the queue */
3049 mqd
->cp_hqd_vmid
= 0;
3050 WREG32(mmCP_HQD_VMID
, mqd
->cp_hqd_vmid
);
3052 tmp
= RREG32(mmCP_HQD_PERSISTENT_STATE
);
3053 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PERSISTENT_STATE
, PRELOAD_SIZE
, 0x53);
3054 WREG32(mmCP_HQD_PERSISTENT_STATE
, tmp
);
3055 mqd
->cp_hqd_persistent_state
= tmp
;
3057 /* activate the queue */
3058 mqd
->cp_hqd_active
= 1;
3059 WREG32(mmCP_HQD_ACTIVE
, mqd
->cp_hqd_active
);
3061 vi_srbm_select(adev
, 0, 0, 0, 0);
3062 mutex_unlock(&adev
->srbm_mutex
);
/* the MQD is fully written: drop the CPU mapping and the reservation */
3064 amdgpu_bo_kunmap(ring
->mqd_obj
);
3065 amdgpu_bo_unreserve(ring
->mqd_obj
);
/* set DOORBELL_ENABLE in CP_PQ_STATUS */
3069 tmp
= RREG32(mmCP_PQ_STATUS
);
3070 tmp
= REG_SET_FIELD(tmp
, CP_PQ_STATUS
, DOORBELL_ENABLE
, 1);
3071 WREG32(mmCP_PQ_STATUS
, tmp
);
3074 r
= gfx_v8_0_cp_compute_start(adev
);
/* ring-test every compute ring */
3078 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
3079 struct amdgpu_ring
*ring
= &adev
->gfx
.compute_ring
[i
];
3082 r
= amdgpu_ring_test_ring(ring
);
3084 ring
->ready
= false;
/*
 * gfx_v8_0_cp_resume - load CP firmware (or check its load) and resume the rings
 *
 * On every ASIC except Carrizo the GUI idle interrupt is disabled first.
 * When adev->firmware.smu_load is not set, the gfx and compute CP microcode
 * is loaded directly by the driver.  The check_fw_load_finish() queries for
 * the CE, PFP, ME and MEC1 images follow - NOTE(review): presumably these
 * form the smu_load branch; confirm.  Finally the gfx and compute rings are
 * resumed and the GUI idle interrupt is enabled.
 */
3090 static int gfx_v8_0_cp_resume(struct amdgpu_device
*adev
)
3094 if (adev
->asic_type
!= CHIP_CARRIZO
)
3095 gfx_v8_0_enable_gui_idle_interrupt(adev
, false);
3097 if (!adev
->firmware
.smu_load
) {
3098 /* legacy firmware loading */
3099 r
= gfx_v8_0_cp_gfx_load_microcode(adev
);
3103 r
= gfx_v8_0_cp_compute_load_microcode(adev
);
3107 r
= adev
->smu
.smumgr_funcs
->check_fw_load_finish(adev
,
3108 AMDGPU_UCODE_ID_CP_CE
);
3112 r
= adev
->smu
.smumgr_funcs
->check_fw_load_finish(adev
,
3113 AMDGPU_UCODE_ID_CP_PFP
);
3117 r
= adev
->smu
.smumgr_funcs
->check_fw_load_finish(adev
,
3118 AMDGPU_UCODE_ID_CP_ME
);
3122 r
= adev
->smu
.smumgr_funcs
->check_fw_load_finish(adev
,
3123 AMDGPU_UCODE_ID_CP_MEC1
);
/* firmware is in place: bring up the gfx ring, then the compute rings */
3128 r
= gfx_v8_0_cp_gfx_resume(adev
);
3132 r
= gfx_v8_0_cp_compute_resume(adev
);
3136 gfx_v8_0_enable_gui_idle_interrupt(adev
, true);
3141 static void gfx_v8_0_cp_enable(struct amdgpu_device
*adev
, bool enable
)
3143 gfx_v8_0_cp_gfx_enable(adev
, enable
);
3144 gfx_v8_0_cp_compute_enable(adev
, enable
);
/*
 * gfx_v8_0_hw_init - hardware bring-up for the GFX block
 *
 * Applies the golden register settings, runs the general GPU init, then
 * resumes the RLC followed by the CP.  The r values assigned below carry the
 * status of the two resume steps.
 */
3147 static int gfx_v8_0_hw_init(struct amdgpu_device
*adev
)
3151 gfx_v8_0_init_golden_registers(adev
);
3153 gfx_v8_0_gpu_init(adev
);
3155 r
= gfx_v8_0_rlc_resume(adev
);
3159 r
= gfx_v8_0_cp_resume(adev
);
/*
 * gfx_v8_0_hw_fini - hardware teardown for the GFX block
 *
 * Disables both command processors, stops the RLC and releases the compute
 * ring MQD buffer objects.
 */
3166 static int gfx_v8_0_hw_fini(struct amdgpu_device
*adev
)
3168 gfx_v8_0_cp_enable(adev
, false);
3169 gfx_v8_0_rlc_stop(adev
);
3170 gfx_v8_0_cp_compute_fini(adev
);
/*
 * gfx_v8_0_suspend - suspend hook for the GFX block
 *
 * @adev: amdgpu_device pointer
 *
 * Suspend is implemented as a plain hardware teardown; the status of
 * gfx_v8_0_hw_fini() is returned unchanged.
 */
static int gfx_v8_0_suspend(struct amdgpu_device *adev)
{
	int status = gfx_v8_0_hw_fini(adev);

	return status;
}
/*
 * gfx_v8_0_resume - resume hook for the GFX block
 *
 * @adev: amdgpu_device pointer
 *
 * Resume is implemented as a full hardware init; the status of
 * gfx_v8_0_hw_init() is returned unchanged.
 */
static int gfx_v8_0_resume(struct amdgpu_device *adev)
{
	int status = gfx_v8_0_hw_init(adev);

	return status;
}
/*
 * gfx_v8_0_is_idle - idle check based on GRBM_STATUS.GUI_ACTIVE
 *
 * Reads GRBM_STATUS once and tests its GUI_ACTIVE field.
 * NOTE(review): presumably a set GUI_ACTIVE bit means "not idle" - confirm
 * against the return statements.
 */
3185 static bool gfx_v8_0_is_idle(struct amdgpu_device
*adev
)
3187 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS
), GRBM_STATUS
, GUI_ACTIVE
))
/*
 * gfx_v8_0_wait_for_idle - poll GRBM_STATUS until GUI_ACTIVE clears
 *
 * Polls for at most adev->usec_timeout iterations.
 */
3193 static int gfx_v8_0_wait_for_idle(struct amdgpu_device
*adev
)
3198 for (i
= 0; i
< adev
->usec_timeout
; i
++) {
3199 /* read GRBM_STATUS */
3200 tmp
= RREG32(mmGRBM_STATUS
) & GRBM_STATUS__GUI_ACTIVE_MASK
;
/* tmp was already masked down to the GUI_ACTIVE bit above */
3202 if (!REG_GET_FIELD(tmp
, GRBM_STATUS
, GUI_ACTIVE
))
3209 static void gfx_v8_0_print_status(struct amdgpu_device
*adev
)
3213 dev_info(adev
->dev
, "GFX 8.x registers\n");
3214 dev_info(adev
->dev
, " GRBM_STATUS=0x%08X\n",
3215 RREG32(mmGRBM_STATUS
));
3216 dev_info(adev
->dev
, " GRBM_STATUS2=0x%08X\n",
3217 RREG32(mmGRBM_STATUS2
));
3218 dev_info(adev
->dev
, " GRBM_STATUS_SE0=0x%08X\n",
3219 RREG32(mmGRBM_STATUS_SE0
));
3220 dev_info(adev
->dev
, " GRBM_STATUS_SE1=0x%08X\n",
3221 RREG32(mmGRBM_STATUS_SE1
));
3222 dev_info(adev
->dev
, " GRBM_STATUS_SE2=0x%08X\n",
3223 RREG32(mmGRBM_STATUS_SE2
));
3224 dev_info(adev
->dev
, " GRBM_STATUS_SE3=0x%08X\n",
3225 RREG32(mmGRBM_STATUS_SE3
));
3226 dev_info(adev
->dev
, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT
));
3227 dev_info(adev
->dev
, " CP_STALLED_STAT1 = 0x%08x\n",
3228 RREG32(mmCP_STALLED_STAT1
));
3229 dev_info(adev
->dev
, " CP_STALLED_STAT2 = 0x%08x\n",
3230 RREG32(mmCP_STALLED_STAT2
));
3231 dev_info(adev
->dev
, " CP_STALLED_STAT3 = 0x%08x\n",
3232 RREG32(mmCP_STALLED_STAT3
));
3233 dev_info(adev
->dev
, " CP_CPF_BUSY_STAT = 0x%08x\n",
3234 RREG32(mmCP_CPF_BUSY_STAT
));
3235 dev_info(adev
->dev
, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3236 RREG32(mmCP_CPF_STALLED_STAT1
));
3237 dev_info(adev
->dev
, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS
));
3238 dev_info(adev
->dev
, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT
));
3239 dev_info(adev
->dev
, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3240 RREG32(mmCP_CPC_STALLED_STAT1
));
3241 dev_info(adev
->dev
, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS
));
3243 for (i
= 0; i
< 32; i
++) {
3244 dev_info(adev
->dev
, " GB_TILE_MODE%d=0x%08X\n",
3245 i
, RREG32(mmGB_TILE_MODE0
+ (i
* 4)));
3247 for (i
= 0; i
< 16; i
++) {
3248 dev_info(adev
->dev
, " GB_MACROTILE_MODE%d=0x%08X\n",
3249 i
, RREG32(mmGB_MACROTILE_MODE0
+ (i
* 4)));
3251 for (i
= 0; i
< adev
->gfx
.config
.max_shader_engines
; i
++) {
3252 dev_info(adev
->dev
, " se: %d\n", i
);
3253 gfx_v8_0_select_se_sh(adev
, i
, 0xffffffff);
3254 dev_info(adev
->dev
, " PA_SC_RASTER_CONFIG=0x%08X\n",
3255 RREG32(mmPA_SC_RASTER_CONFIG
));
3256 dev_info(adev
->dev
, " PA_SC_RASTER_CONFIG_1=0x%08X\n",
3257 RREG32(mmPA_SC_RASTER_CONFIG_1
));
3259 gfx_v8_0_select_se_sh(adev
, 0xffffffff, 0xffffffff);
3261 dev_info(adev
->dev
, " GB_ADDR_CONFIG=0x%08X\n",
3262 RREG32(mmGB_ADDR_CONFIG
));
3263 dev_info(adev
->dev
, " HDP_ADDR_CONFIG=0x%08X\n",
3264 RREG32(mmHDP_ADDR_CONFIG
));
3265 dev_info(adev
->dev
, " DMIF_ADDR_CALC=0x%08X\n",
3266 RREG32(mmDMIF_ADDR_CALC
));
3267 dev_info(adev
->dev
, " SDMA0_TILING_CONFIG=0x%08X\n",
3268 RREG32(mmSDMA0_TILING_CONFIG
+ SDMA0_REGISTER_OFFSET
));
3269 dev_info(adev
->dev
, " SDMA1_TILING_CONFIG=0x%08X\n",
3270 RREG32(mmSDMA0_TILING_CONFIG
+ SDMA1_REGISTER_OFFSET
));
3271 dev_info(adev
->dev
, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
3272 RREG32(mmUVD_UDEC_ADDR_CONFIG
));
3273 dev_info(adev
->dev
, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
3274 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG
));
3275 dev_info(adev
->dev
, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
3276 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG
));
3278 dev_info(adev
->dev
, " CP_MEQ_THRESHOLDS=0x%08X\n",
3279 RREG32(mmCP_MEQ_THRESHOLDS
));
3280 dev_info(adev
->dev
, " SX_DEBUG_1=0x%08X\n",
3281 RREG32(mmSX_DEBUG_1
));
3282 dev_info(adev
->dev
, " TA_CNTL_AUX=0x%08X\n",
3283 RREG32(mmTA_CNTL_AUX
));
3284 dev_info(adev
->dev
, " SPI_CONFIG_CNTL=0x%08X\n",
3285 RREG32(mmSPI_CONFIG_CNTL
));
3286 dev_info(adev
->dev
, " SQ_CONFIG=0x%08X\n",
3287 RREG32(mmSQ_CONFIG
));
3288 dev_info(adev
->dev
, " DB_DEBUG=0x%08X\n",
3289 RREG32(mmDB_DEBUG
));
3290 dev_info(adev
->dev
, " DB_DEBUG2=0x%08X\n",
3291 RREG32(mmDB_DEBUG2
));
3292 dev_info(adev
->dev
, " DB_DEBUG3=0x%08X\n",
3293 RREG32(mmDB_DEBUG3
));
3294 dev_info(adev
->dev
, " CB_HW_CONTROL=0x%08X\n",
3295 RREG32(mmCB_HW_CONTROL
));
3296 dev_info(adev
->dev
, " SPI_CONFIG_CNTL_1=0x%08X\n",
3297 RREG32(mmSPI_CONFIG_CNTL_1
));
3298 dev_info(adev
->dev
, " PA_SC_FIFO_SIZE=0x%08X\n",
3299 RREG32(mmPA_SC_FIFO_SIZE
));
3300 dev_info(adev
->dev
, " VGT_NUM_INSTANCES=0x%08X\n",
3301 RREG32(mmVGT_NUM_INSTANCES
));
3302 dev_info(adev
->dev
, " CP_PERFMON_CNTL=0x%08X\n",
3303 RREG32(mmCP_PERFMON_CNTL
));
3304 dev_info(adev
->dev
, " PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
3305 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS
));
3306 dev_info(adev
->dev
, " VGT_CACHE_INVALIDATION=0x%08X\n",
3307 RREG32(mmVGT_CACHE_INVALIDATION
));
3308 dev_info(adev
->dev
, " VGT_GS_VERTEX_REUSE=0x%08X\n",
3309 RREG32(mmVGT_GS_VERTEX_REUSE
));
3310 dev_info(adev
->dev
, " PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
3311 RREG32(mmPA_SC_LINE_STIPPLE_STATE
));
3312 dev_info(adev
->dev
, " PA_CL_ENHANCE=0x%08X\n",
3313 RREG32(mmPA_CL_ENHANCE
));
3314 dev_info(adev
->dev
, " PA_SC_ENHANCE=0x%08X\n",
3315 RREG32(mmPA_SC_ENHANCE
));
3317 dev_info(adev
->dev
, " CP_ME_CNTL=0x%08X\n",
3318 RREG32(mmCP_ME_CNTL
));
3319 dev_info(adev
->dev
, " CP_MAX_CONTEXT=0x%08X\n",
3320 RREG32(mmCP_MAX_CONTEXT
));
3321 dev_info(adev
->dev
, " CP_ENDIAN_SWAP=0x%08X\n",
3322 RREG32(mmCP_ENDIAN_SWAP
));
3323 dev_info(adev
->dev
, " CP_DEVICE_ID=0x%08X\n",
3324 RREG32(mmCP_DEVICE_ID
));
3326 dev_info(adev
->dev
, " CP_SEM_WAIT_TIMER=0x%08X\n",
3327 RREG32(mmCP_SEM_WAIT_TIMER
));
3329 dev_info(adev
->dev
, " CP_RB_WPTR_DELAY=0x%08X\n",
3330 RREG32(mmCP_RB_WPTR_DELAY
));
3331 dev_info(adev
->dev
, " CP_RB_VMID=0x%08X\n",
3332 RREG32(mmCP_RB_VMID
));
3333 dev_info(adev
->dev
, " CP_RB0_CNTL=0x%08X\n",
3334 RREG32(mmCP_RB0_CNTL
));
3335 dev_info(adev
->dev
, " CP_RB0_WPTR=0x%08X\n",
3336 RREG32(mmCP_RB0_WPTR
));
3337 dev_info(adev
->dev
, " CP_RB0_RPTR_ADDR=0x%08X\n",
3338 RREG32(mmCP_RB0_RPTR_ADDR
));
3339 dev_info(adev
->dev
, " CP_RB0_RPTR_ADDR_HI=0x%08X\n",
3340 RREG32(mmCP_RB0_RPTR_ADDR_HI
));
3341 dev_info(adev
->dev
, " CP_RB0_CNTL=0x%08X\n",
3342 RREG32(mmCP_RB0_CNTL
));
3343 dev_info(adev
->dev
, " CP_RB0_BASE=0x%08X\n",
3344 RREG32(mmCP_RB0_BASE
));
3345 dev_info(adev
->dev
, " CP_RB0_BASE_HI=0x%08X\n",
3346 RREG32(mmCP_RB0_BASE_HI
));
3347 dev_info(adev
->dev
, " CP_MEC_CNTL=0x%08X\n",
3348 RREG32(mmCP_MEC_CNTL
));
3349 dev_info(adev
->dev
, " CP_CPF_DEBUG=0x%08X\n",
3350 RREG32(mmCP_CPF_DEBUG
));
3352 dev_info(adev
->dev
, " SCRATCH_ADDR=0x%08X\n",
3353 RREG32(mmSCRATCH_ADDR
));
3354 dev_info(adev
->dev
, " SCRATCH_UMSK=0x%08X\n",
3355 RREG32(mmSCRATCH_UMSK
));
3357 dev_info(adev
->dev
, " CP_INT_CNTL_RING0=0x%08X\n",
3358 RREG32(mmCP_INT_CNTL_RING0
));
3359 dev_info(adev
->dev
, " RLC_LB_CNTL=0x%08X\n",
3360 RREG32(mmRLC_LB_CNTL
));
3361 dev_info(adev
->dev
, " RLC_CNTL=0x%08X\n",
3362 RREG32(mmRLC_CNTL
));
3363 dev_info(adev
->dev
, " RLC_CGCG_CGLS_CTRL=0x%08X\n",
3364 RREG32(mmRLC_CGCG_CGLS_CTRL
));
3365 dev_info(adev
->dev
, " RLC_LB_CNTR_INIT=0x%08X\n",
3366 RREG32(mmRLC_LB_CNTR_INIT
));
3367 dev_info(adev
->dev
, " RLC_LB_CNTR_MAX=0x%08X\n",
3368 RREG32(mmRLC_LB_CNTR_MAX
));
3369 dev_info(adev
->dev
, " RLC_LB_INIT_CU_MASK=0x%08X\n",
3370 RREG32(mmRLC_LB_INIT_CU_MASK
));
3371 dev_info(adev
->dev
, " RLC_LB_PARAMS=0x%08X\n",
3372 RREG32(mmRLC_LB_PARAMS
));
3373 dev_info(adev
->dev
, " RLC_LB_CNTL=0x%08X\n",
3374 RREG32(mmRLC_LB_CNTL
));
3375 dev_info(adev
->dev
, " RLC_MC_CNTL=0x%08X\n",
3376 RREG32(mmRLC_MC_CNTL
));
3377 dev_info(adev
->dev
, " RLC_UCODE_CNTL=0x%08X\n",
3378 RREG32(mmRLC_UCODE_CNTL
));
3380 mutex_lock(&adev
->srbm_mutex
);
3381 for (i
= 0; i
< 16; i
++) {
3382 vi_srbm_select(adev
, 0, 0, 0, i
);
3383 dev_info(adev
->dev
, " VM %d:\n", i
);
3384 dev_info(adev
->dev
, " SH_MEM_CONFIG=0x%08X\n",
3385 RREG32(mmSH_MEM_CONFIG
));
3386 dev_info(adev
->dev
, " SH_MEM_APE1_BASE=0x%08X\n",
3387 RREG32(mmSH_MEM_APE1_BASE
));
3388 dev_info(adev
->dev
, " SH_MEM_APE1_LIMIT=0x%08X\n",
3389 RREG32(mmSH_MEM_APE1_LIMIT
));
3390 dev_info(adev
->dev
, " SH_MEM_BASES=0x%08X\n",
3391 RREG32(mmSH_MEM_BASES
));
3393 vi_srbm_select(adev
, 0, 0, 0, 0);
3394 mutex_unlock(&adev
->srbm_mutex
);
/*
 * gfx_v8_0_soft_reset - soft reset the busy parts of the GFX block
 *
 * Builds two reset masks from the busy bits in GRBM_STATUS, GRBM_STATUS2 and
 * SRBM_STATUS.  If either mask is non-zero: dumps the register state, stops
 * the RLC and the gfx CP, then for each non-zero mask sets the reset bits,
 * reads the register back, clears the bits again and reads back once more.
 * The register state is dumped a second time afterwards.
 */
3397 static int gfx_v8_0_soft_reset(struct amdgpu_device
*adev
)
3399 u32 grbm_soft_reset
= 0, srbm_soft_reset
= 0;
3403 tmp
= RREG32(mmGRBM_STATUS
);
/* any busy graphics pipeline unit requests a CP + GFX reset */
3404 if (tmp
& (GRBM_STATUS__PA_BUSY_MASK
| GRBM_STATUS__SC_BUSY_MASK
|
3405 GRBM_STATUS__BCI_BUSY_MASK
| GRBM_STATUS__SX_BUSY_MASK
|
3406 GRBM_STATUS__TA_BUSY_MASK
| GRBM_STATUS__VGT_BUSY_MASK
|
3407 GRBM_STATUS__DB_BUSY_MASK
| GRBM_STATUS__CB_BUSY_MASK
|
3408 GRBM_STATUS__GDS_BUSY_MASK
| GRBM_STATUS__SPI_BUSY_MASK
|
3409 GRBM_STATUS__IA_BUSY_MASK
| GRBM_STATUS__IA_BUSY_NO_DMA_MASK
)) {
3410 grbm_soft_reset
= REG_SET_FIELD(grbm_soft_reset
,
3411 GRBM_SOFT_RESET
, SOFT_RESET_CP
, 1);
3412 grbm_soft_reset
= REG_SET_FIELD(grbm_soft_reset
,
3413 GRBM_SOFT_RESET
, SOFT_RESET_GFX
, 1);
/* a busy CP requests a CP reset plus a GRBM reset through SRBM */
3416 if (tmp
& (GRBM_STATUS__CP_BUSY_MASK
| GRBM_STATUS__CP_COHERENCY_BUSY_MASK
)) {
3417 grbm_soft_reset
= REG_SET_FIELD(grbm_soft_reset
,
3418 GRBM_SOFT_RESET
, SOFT_RESET_CP
, 1);
3419 srbm_soft_reset
= REG_SET_FIELD(srbm_soft_reset
,
3420 SRBM_SOFT_RESET
, SOFT_RESET_GRBM
, 1);
3424 tmp
= RREG32(mmGRBM_STATUS2
);
3425 if (REG_GET_FIELD(tmp
, GRBM_STATUS2
, RLC_BUSY
))
3426 grbm_soft_reset
= REG_SET_FIELD(grbm_soft_reset
,
3427 GRBM_SOFT_RESET
, SOFT_RESET_RLC
, 1);
3430 tmp
= RREG32(mmSRBM_STATUS
);
3431 if (REG_GET_FIELD(tmp
, SRBM_STATUS
, GRBM_RQ_PENDING
))
3432 srbm_soft_reset
= REG_SET_FIELD(srbm_soft_reset
,
3433 SRBM_SOFT_RESET
, SOFT_RESET_GRBM
, 1);
3435 if (grbm_soft_reset
|| srbm_soft_reset
) {
3436 gfx_v8_0_print_status(adev
);
3438 gfx_v8_0_rlc_stop(adev
);
3440 /* Disable GFX parsing/prefetching */
3441 gfx_v8_0_cp_gfx_enable(adev
, false);
3443 /* Disable MEC parsing/prefetching */
/* assert the GRBM reset bits, read back, then de-assert and read back */
3446 if (grbm_soft_reset
) {
3447 tmp
= RREG32(mmGRBM_SOFT_RESET
);
3448 tmp
|= grbm_soft_reset
;
3449 dev_info(adev
->dev
, "GRBM_SOFT_RESET=0x%08X\n", tmp
);
3450 WREG32(mmGRBM_SOFT_RESET
, tmp
);
3451 tmp
= RREG32(mmGRBM_SOFT_RESET
);
3455 tmp
&= ~grbm_soft_reset
;
3456 WREG32(mmGRBM_SOFT_RESET
, tmp
);
3457 tmp
= RREG32(mmGRBM_SOFT_RESET
);
/* same assert / de-assert sequence for the SRBM reset bits */
3460 if (srbm_soft_reset
) {
3461 tmp
= RREG32(mmSRBM_SOFT_RESET
);
3462 tmp
|= srbm_soft_reset
;
3463 dev_info(adev
->dev
, "SRBM_SOFT_RESET=0x%08X\n", tmp
);
3464 WREG32(mmSRBM_SOFT_RESET
, tmp
);
3465 tmp
= RREG32(mmSRBM_SOFT_RESET
);
3469 tmp
&= ~srbm_soft_reset
;
3470 WREG32(mmSRBM_SOFT_RESET
, tmp
);
3471 tmp
= RREG32(mmSRBM_SOFT_RESET
);
3473 /* Wait a little for things to settle down */
3475 gfx_v8_0_print_status(adev
);
3481 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
3483 * @adev: amdgpu_device pointer
3485 * Fetches a GPU clock counter snapshot.
3486 * Returns the 64 bit clock counter snapshot.
3488 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device
*adev
)
3492 mutex_lock(&adev
->gfx
.gpu_clock_mutex
);
3493 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT
, 1);
3494 clock
= (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB
) |
3495 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB
) << 32ULL);
3496 mutex_unlock(&adev
->gfx
.gpu_clock_mutex
);
/*
 * gfx_v8_0_ring_emit_gds_switch - emit the GDS/GWS/OA setup for one VMID
 *
 * Converts the base/size arguments with the AMDGPU_*_SHIFT constants and
 * emits four WRITE_DATA packets that program, for the given vmid, the
 * registers listed in amdgpu_gds_reg_offset[vmid]: mem_base, mem_size, gws
 * and oa.
 */
3500 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring
*ring
,
3502 uint32_t gds_base
, uint32_t gds_size
,
3503 uint32_t gws_base
, uint32_t gws_size
,
3504 uint32_t oa_base
, uint32_t oa_size
)
3506 gds_base
= gds_base
>> AMDGPU_GDS_SHIFT
;
3507 gds_size
= gds_size
>> AMDGPU_GDS_SHIFT
;
3509 gws_base
= gws_base
>> AMDGPU_GWS_SHIFT
;
3510 gws_size
= gws_size
>> AMDGPU_GWS_SHIFT
;
3512 oa_base
= oa_base
>> AMDGPU_OA_SHIFT
;
3513 oa_size
= oa_size
>> AMDGPU_OA_SHIFT
;
/* packet 1: mem_base register <- gds_base */
3516 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
3517 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
3518 WRITE_DATA_DST_SEL(0)));
3519 amdgpu_ring_write(ring
, amdgpu_gds_reg_offset
[vmid
].mem_base
);
3520 amdgpu_ring_write(ring
, 0);
3521 amdgpu_ring_write(ring
, gds_base
);
/* packet 2: mem_size register <- gds_size */
3524 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
3525 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
3526 WRITE_DATA_DST_SEL(0)));
3527 amdgpu_ring_write(ring
, amdgpu_gds_reg_offset
[vmid
].mem_size
);
3528 amdgpu_ring_write(ring
, 0);
3529 amdgpu_ring_write(ring
, gds_size
);
/* packet 3: gws register <- size in the SIZE field, OR-ed with the base */
3532 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
3533 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
3534 WRITE_DATA_DST_SEL(0)));
3535 amdgpu_ring_write(ring
, amdgpu_gds_reg_offset
[vmid
].gws
);
3536 amdgpu_ring_write(ring
, 0);
3537 amdgpu_ring_write(ring
, gws_size
<< GDS_GWS_VMID0__SIZE__SHIFT
| gws_base
);
/* packet 4: oa register <- bit mask with oa_size bits set starting at bit oa_base */
/* NOTE(review): the int shifts overflow if oa_size + oa_base reaches 31 - confirm callers bound these */
3540 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
3541 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
3542 WRITE_DATA_DST_SEL(0)));
3543 amdgpu_ring_write(ring
, amdgpu_gds_reg_offset
[vmid
].oa
);
3544 amdgpu_ring_write(ring
, 0);
3545 amdgpu_ring_write(ring
, (1 << (oa_size
+ oa_base
)) - (1 << oa_base
));
/*
 * gfx_v8_0_early_init - set ring counts and install the GFX v8 callbacks
 *
 * Records the number of gfx and compute rings (GFX8_NUM_GFX_RINGS and
 * GFX8_NUM_COMPUTE_RINGS), then runs the ring-function, IRQ-function and
 * GDS setup helpers.
 */
3548 static int gfx_v8_0_early_init(struct amdgpu_device
*adev
)
3551 adev
->gfx
.num_gfx_rings
= GFX8_NUM_GFX_RINGS
;
3552 adev
->gfx
.num_compute_rings
= GFX8_NUM_COMPUTE_RINGS
;
3553 gfx_v8_0_set_ring_funcs(adev
);
3554 gfx_v8_0_set_irq_funcs(adev
);
3555 gfx_v8_0_set_gds_init(adev
);
/*
 * gfx_v8_0_set_powergating_state - powergating state callback for GFX v8
 *
 * Takes the device and the requested amdgpu_powergating_state.
 * NOTE(review): the body is not shown in this listing; confirm what it does
 * before relying on it.
 */
3560 static int gfx_v8_0_set_powergating_state(struct amdgpu_device
*adev
,
3561 enum amdgpu_powergating_state state
)
/*
 * gfx_v8_0_set_clockgating_state - clockgating state callback for GFX v8
 *
 * Takes the device and the requested amdgpu_clockgating_state.
 * NOTE(review): the body is not shown in this listing; confirm what it does
 * before relying on it.
 */
3566 static int gfx_v8_0_set_clockgating_state(struct amdgpu_device
*adev
,
3567 enum amdgpu_clockgating_state state
)
3572 static u32
gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring
*ring
)
3576 rptr
= ring
->adev
->wb
.wb
[ring
->rptr_offs
];
3581 static u32
gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring
*ring
)
3583 struct amdgpu_device
*adev
= ring
->adev
;
3586 if (ring
->use_doorbell
)
3587 /* XXX check if swapping is necessary on BE */
3588 wptr
= ring
->adev
->wb
.wb
[ring
->wptr_offs
];
3590 wptr
= RREG32(mmCP_RB0_WPTR
);
3595 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring
*ring
)
3597 struct amdgpu_device
*adev
= ring
->adev
;
3599 if (ring
->use_doorbell
) {
3600 /* XXX check if swapping is necessary on BE */
3601 adev
->wb
.wb
[ring
->wptr_offs
] = ring
->wptr
;
3602 WDOORBELL32(ring
->doorbell_index
, ring
->wptr
);
3604 WREG32(mmCP_RB0_WPTR
, ring
->wptr
);
3605 (void)RREG32(mmCP_RB0_WPTR
);
/*
 * gfx_v8_0_hdp_flush_cp_ring_emit - emit an HDP flush and wait for it
 *
 * Picks the GPU_HDP_FLUSH_DONE bit for this ring (CP2 or CP6 mask shifted by
 * ring->pipe for compute rings, CP0 mask otherwise) and emits one
 * WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE for that bit.
 */
3609 static void gfx_v8_0_hdp_flush_cp_ring_emit(struct amdgpu_ring
*ring
)
3611 u32 ref_and_mask
, reg_mem_engine
;
3613 if (ring
->type
== AMDGPU_RING_TYPE_COMPUTE
) {
/* NOTE(review): the CP2 / CP6 choice presumably depends on ring->me - confirm */
3616 ref_and_mask
= GPU_HDP_FLUSH_DONE__CP2_MASK
<< ring
->pipe
;
3619 ref_and_mask
= GPU_HDP_FLUSH_DONE__CP6_MASK
<< ring
->pipe
;
3626 ref_and_mask
= GPU_HDP_FLUSH_DONE__CP0_MASK
;
3627 reg_mem_engine
= WAIT_REG_MEM_ENGINE(1); /* pfp */
3630 amdgpu_ring_write(ring
, PACKET3(PACKET3_WAIT_REG_MEM
, 5));
3631 amdgpu_ring_write(ring
, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3632 WAIT_REG_MEM_FUNCTION(3) | /* == */
3634 amdgpu_ring_write(ring
, mmGPU_HDP_FLUSH_REQ
);
3635 amdgpu_ring_write(ring
, mmGPU_HDP_FLUSH_DONE
);
/* the same bit pattern is used as both reference value and mask */
3636 amdgpu_ring_write(ring
, ref_and_mask
);
3637 amdgpu_ring_write(ring
, ref_and_mask
);
3638 amdgpu_ring_write(ring
, 0x20); /* poll interval */
/*
 * gfx_v8_0_ring_emit_ib - schedule an indirect buffer on the ring
 *
 * Emits, in order: a WRITE_DATA packet storing next_rptr at
 * ring->next_rptr_gpu_addr, an optional HDP flush, an optional SWITCH_BUFFER
 * packet (gfx rings needing a context switch), and the INDIRECT_BUFFER (or
 * INDIRECT_BUFFER_CONST) packet carrying the IB address, length and VM id.
 */
3641 static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring
*ring
,
3642 struct amdgpu_ib
*ib
)
3644 u32 header
, control
= 0;
/* base estimate: wptr + 5 covers the 5-dword WRITE_DATA packet emitted below */
3645 u32 next_rptr
= ring
->wptr
+ 5;
3646 if (ring
->type
== AMDGPU_RING_TYPE_COMPUTE
)
3647 control
|= INDIRECT_BUFFER_VALID
;
/* NOTE(review): the next two conditions presumably grow next_rptr for the optional packets - confirm */
3649 if (ib
->flush_hdp_writefifo
)
3652 if (ring
->need_ctx_switch
&& ring
->type
== AMDGPU_RING_TYPE_GFX
)
3656 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
3657 amdgpu_ring_write(ring
, WRITE_DATA_DST_SEL(5) | WR_CONFIRM
);
3658 amdgpu_ring_write(ring
, ring
->next_rptr_gpu_addr
& 0xfffffffc);
3659 amdgpu_ring_write(ring
, upper_32_bits(ring
->next_rptr_gpu_addr
) & 0xffffffff);
3660 amdgpu_ring_write(ring
, next_rptr
);
3662 if (ib
->flush_hdp_writefifo
)
3663 gfx_v8_0_hdp_flush_cp_ring_emit(ring
);
3665 /* insert SWITCH_BUFFER packet before first IB in the ring frame */
3666 if (ring
->need_ctx_switch
&& ring
->type
== AMDGPU_RING_TYPE_GFX
) {
3667 amdgpu_ring_write(ring
, PACKET3(PACKET3_SWITCH_BUFFER
, 0));
3668 amdgpu_ring_write(ring
, 0);
3669 ring
->need_ctx_switch
= false;
3672 if (ib
->is_const_ib
)
3673 header
= PACKET3(PACKET3_INDIRECT_BUFFER_CONST
, 2);
3675 header
= PACKET3(PACKET3_INDIRECT_BUFFER
, 2);
/* control word: IB length in dwords, plus the VM id in bits 24 and up when the IB has a VM */
3677 control
|= ib
->length_dw
|
3678 (ib
->vm
? (ib
->vm
->ids
[ring
->idx
].id
<< 24) : 0);
3680 amdgpu_ring_write(ring
, header
);
3681 amdgpu_ring_write(ring
,
3685 (ib
->gpu_addr
& 0xFFFFFFFC));
3686 amdgpu_ring_write(ring
, upper_32_bits(ib
->gpu_addr
) & 0xFFFF);
3687 amdgpu_ring_write(ring
, control
);
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 *
 * Emits one EVENT_WRITE_EOP packet that writes seq to addr.  write64bit
 * selects DATA_SEL 2 instead of 1; INT_SEL is always 2.
 */
3690 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring
*ring
, u64 addr
,
3691 u64 seq
, bool write64bit
)
3693 /* EVENT_WRITE_EOP - flush caches, send int */
3694 amdgpu_ring_write(ring
, PACKET3(PACKET3_EVENT_WRITE_EOP
, 4));
3695 amdgpu_ring_write(ring
, (EOP_TCL1_ACTION_EN
|
3697 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT
) |
/* fence address: low dword with the two low bits masked off, then the upper bits limited to 16 */
3699 amdgpu_ring_write(ring
, addr
& 0xfffffffc);
3700 amdgpu_ring_write(ring
, (upper_32_bits(addr
) & 0xffff) |
3701 DATA_SEL(write64bit
? 2 : 1) | INT_SEL(2));
3702 amdgpu_ring_write(ring
, lower_32_bits(seq
));
3703 amdgpu_ring_write(ring
, upper_32_bits(seq
));
3707 * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring
3709 * @ring: amdgpu ring buffer object
3710 * @semaphore: amdgpu semaphore object
3711 * @emit_wait: Is this a sempahore wait?
3713 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3714 * from running ahead of semaphore waits.
3716 static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring
*ring
,
3717 struct amdgpu_semaphore
*semaphore
,
3720 uint64_t addr
= semaphore
->gpu_addr
;
3721 unsigned sel
= emit_wait
? PACKET3_SEM_SEL_WAIT
: PACKET3_SEM_SEL_SIGNAL
;
/* Topaz and Tonga use the 3-dword MEM_SEMAPHORE form with sel folded into the high address dword */
3723 if (ring
->adev
->asic_type
== CHIP_TOPAZ
||
3724 ring
->adev
->asic_type
== CHIP_TONGA
) {
3725 amdgpu_ring_write(ring
, PACKET3(PACKET3_MEM_SEMAPHORE
, 1));
3726 amdgpu_ring_write(ring
, lower_32_bits(addr
));
3727 amdgpu_ring_write(ring
, (upper_32_bits(addr
) & 0xffff) | sel
);
/* 4-dword MEM_SEMAPHORE form: full 64-bit address followed by sel in its own dword */
3729 amdgpu_ring_write(ring
, PACKET3(PACKET3_MEM_SEMAPHORE
, 2));
3730 amdgpu_ring_write(ring
, lower_32_bits(addr
));
3731 amdgpu_ring_write(ring
, upper_32_bits(addr
));
3732 amdgpu_ring_write(ring
, sel
);
3735 if (emit_wait
&& (ring
->type
== AMDGPU_RING_TYPE_GFX
)) {
3736 /* Prevent the PFP from running ahead of the semaphore wait */
3737 amdgpu_ring_write(ring
, PACKET3(PACKET3_PFP_SYNC_ME
, 0));
3738 amdgpu_ring_write(ring
, 0x0);
/*
 * gfx_v8_0_ce_sync_me - make the CE wait for a marker written by the DE
 *
 * Uses the writeback slot at adev->gfx.ce_sync_offs as a flag: one packet
 * writes 1 to it, a WAIT_REG_MEM packet on the CE engine waits until the
 * slot equals 1, and a final CE write resets it to 0.
 */
3744 static void gfx_v8_0_ce_sync_me(struct amdgpu_ring
*ring
)
3746 struct amdgpu_device
*adev
= ring
->adev
;
/* GPU address of the ce_sync writeback slot (slots are 4 bytes wide) */
3747 u64 gpu_addr
= adev
->wb
.gpu_addr
+ adev
->gfx
.ce_sync_offs
* 4;
3749 /* instruct DE to set a magic number */
3750 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
3751 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
3752 WRITE_DATA_DST_SEL(5)));
3753 amdgpu_ring_write(ring
, gpu_addr
& 0xfffffffc);
3754 amdgpu_ring_write(ring
, upper_32_bits(gpu_addr
) & 0xffffffff);
3755 amdgpu_ring_write(ring
, 1);
3757 /* let CE wait till condition satisfied */
3758 amdgpu_ring_write(ring
, PACKET3(PACKET3_WAIT_REG_MEM
, 5));
3759 amdgpu_ring_write(ring
, (WAIT_REG_MEM_OPERATION(0) | /* wait */
3760 WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
3761 WAIT_REG_MEM_FUNCTION(3) | /* == */
3762 WAIT_REG_MEM_ENGINE(2))); /* ce */
3763 amdgpu_ring_write(ring
, gpu_addr
& 0xfffffffc);
3764 amdgpu_ring_write(ring
, upper_32_bits(gpu_addr
) & 0xffffffff);
/* reference value 1, compared under a full 32-bit mask */
3765 amdgpu_ring_write(ring
, 1);
3766 amdgpu_ring_write(ring
, 0xffffffff);
3767 amdgpu_ring_write(ring
, 4); /* poll interval */
3769 /* instruct CE to reset wb of ce_sync to zero */
3770 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
3771 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(2) |
3772 WRITE_DATA_DST_SEL(5) |
3774 amdgpu_ring_write(ring
, gpu_addr
& 0xfffffffc);
3775 amdgpu_ring_write(ring
, upper_32_bits(gpu_addr
) & 0xffffffff);
3776 amdgpu_ring_write(ring
, 0);
/*
 * gfx_v8_0_ring_emit_vm_flush - emit a page-table switch and TLB flush
 *
 * Emits packets that: write pd_addr >> 12 to the page table base register
 * of vm_id, select vm_id in SRBM_GFX_CNTL and write the four SH_MEM_*
 * registers, select VMID 0 again, request an invalidate for vm_id via
 * VM_INVALIDATE_REQUEST and wait on that register.  The PFP sync and CE
 * sync packets come last.
 */
3779 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring
*ring
,
3780 unsigned vm_id
, uint64_t pd_addr
)
/* gfx rings issue the first write through engine 1, other rings through engine 0 */
3782 int usepfp
= (ring
->type
== AMDGPU_RING_TYPE_GFX
);
3783 u32 srbm_gfx_cntl
= 0;
3785 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
3786 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(usepfp
) |
3787 WRITE_DATA_DST_SEL(0)));
/* NOTE(review): the two register forms below presumably split on vm_id < 8 (CONTEXT0.. vs CONTEXT8.. bank) - confirm */
3789 amdgpu_ring_write(ring
,
3790 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR
+ vm_id
));
3792 amdgpu_ring_write(ring
,
3793 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR
+ vm_id
- 8));
3795 amdgpu_ring_write(ring
, 0);
3796 amdgpu_ring_write(ring
, pd_addr
>> 12);
3798 /* update SH_MEM_* regs */
3799 srbm_gfx_cntl
= REG_SET_FIELD(srbm_gfx_cntl
, SRBM_GFX_CNTL
, VMID
, vm_id
);
3800 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
3801 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
3802 WRITE_DATA_DST_SEL(0)));
3803 amdgpu_ring_write(ring
, mmSRBM_GFX_CNTL
);
3804 amdgpu_ring_write(ring
, 0);
3805 amdgpu_ring_write(ring
, srbm_gfx_cntl
);
/* one WRITE_DATA packet carrying four data dwords, starting at SH_MEM_BASES */
3807 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 6));
3808 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
3809 WRITE_DATA_DST_SEL(0)));
3810 amdgpu_ring_write(ring
, mmSH_MEM_BASES
);
3811 amdgpu_ring_write(ring
, 0);
3813 amdgpu_ring_write(ring
, 0); /* SH_MEM_BASES */
3814 amdgpu_ring_write(ring
, 0); /* SH_MEM_CONFIG */
3815 amdgpu_ring_write(ring
, 1); /* SH_MEM_APE1_BASE */
3816 amdgpu_ring_write(ring
, 0); /* SH_MEM_APE1_LIMIT */
/* switch the SRBM selection back to VMID 0 */
3818 srbm_gfx_cntl
= REG_SET_FIELD(srbm_gfx_cntl
, SRBM_GFX_CNTL
, VMID
, 0);
3819 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
3820 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
3821 WRITE_DATA_DST_SEL(0)));
3822 amdgpu_ring_write(ring
, mmSRBM_GFX_CNTL
);
3823 amdgpu_ring_write(ring
, 0);
3824 amdgpu_ring_write(ring
, srbm_gfx_cntl
);
3827 /* bits 0-15 are the VM contexts0-15 */
3828 /* invalidate the cache */
3829 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
3830 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
3831 WRITE_DATA_DST_SEL(0)));
3832 amdgpu_ring_write(ring
, mmVM_INVALIDATE_REQUEST
);
3833 amdgpu_ring_write(ring
, 0);
3834 amdgpu_ring_write(ring
, 1 << vm_id
);
3836 /* wait for the invalidate to complete */
3837 amdgpu_ring_write(ring
, PACKET3(PACKET3_WAIT_REG_MEM
, 5));
3838 amdgpu_ring_write(ring
, (WAIT_REG_MEM_OPERATION(0) | /* wait */
3839 WAIT_REG_MEM_FUNCTION(0) | /* always */
3840 WAIT_REG_MEM_ENGINE(0))); /* me */
3841 amdgpu_ring_write(ring
, mmVM_INVALIDATE_REQUEST
);
3842 amdgpu_ring_write(ring
, 0);
3843 amdgpu_ring_write(ring
, 0); /* ref */
3844 amdgpu_ring_write(ring
, 0); /* mask */
3845 amdgpu_ring_write(ring
, 0x20); /* poll interval */
3847 /* compute doesn't have PFP */
3849 /* sync PFP to ME, otherwise we might get invalid PFP reads */
3850 amdgpu_ring_write(ring
, PACKET3(PACKET3_PFP_SYNC_ME
, 0));
3851 amdgpu_ring_write(ring
, 0x0);
3853 /* sync CE with ME to prevent CE from fetching the CE IB before the context switch is done */
3854 gfx_v8_0_ce_sync_me(ring
);
3858 static bool gfx_v8_0_ring_is_lockup(struct amdgpu_ring
*ring
)
3860 if (gfx_v8_0_is_idle(ring
->adev
)) {
3861 amdgpu_ring_lockup_update(ring
);
3864 return amdgpu_ring_test_lockup(ring
);
3867 static u32
gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring
*ring
)
3869 return ring
->adev
->wb
.wb
[ring
->rptr_offs
];
3872 static u32
gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring
*ring
)
3874 return ring
->adev
->wb
.wb
[ring
->wptr_offs
];
3877 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring
*ring
)
3879 struct amdgpu_device
*adev
= ring
->adev
;
3881 /* XXX check if swapping is necessary on BE */
3882 adev
->wb
.wb
[ring
->wptr_offs
] = ring
->wptr
;
3883 WDOORBELL32(ring
->doorbell_index
, ring
->wptr
);
3886 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring
*ring
,
3890 /* RELEASE_MEM - flush caches, send int */
3891 amdgpu_ring_write(ring
, PACKET3(PACKET3_RELEASE_MEM
, 5));
3892 amdgpu_ring_write(ring
, (EOP_TCL1_ACTION_EN
|
3894 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT
) |
3896 amdgpu_ring_write(ring
, DATA_SEL(write64bits
? 2 : 1) | INT_SEL(2));
3897 amdgpu_ring_write(ring
, addr
& 0xfffffffc);
3898 amdgpu_ring_write(ring
, upper_32_bits(addr
));
3899 amdgpu_ring_write(ring
, lower_32_bits(seq
));
3900 amdgpu_ring_write(ring
, upper_32_bits(seq
));
3903 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device
*adev
,
3904 enum amdgpu_interrupt_state state
)
3909 case AMDGPU_IRQ_STATE_DISABLE
:
3910 cp_int_cntl
= RREG32(mmCP_INT_CNTL_RING0
);
3911 cp_int_cntl
= REG_SET_FIELD(cp_int_cntl
, CP_INT_CNTL_RING0
,
3912 TIME_STAMP_INT_ENABLE
, 0);
3913 WREG32(mmCP_INT_CNTL_RING0
, cp_int_cntl
);
3915 case AMDGPU_IRQ_STATE_ENABLE
:
3916 cp_int_cntl
= RREG32(mmCP_INT_CNTL_RING0
);
3918 REG_SET_FIELD(cp_int_cntl
, CP_INT_CNTL_RING0
,
3919 TIME_STAMP_INT_ENABLE
, 1);
3920 WREG32(mmCP_INT_CNTL_RING0
, cp_int_cntl
);
3927 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device
*adev
,
3929 enum amdgpu_interrupt_state state
)
3931 u32 mec_int_cntl
, mec_int_cntl_reg
;
3934 * amdgpu controls only pipe 0 of MEC1. That's why this function only
3935 * handles the setting of interrupts for this specific pipe. All other
3936 * pipes' interrupts are set by amdkfd.
3942 mec_int_cntl_reg
= mmCP_ME1_PIPE0_INT_CNTL
;
3945 DRM_DEBUG("invalid pipe %d\n", pipe
);
3949 DRM_DEBUG("invalid me %d\n", me
);
3954 case AMDGPU_IRQ_STATE_DISABLE
:
3955 mec_int_cntl
= RREG32(mec_int_cntl_reg
);
3956 mec_int_cntl
= REG_SET_FIELD(mec_int_cntl
, CP_ME1_PIPE0_INT_CNTL
,
3957 TIME_STAMP_INT_ENABLE
, 0);
3958 WREG32(mec_int_cntl_reg
, mec_int_cntl
);
3960 case AMDGPU_IRQ_STATE_ENABLE
:
3961 mec_int_cntl
= RREG32(mec_int_cntl_reg
);
3962 mec_int_cntl
= REG_SET_FIELD(mec_int_cntl
, CP_ME1_PIPE0_INT_CNTL
,
3963 TIME_STAMP_INT_ENABLE
, 1);
3964 WREG32(mec_int_cntl_reg
, mec_int_cntl
);
3971 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device
*adev
,
3972 struct amdgpu_irq_src
*source
,
3974 enum amdgpu_interrupt_state state
)
3979 case AMDGPU_IRQ_STATE_DISABLE
:
3980 cp_int_cntl
= RREG32(mmCP_INT_CNTL_RING0
);
3981 cp_int_cntl
= REG_SET_FIELD(cp_int_cntl
, CP_INT_CNTL_RING0
,
3982 PRIV_REG_INT_ENABLE
, 0);
3983 WREG32(mmCP_INT_CNTL_RING0
, cp_int_cntl
);
3985 case AMDGPU_IRQ_STATE_ENABLE
:
3986 cp_int_cntl
= RREG32(mmCP_INT_CNTL_RING0
);
3987 cp_int_cntl
= REG_SET_FIELD(cp_int_cntl
, CP_INT_CNTL_RING0
,
3988 PRIV_REG_INT_ENABLE
, 0);
3989 WREG32(mmCP_INT_CNTL_RING0
, cp_int_cntl
);
3998 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device
*adev
,
3999 struct amdgpu_irq_src
*source
,
4001 enum amdgpu_interrupt_state state
)
4006 case AMDGPU_IRQ_STATE_DISABLE
:
4007 cp_int_cntl
= RREG32(mmCP_INT_CNTL_RING0
);
4008 cp_int_cntl
= REG_SET_FIELD(cp_int_cntl
, CP_INT_CNTL_RING0
,
4009 PRIV_INSTR_INT_ENABLE
, 0);
4010 WREG32(mmCP_INT_CNTL_RING0
, cp_int_cntl
);
4012 case AMDGPU_IRQ_STATE_ENABLE
:
4013 cp_int_cntl
= RREG32(mmCP_INT_CNTL_RING0
);
4014 cp_int_cntl
= REG_SET_FIELD(cp_int_cntl
, CP_INT_CNTL_RING0
,
4015 PRIV_INSTR_INT_ENABLE
, 1);
4016 WREG32(mmCP_INT_CNTL_RING0
, cp_int_cntl
);
4025 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device
*adev
,
4026 struct amdgpu_irq_src
*src
,
4028 enum amdgpu_interrupt_state state
)
4031 case AMDGPU_CP_IRQ_GFX_EOP
:
4032 gfx_v8_0_set_gfx_eop_interrupt_state(adev
, state
);
4034 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
:
4035 gfx_v8_0_set_compute_eop_interrupt_state(adev
, 1, 0, state
);
4037 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP
:
4038 gfx_v8_0_set_compute_eop_interrupt_state(adev
, 1, 1, state
);
4040 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP
:
4041 gfx_v8_0_set_compute_eop_interrupt_state(adev
, 1, 2, state
);
4043 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP
:
4044 gfx_v8_0_set_compute_eop_interrupt_state(adev
, 1, 3, state
);
4046 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP
:
4047 gfx_v8_0_set_compute_eop_interrupt_state(adev
, 2, 0, state
);
4049 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP
:
4050 gfx_v8_0_set_compute_eop_interrupt_state(adev
, 2, 1, state
);
4052 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP
:
4053 gfx_v8_0_set_compute_eop_interrupt_state(adev
, 2, 2, state
);
4055 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP
:
4056 gfx_v8_0_set_compute_eop_interrupt_state(adev
, 2, 3, state
);
4064 static int gfx_v8_0_eop_irq(struct amdgpu_device
*adev
,
4065 struct amdgpu_irq_src
*source
,
4066 struct amdgpu_iv_entry
*entry
)
4069 u8 me_id
, pipe_id
, queue_id
;
4070 struct amdgpu_ring
*ring
;
4072 DRM_DEBUG("IH: CP EOP\n");
4073 me_id
= (entry
->ring_id
& 0x0c) >> 2;
4074 pipe_id
= (entry
->ring_id
& 0x03) >> 0;
4075 queue_id
= (entry
->ring_id
& 0x70) >> 4;
4079 amdgpu_fence_process(&adev
->gfx
.gfx_ring
[0]);
4083 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
4084 ring
= &adev
->gfx
.compute_ring
[i
];
4085 /* Per-queue interrupt is supported for MEC starting from VI.
4086 * The interrupt can only be enabled/disabled per pipe instead of per queue.
4088 if ((ring
->me
== me_id
) && (ring
->pipe
== pipe_id
) && (ring
->queue
== queue_id
))
4089 amdgpu_fence_process(ring
);
4096 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device
*adev
,
4097 struct amdgpu_irq_src
*source
,
4098 struct amdgpu_iv_entry
*entry
)
4100 DRM_ERROR("Illegal register access in command stream\n");
4101 schedule_work(&adev
->reset_work
);
4105 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device
*adev
,
4106 struct amdgpu_irq_src
*source
,
4107 struct amdgpu_iv_entry
*entry
)
4109 DRM_ERROR("Illegal instruction in command stream\n");
4110 schedule_work(&adev
->reset_work
);
4114 const struct amdgpu_ip_funcs gfx_v8_0_ip_funcs
= {
4115 .early_init
= gfx_v8_0_early_init
,
4117 .sw_init
= gfx_v8_0_sw_init
,
4118 .sw_fini
= gfx_v8_0_sw_fini
,
4119 .hw_init
= gfx_v8_0_hw_init
,
4120 .hw_fini
= gfx_v8_0_hw_fini
,
4121 .suspend
= gfx_v8_0_suspend
,
4122 .resume
= gfx_v8_0_resume
,
4123 .is_idle
= gfx_v8_0_is_idle
,
4124 .wait_for_idle
= gfx_v8_0_wait_for_idle
,
4125 .soft_reset
= gfx_v8_0_soft_reset
,
4126 .print_status
= gfx_v8_0_print_status
,
4127 .set_clockgating_state
= gfx_v8_0_set_clockgating_state
,
4128 .set_powergating_state
= gfx_v8_0_set_powergating_state
,
4131 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx
= {
4132 .get_rptr
= gfx_v8_0_ring_get_rptr_gfx
,
4133 .get_wptr
= gfx_v8_0_ring_get_wptr_gfx
,
4134 .set_wptr
= gfx_v8_0_ring_set_wptr_gfx
,
4136 .emit_ib
= gfx_v8_0_ring_emit_ib
,
4137 .emit_fence
= gfx_v8_0_ring_emit_fence_gfx
,
4138 .emit_semaphore
= gfx_v8_0_ring_emit_semaphore
,
4139 .emit_vm_flush
= gfx_v8_0_ring_emit_vm_flush
,
4140 .emit_gds_switch
= gfx_v8_0_ring_emit_gds_switch
,
4141 .test_ring
= gfx_v8_0_ring_test_ring
,
4142 .test_ib
= gfx_v8_0_ring_test_ib
,
4143 .is_lockup
= gfx_v8_0_ring_is_lockup
,
4146 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute
= {
4147 .get_rptr
= gfx_v8_0_ring_get_rptr_compute
,
4148 .get_wptr
= gfx_v8_0_ring_get_wptr_compute
,
4149 .set_wptr
= gfx_v8_0_ring_set_wptr_compute
,
4151 .emit_ib
= gfx_v8_0_ring_emit_ib
,
4152 .emit_fence
= gfx_v8_0_ring_emit_fence_compute
,
4153 .emit_semaphore
= gfx_v8_0_ring_emit_semaphore
,
4154 .emit_vm_flush
= gfx_v8_0_ring_emit_vm_flush
,
4155 .emit_gds_switch
= gfx_v8_0_ring_emit_gds_switch
,
4156 .test_ring
= gfx_v8_0_ring_test_ring
,
4157 .test_ib
= gfx_v8_0_ring_test_ib
,
4158 .is_lockup
= gfx_v8_0_ring_is_lockup
,
4161 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device
*adev
)
4165 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++)
4166 adev
->gfx
.gfx_ring
[i
].funcs
= &gfx_v8_0_ring_funcs_gfx
;
4168 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++)
4169 adev
->gfx
.compute_ring
[i
].funcs
= &gfx_v8_0_ring_funcs_compute
;
4172 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs
= {
4173 .set
= gfx_v8_0_set_eop_interrupt_state
,
4174 .process
= gfx_v8_0_eop_irq
,
4177 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs
= {
4178 .set
= gfx_v8_0_set_priv_reg_fault_state
,
4179 .process
= gfx_v8_0_priv_reg_irq
,
4182 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs
= {
4183 .set
= gfx_v8_0_set_priv_inst_fault_state
,
4184 .process
= gfx_v8_0_priv_inst_irq
,
4187 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device
*adev
)
4189 adev
->gfx
.eop_irq
.num_types
= AMDGPU_CP_IRQ_LAST
;
4190 adev
->gfx
.eop_irq
.funcs
= &gfx_v8_0_eop_irq_funcs
;
4192 adev
->gfx
.priv_reg_irq
.num_types
= 1;
4193 adev
->gfx
.priv_reg_irq
.funcs
= &gfx_v8_0_priv_reg_irq_funcs
;
4195 adev
->gfx
.priv_inst_irq
.num_types
= 1;
4196 adev
->gfx
.priv_inst_irq
.funcs
= &gfx_v8_0_priv_inst_irq_funcs
;
4199 static void gfx_v8_0_set_gds_init(struct amdgpu_device
*adev
)
4201 /* init asci gds info */
4202 adev
->gds
.mem
.total_size
= RREG32(mmGDS_VMID0_SIZE
);
4203 adev
->gds
.gws
.total_size
= 64;
4204 adev
->gds
.oa
.total_size
= 16;
4206 if (adev
->gds
.mem
.total_size
== 64 * 1024) {
4207 adev
->gds
.mem
.gfx_partition_size
= 4096;
4208 adev
->gds
.mem
.cs_partition_size
= 4096;
4210 adev
->gds
.gws
.gfx_partition_size
= 4;
4211 adev
->gds
.gws
.cs_partition_size
= 4;
4213 adev
->gds
.oa
.gfx_partition_size
= 4;
4214 adev
->gds
.oa
.cs_partition_size
= 1;
4216 adev
->gds
.mem
.gfx_partition_size
= 1024;
4217 adev
->gds
.mem
.cs_partition_size
= 1024;
4219 adev
->gds
.gws
.gfx_partition_size
= 16;
4220 adev
->gds
.gws
.cs_partition_size
= 16;
4222 adev
->gds
.oa
.gfx_partition_size
= 4;
4223 adev
->gds
.oa
.cs_partition_size
= 4;
4227 static u32
gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device
*adev
,
4230 u32 mask
= 0, tmp
, tmp1
;
4233 gfx_v8_0_select_se_sh(adev
, se
, sh
);
4234 tmp
= RREG32(mmCC_GC_SHADER_ARRAY_CONFIG
);
4235 tmp1
= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG
);
4236 gfx_v8_0_select_se_sh(adev
, 0xffffffff, 0xffffffff);
4243 for (i
= 0; i
< adev
->gfx
.config
.max_cu_per_sh
; i
++) {
4248 return (~tmp
) & mask
;
4251 int gfx_v8_0_get_cu_info(struct amdgpu_device
*adev
,
4252 struct amdgpu_cu_info
*cu_info
)
4254 int i
, j
, k
, counter
, active_cu_number
= 0;
4255 u32 mask
, bitmap
, ao_bitmap
, ao_cu_mask
= 0;
4257 if (!adev
|| !cu_info
)
4260 mutex_lock(&adev
->grbm_idx_mutex
);
4261 for (i
= 0; i
< adev
->gfx
.config
.max_shader_engines
; i
++) {
4262 for (j
= 0; j
< adev
->gfx
.config
.max_sh_per_se
; j
++) {
4266 bitmap
= gfx_v8_0_get_cu_active_bitmap(adev
, i
, j
);
4267 cu_info
->bitmap
[i
][j
] = bitmap
;
4269 for (k
= 0; k
< adev
->gfx
.config
.max_cu_per_sh
; k
++) {
4270 if (bitmap
& mask
) {
4277 active_cu_number
+= counter
;
4278 ao_cu_mask
|= (ao_bitmap
<< (i
* 16 + j
* 8));
4282 cu_info
->number
= active_cu_number
;
4283 cu_info
->ao_cu_mask
= ao_cu_mask
;
4284 mutex_unlock(&adev
->grbm_idx_mutex
);