/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include "amdgpu_gfx.h"
#include "vi_structs.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE      2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN     0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN   0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN     0x22011003

#define ARRAY_MODE(x)		((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)		((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)		((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)		((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)		((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)		((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)		((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
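
/*
 * The helpers above only shift a field value into position within the
 * GB_TILE_MODEn / GB_MACROTILE_MODEn register layout; they do not mask.
 * With illustrative values, a tile-mode word is composed as e.g.:
 *
 *	ARRAY_MODE(4) | PIPE_CONFIG(2) | TILE_SPLIT(3) | MICRO_TILE_MODE_NEW(1)
 *
 * so callers must pass values that already fit the field widths defined
 * in gca/gfx_8_0_sh_mask.h.
 */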
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
};
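
/*
 * The BPM registers above are banked per CU and are not directly
 * memory-mapped; they are reached through the RLC serdes interface using
 * the SET/CLE_BPM_SERDES_CMD commands (an assumption based on how the
 * serdes command helpers elsewhere in this file use these values, not a
 * statement from the original comments).
 */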
#define RLC_FormatDirectRegListLength	14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
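
/*
 * Note on the Polaris "_2.bin" entries above: they are updated firmware
 * images.  gfx_v8_0_init_microcode() below requests the "_2" variant first
 * and falls back to the original name when request_firmware() returns
 * -ENOENT, so either set of files is sufficient.
 */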
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
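
/*
 * The golden-setting tables below are consumed three dwords at a time by
 * amdgpu_program_register_sequence(): { register, and_mask, or_value }.
 * Roughly (a sketch, not a verbatim copy of that helper):
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~and_mask;
 *	tmp |= or_value;
 *	WREG32(reg, tmp);
 *
 * with an and_mask of 0xffffffff treated as "replace the whole register".
 */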
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
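
/*
 * Program the per-ASIC "golden" register values.  Each chip gets its
 * clock-gating init sequence (where one exists), its golden settings,
 * and its common settings, in that order.
 */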
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
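
/*
 * Basic ring sanity test: write 0xCAFEDEAD to a scratch register from the
 * CPU, emit a SET_UCONFIG_REG packet that writes 0xDEADBEEF to the same
 * register, then poll (up to adev->usec_timeout) until the GPU write lands.
 */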
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
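
/*
 * Same idea as the ring test above, but the SET_UCONFIG_REG write comes
 * from an indirect buffer, so this additionally exercises IB fetch and the
 * fence path (dma_fence_wait_timeout() instead of register polling).
 */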
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released with feature version #46
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the JT (jump table) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
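
/*
 * Copy the CP jump tables (the "jt" section of each CP firmware image:
 * CE, PFP, ME, MEC and, on Carrizo, MEC2) back to back into the RLC
 * cp_table buffer, so the RLC can feed them to the micro engines.
 */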
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
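
/*
 * The blobs below are pre-assembled GCN ISA used by the Carrizo EDC GPR
 * workaround: the vgpr/sgpr shaders simply initialize every VGPR/SGPR
 * (the trailing dwords are, by our reading of the encoding, s_barrier
 * followed by s_endpgm), and the *_init_regs tables after them describe
 * the compute dispatch state used to run those shaders.
 */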
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
};
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);
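	/*
	 * Size the IB from the packet layout emitted below: each
	 * (reg, value) pair costs 3 dwords (SET_SH_REG header + reg
	 * offset + value), the PGM_LO/HI write costs 4, DISPATCH_DIRECT
	 * costs 5 and the EVENT_WRITE flush costs 2; the final *4
	 * converts dwords to bytes.
	 */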
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);
	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
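	/*
	 * Layout note: the shader binaries live in the same IB as the
	 * command stream, at vgpr_offset/sgpr_offset.  COMPUTE_PGM_LO/HI
	 * take a 256-byte-aligned address, hence the >> 8 below.
	 */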
	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}

	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}

	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}

	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
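		/*
		 * Worked example of the row-size formula below: 4 bytes per
		 * column times 2^(8 + NOOFCOLS) columns, so NOOFCOLS = 0
		 * gives (4 * 256) / 1024 = 1 KB and NOOFCOLS = 2 gives the
		 * 4 KB clamp value.
		 */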
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}
	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
			+ (ring_id * GFX8_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
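	/*
	 * Each MEC pipe has its own EOP interrupt source, so map the
	 * (me, pipe) pair onto the linear
	 * AMDGPU_CP_IRQ_COMPUTE_MEC*_PIPE*_EOP ids; me is 1-based here
	 * because mec0 is me1.
	 */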
	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_TONGA:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_POLARIS10:
	case CHIP_CARRIZO:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues - allocate horizontally across pipes */
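	/*
	 * Rationale for the loop order below: iterating queue-major within
	 * each MEC places consecutive ring ids on different hardware
	 * pipes, so a small number of active rings spreads across pipes
	 * instead of serializing on one.
	 */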
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
							       ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);
	amdgpu_bo_free_kernel(&adev->virt.csa_obj, &adev->virt.csa_vmid0_addr, NULL);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				      &adev->gfx.rlc.cp_table_gpu_addr,
				      (void **)&adev->gfx.rlc.cp_table_ptr);
	}

	gfx_v8_0_free_microcode(adev);

	return 0;
}
static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;
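	/*
	 * modearray[i] is staged for mmGB_TILE_MODE0 + i and mod2array[i]
	 * for mmGB_MACROTILE_MODE0 + i; each per-ASIC case below fills
	 * the tables and then writes them back in a loop, skipping any
	 * indices it deliberately leaves at the hardware default.
	 */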
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
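		/*
		 * Write-back: tile modes 7, 12, 17 and 23 are left at their
		 * reset values on this ASIC (they were never filled in
		 * above), so the loop below skips them.
		 */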
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_FIJI:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_STONEY:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 &&
			    reg_offset != 17 && reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
3279 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 &&
			    reg_offset != 17 && reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	}
}
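/*
 * Note on gfx_v8_0_tiling_mode_table_init(): every GB_TILE_MODEn /
 * GB_MACROTILE_MODEn word above is simply the OR of the shift macros
 * defined at the top of this file, e.g. (illustrative only, not an
 * extra table entry):
 *
 *	u32 mode = ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 *		   PIPE_CONFIG(ADDR_SURF_P2) |
 *		   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
 *		   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING);
 *
 * Indices skipped when writing (7 for the macrotile table, plus
 * 7/12/17/23 for the tile table on Stoney/Carrizo) are unused on
 * those parts and stay at zero; userspace surface-layout code reads
 * the same tables back, so the skip lists must match its expectations.
 */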
static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
				  u32 se_num, u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32(mmGRBM_GFX_INDEX, data);
}
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_RB_BACKEND_DISABLE) |
		RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}
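/*
 * CC_RB_BACKEND_DISABLE (fuses) and GC_USER_RB_BACKEND_DISABLE
 * (BIOS/driver) both carry one set bit per *disabled* render backend,
 * so the active bitmap is the complement of their OR, masked down to
 * the number of RBs a single SH can host.
 */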
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMID SH_MEM registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
static void gfx_v8_0_config_init(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	default:
		adev->gfx.config.double_offchip_lds_buf = 1;
		break;
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		adev->gfx.config.double_offchip_lds_buf = 0;
		break;
	}
}
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	/* tiling config */
	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcast
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);
}
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
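/*
 * The CSIB (clear state indirect buffer) programmed here lets the RLC
 * replay the clear state on its own, e.g. around clockgating and
 * powergating transitions, without the driver having to resubmit it
 * through the ring.
 */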
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
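/*
 * Everything streamed into ARAM / GPM scratch above feeds the RLC
 * save-restore machine: it is the list of registers the RLC saves
 * before power-gating the GFX block and restores afterwards, which is
 * why this setup only runs on PG-capable parts (see
 * gfx_v8_0_init_pg() below).
 */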
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
}
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}
}
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}

static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* on APUs (e.g. Carrizo), the CP interrupt is enabled after the CP is initialized */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy rlc firmware loading */
		r = gfx_v8_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
}
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
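/*
 * The submission above is the canonical VI clear-state preamble:
 * PREAMBLE_CNTL(begin) -> CONTEXT_CONTROL -> SET_CONTEXT_REG dump of
 * vi_cs_data -> per-asic PA_SC_RASTER_CONFIG values ->
 * PREAMBLE_CNTL(end) -> CLEAR_STATE -> SET_BASE for the CE
 * partitions. gfx_v8_0_get_csb_size() must stay in sync with this
 * sequence, since it sizes the ring allocation.
 */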
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
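/*
 * CP_RB0_CNTL__RB_RPTR_WR_ENA is only set transiently above: it lets
 * the driver force the read pointer to a known value while the ring
 * is being (re)initialized, and the final WREG32(mmCP_RB0_CNTL, tmp)
 * drops it again before the ring goes live.
 */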
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
	udelay(50);
}
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
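/* Map all kernel compute queues (KCQs) through the KIQ: one
 * SET_RESOURCES packet describing the queue mask, then one MAP_QUEUES
 * packet per compute ring, followed by a scratch-register write that is
 * polled to confirm the KIQ processed the whole sequence.
 */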
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	uint64_t queue_mask = 0;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating.
		 */
		if (WARN_ON(i >= (sizeof(queue_mask) * 8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}

	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
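/* Build the memory queue descriptor (MQD) for this ring: a CPU-side
 * image of the CP_MQD_ and CP_HQD_ register block that is later written
 * to the hardware queue descriptor by gfx_v8_0_mqd_commit() or consumed
 * by the KIQ when the queue is mapped.
 */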
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			    (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
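/* Write a previously initialized MQD into the currently selected HQD
 * register space; the caller must hold srbm_mutex and have selected the
 * target me/pipe/queue via vi_srbm_select().
 */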
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}

	return 0;
}
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		if (adev->in_gpu_reset) {
			/* move reset ring buffer to here to workaround
			 * compute ring test failed
			 */
			ring->wptr = 0;
			amdgpu_ring_clear_ring(ring);
		}
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy firmware loading */
		r = gfx_v8_0_cp_gfx_load_microcode(adev);
		if (r)
			return r;

		r = gfx_v8_0_cp_compute_load_microcode(adev);
		if (r)
			return r;
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}

static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}
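/* Tear down one compute queue through the KIQ: an UNMAP_QUEUES packet
 * with the RESET_QUEUES action, again confirmed by polling a scratch
 * register the KIQ writes when it is done.
 */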
static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t scratch, tmp = 0;
	int r, i;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	r = amdgpu_ring_alloc(kiq_ring, 10);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}

	/* unmap queues */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);

	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	amdgpu_set_powergating_state(adev,
				     AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.in_suspend = true;
	return gfx_v8_0_hw_fini(adev);
}

static int gfx_v8_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gfx_v8_0_hw_init(adev);
	adev->gfx.in_suspend = false;
	return r;
}

static bool gfx_v8_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}

static int gfx_v8_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_is_idle(handle))
			return 0;

		udelay(1);
	}
	return -ETIMEDOUT;
}
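/* Inspect the GRBM/SRBM status registers and record which soft-reset
 * bits would be needed to recover; the result is cached in adev->gfx
 * for the pre/post soft-reset handlers below.
 */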
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32(mmSQ_IND_DATA);
}
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};

static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
				     AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12))
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							 bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}

static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}

	return 0;
}
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
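/* Broadcast a BPM set/clear command to every CU over the RLC serdes;
 * used below to toggle the CGCG/CGLS/MGCG override state on all
 * shader engines at once.
 */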
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
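/* RLC safe mode handshake: request safe mode through RLC_SAFE_MODE and
 * wait until the RLC reports the GFX clock/power status before touching
 * clock- or power-gating registers.
 */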
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
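/* Medium grain clock gating (MGCG/MGLS): the numbered steps in the
 * comments follow the hardware programming sequence and must run with
 * the RLC in safe mode.
 */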
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * ===  MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * ===  CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
					  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_CG,
				      pp_support_state,
				      pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_MG,
				      pp_support_state,
				      pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
					  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_CG,
				      pp_support_state,
				      pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_3D,
				      pp_support_state,
				      pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_MG,
				      pp_support_state,
				      pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_RLC,
				      pp_support_state,
				      pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_CP,
				      pp_support_state,
				      pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}
static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}

static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		/* XXX check if swapping is necessary on BE */
		return ring->adev->wb.wb[ring->wptr_offs];
	else
		return RREG32(mmCP_RB0_WPTR);
}

static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}

static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}

static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);
}
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}

static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, tmp, reg;
	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	tmp = RREG32(reg);
	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
	WREG32(reg, tmp);
}

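/* Track which pipes currently hold a high-priority reservation in
 * pipe_reserve_bitmap.  Once the last reservation is dropped, every
 * ring gets its full SPI_WCL_PIPE_PERCENT quota back; otherwise all
 * pipes without a current reservation are throttled to the minimum.
 */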
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}

static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

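/* Scheduler callback: raise or drop the HQD pipe/queue priority and the
 * pipe reservation for a compute ring when its scheduler priority
 * changes to or from high.
 */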
static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
					       enum amd_sched_priority priority)
{
	struct amdgpu_device *adev = ring->adev;
	bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW;

	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return;

	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

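/* KIQ fences are emitted with WRITE_DATA rather than RELEASE_MEM: only
 * a 32-bit sequence writeback slot is allocated for them, and the
 * optional interrupt is raised by writing the GENERIC2_INT status bit
 * into CPC_INT_STATUS directly (src_id 178).
 */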
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

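/* Emit a CONTEXT_CONTROL packet.  dw2 selects which state blocks the CP
 * (re)loads on a context switch; load_ce_ram is also requested the
 * first time a preamble IB is presented, even without a context switch.
 */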
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

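/* COND_EXEC support: init emits a COND_EXEC packet with a 0x55aa55aa
 * placeholder for the DW count and returns its ring offset; patch later
 * replaces the placeholder with the real number of DWs to skip when
 * *cond_exe_gpu_addr reads back as zero.
 */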
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

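/* Ring-based register access, used by the KIQ on behalf of the driver
 * under SR-IOV: emit_rreg copies a register into the writeback slot at
 * adev->virt.reg_val_offs via COPY_DATA, emit_wreg writes an immediate
 * value to a register via WRITE_DATA.
 */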
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

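/* CP privileged register/instruction faults: the enable bits live in
 * CP_INT_CNTL_RING0, and the process handlers further below report the
 * fault and schedule a GPU reset.
 */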
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

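/* EOP interrupt handler: decode me/pipe/queue from the IV ring_id and
 * signal fence completion on the matching gfx or compute ring.
 */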
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}

static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
		       prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

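/* Walk all shader engines/arrays, apply any user-requested CU disable
 * masks, and build the active and always-on CU bitmaps reported in
 * adev->gfx.cu_info.
 */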
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

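/* CE/DE metadata, emitted when running under SR-IOV virtualization:
 * write the CE and DE payloads (including the GDS backup address) into
 * the reserved context save area so the CP can restore mid-IB state
 * after a world switch or preemption.
 */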
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	static union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}

static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	static union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}