/* drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c */
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS 1
#define GFX8_MEC_HPD_SIZE 2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD 1
#define CLE_BPM_SERDES_CMD 0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,    /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,        /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,  /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,  /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,  /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength 14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

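/* GDS base/size and GWS/OA register offsets for each of the 16 VMIDs */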
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

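/*
 * The "golden" tables below are {register, and-mask, or-value} triples
 * consumed by amdgpu_program_register_sequence(): the masked bits of each
 * register are replaced with the given value (an and-mask of 0xffffffff
 * writes the value directly).
 */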
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

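/* Apply the per-ASIC golden register sequences defined above. */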
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

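/* Set up the pool of CP scratch registers used by the ring/IB tests. */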
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

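/*
 * Basic ring sanity test: seed a scratch register, have the CP overwrite
 * it via a SET_UCONFIG_REG packet, and poll until the new value appears.
 */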
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

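/*
 * Same sanity test as above, but the scratch write is submitted through
 * an indirect buffer and completion is detected via its fence.
 */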
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

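/* Release all firmware images requested by gfx_v8_0_init_microcode(). */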
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

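/*
 * Fetch and validate the CP (PFP/ME/CE/MEC/MEC2) and RLC microcode for the
 * detected ASIC, parse the firmware headers, and register the images for
 * SMU-based loading when applicable.
 */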
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs
	 * was formally released in feature version 46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
		le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
		le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
		le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
		le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
		le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the CP jump table (JT) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}

	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

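/* Build the PM4 clear-state buffer (CSB) stream into the RLC clear-state BO. */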
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

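/*
 * Copy the CP microcode jump tables (CE/PFP/ME/MEC, plus MEC2 on Carrizo)
 * into the RLC-owned cp_table buffer.
 */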
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

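/*
 * Allocate and fill the RLC clear-state buffer, and on Carrizo/Stoney the
 * CP jump-table buffer as well.
 */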
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

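/*
 * Take ownership of the compute queues and allocate the MEC HPD/EOP buffer
 * (one GFX8_MEC_HPD_SIZE slot per compute ring).
 */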
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

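/*
 * Hand-assembled GCN compute shaders used by the EDC GPR workaround below
 * to initialize the VGPRs and SGPRs.
 */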
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

1459 static const u32 vgpr_init_regs[] =
1460 {
1461 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1462 mmCOMPUTE_RESOURCE_LIMITS, 0,
1463 mmCOMPUTE_NUM_THREAD_X, 256*4,
1464 mmCOMPUTE_NUM_THREAD_Y, 1,
1465 mmCOMPUTE_NUM_THREAD_Z, 1,
1466 mmCOMPUTE_PGM_RSRC2, 20,
1467 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1468 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1469 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1470 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1471 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1472 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1473 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1474 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1475 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1476 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1477 };
1478
1479 static const u32 sgpr1_init_regs[] =
1480 {
1481 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1482 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1483 mmCOMPUTE_NUM_THREAD_X, 256*5,
1484 mmCOMPUTE_NUM_THREAD_Y, 1,
1485 mmCOMPUTE_NUM_THREAD_Z, 1,
1486 mmCOMPUTE_PGM_RSRC2, 20,
1487 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1488 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1489 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1490 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1491 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1492 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1493 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1494 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1495 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1496 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1497 };
1498
1499 static const u32 sgpr2_init_regs[] =
1500 {
1501 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1502 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1503 mmCOMPUTE_NUM_THREAD_X, 256*5,
1504 mmCOMPUTE_NUM_THREAD_Y, 1,
1505 mmCOMPUTE_NUM_THREAD_Z, 1,
1506 mmCOMPUTE_PGM_RSRC2, 20,
1507 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1508 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1509 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1510 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1511 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1512 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1513 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1514 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1515 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1516 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1517 };
1518
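/* SEC/DED (single-error-correct / double-error-detect) counter registers;
 * reading them clears the counts, see the read-back loop at the end of the
 * EDC workaround below
 */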
1519 static const u32 sec_ded_counter_registers[] =
1520 {
1521 mmCPC_EDC_ATC_CNT,
1522 mmCPC_EDC_SCRATCH_CNT,
1523 mmCPC_EDC_UCODE_CNT,
1524 mmCPF_EDC_ATC_CNT,
1525 mmCPF_EDC_ROQ_CNT,
1526 mmCPF_EDC_TAG_CNT,
1527 mmCPG_EDC_ATC_CNT,
1528 mmCPG_EDC_DMA_CNT,
1529 mmCPG_EDC_TAG_CNT,
1530 mmDC_EDC_CSINVOC_CNT,
1531 mmDC_EDC_RESTORE_CNT,
1532 mmDC_EDC_STATE_CNT,
1533 mmGDS_EDC_CNT,
1534 mmGDS_EDC_GRBM_CNT,
1535 mmGDS_EDC_OA_DED,
1536 mmSPI_EDC_CNT,
1537 mmSQC_ATC_EDC_GATCL1_CNT,
1538 mmSQC_EDC_CNT,
1539 mmSQ_EDC_DED_CNT,
1540 mmSQ_EDC_INFO,
1541 mmSQ_EDC_SEC_CNT,
1542 mmTCC_EDC_CNT,
1543 mmTCP_ATC_EDC_GATCL1_CNT,
1544 mmTCP_EDC_CNT,
1545 mmTD_EDC_CNT
1546 };
1547
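/* Prime every VGPR and SGPR bank with known data via three compute
 * dispatches so the EDC logic starts from an initialized register file;
 * Carrizo only.
 */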
1548 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1549 {
1550 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1551 struct amdgpu_ib ib;
1552 struct dma_fence *f = NULL;
1553 int r, i;
1554 u32 tmp;
1555 unsigned total_size, vgpr_offset, sgpr_offset;
1556 u64 gpu_addr;
1557
1558 /* only supported on CZ */
1559 if (adev->asic_type != CHIP_CARRIZO)
1560 return 0;
1561
1562 /* bail if the compute ring is not ready */
1563 if (!ring->ready)
1564 return 0;
1565
1566 tmp = RREG32(mmGB_EDC_MODE);
1567 WREG32(mmGB_EDC_MODE, 0);
1568
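/* per dispatch: 3 dwords (SET_SH_REG header, offset, value) for each
 * register pair, 4 for the PGM_LO/HI write, 5 for DISPATCH_DIRECT and
 * 2 for the EVENT_WRITE, all times 4 to convert dwords to bytes
 */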
1569 total_size =
1570 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1571 total_size +=
1572 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1573 total_size +=
1574 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1575 total_size = ALIGN(total_size, 256);
1576 vgpr_offset = total_size;
1577 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1578 sgpr_offset = total_size;
1579 total_size += sizeof(sgpr_init_compute_shader);
1580
1581 /* allocate an indirect buffer to put the commands in */
1582 memset(&ib, 0, sizeof(ib));
1583 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1584 if (r) {
1585 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1586 return r;
1587 }
1588
1589 /* load the compute shaders */
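/* vgpr_offset/sgpr_offset are byte offsets; ib.ptr[] indexes dwords */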
1590 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1591 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1592
1593 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1594 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1595
1596 /* init the ib length to 0 */
1597 ib.length_dw = 0;
1598
1599 /* VGPR */
1600 /* write the register state for the compute dispatch */
1601 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1602 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1603 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1604 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1605 }
1606 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
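/* the CP expects a 256-byte-aligned address shifted right by 8 */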
1607 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1608 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1609 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1610 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1611 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1612
1613 /* write dispatch packet */
1614 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1615 ib.ptr[ib.length_dw++] = 8; /* x */
1616 ib.ptr[ib.length_dw++] = 1; /* y */
1617 ib.ptr[ib.length_dw++] = 1; /* z */
1618 ib.ptr[ib.length_dw++] =
1619 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1620
1621 /* write CS partial flush packet */
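/* VGT event 7 (CS_PARTIAL_FLUSH) with event index 4 */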
1622 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1623 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1624
1625 /* SGPR1 */
1626 /* write the register state for the compute dispatch */
1627 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1628 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1629 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1630 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1631 }
1632 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1633 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1634 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1635 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1636 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1637 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1638
1639 /* write dispatch packet */
1640 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1641 ib.ptr[ib.length_dw++] = 8; /* x */
1642 ib.ptr[ib.length_dw++] = 1; /* y */
1643 ib.ptr[ib.length_dw++] = 1; /* z */
1644 ib.ptr[ib.length_dw++] =
1645 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1646
1647 /* write CS partial flush packet */
1648 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1649 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1650
1651 /* SGPR2 */
1652 /* write the register state for the compute dispatch */
1653 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1654 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1655 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1656 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1657 }
1658 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1659 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1660 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1661 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1662 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1663 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1664
1665 /* write dispatch packet */
1666 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1667 ib.ptr[ib.length_dw++] = 8; /* x */
1668 ib.ptr[ib.length_dw++] = 1; /* y */
1669 ib.ptr[ib.length_dw++] = 1; /* z */
1670 ib.ptr[ib.length_dw++] =
1671 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1672
1673 /* write CS partial flush packet */
1674 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1675 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1676
1677 /* schedule the ib on the ring */
1678 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1679 if (r) {
1680 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1681 goto fail;
1682 }
1683
1684 /* wait for the GPU to finish processing the IB */
1685 r = dma_fence_wait(f, false);
1686 if (r) {
1687 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1688 goto fail;
1689 }
1690
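/* restore the GB_EDC_MODE value saved above, with DED_MODE=2 and
 * PROP_FED enabled
 */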
1691 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1692 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1693 WREG32(mmGB_EDC_MODE, tmp);
1694
1695 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1696 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1697 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1698
1699
1700 /* read back registers to clear the counters */
1701 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1702 RREG32(sec_ded_counter_registers[i]);
1703
1704 fail:
1705 amdgpu_ib_free(adev, &ib, NULL);
1706 dma_fence_put(f);
1707
1708 return r;
1709 }
1710
1711 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1712 {
1713 u32 gb_addr_config;
1714 u32 mc_shared_chmap, mc_arb_ramcfg;
1715 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1716 u32 tmp;
1717 int ret;
1718
1719 switch (adev->asic_type) {
1720 case CHIP_TOPAZ:
1721 adev->gfx.config.max_shader_engines = 1;
1722 adev->gfx.config.max_tile_pipes = 2;
1723 adev->gfx.config.max_cu_per_sh = 6;
1724 adev->gfx.config.max_sh_per_se = 1;
1725 adev->gfx.config.max_backends_per_se = 2;
1726 adev->gfx.config.max_texture_channel_caches = 2;
1727 adev->gfx.config.max_gprs = 256;
1728 adev->gfx.config.max_gs_threads = 32;
1729 adev->gfx.config.max_hw_contexts = 8;
1730
1731 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1732 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1733 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1734 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1735 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1736 break;
1737 case CHIP_FIJI:
1738 adev->gfx.config.max_shader_engines = 4;
1739 adev->gfx.config.max_tile_pipes = 16;
1740 adev->gfx.config.max_cu_per_sh = 16;
1741 adev->gfx.config.max_sh_per_se = 1;
1742 adev->gfx.config.max_backends_per_se = 4;
1743 adev->gfx.config.max_texture_channel_caches = 16;
1744 adev->gfx.config.max_gprs = 256;
1745 adev->gfx.config.max_gs_threads = 32;
1746 adev->gfx.config.max_hw_contexts = 8;
1747
1748 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1749 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1750 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1751 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1752 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1753 break;
1754 case CHIP_POLARIS11:
1755 case CHIP_POLARIS12:
1756 ret = amdgpu_atombios_get_gfx_info(adev);
1757 if (ret)
1758 return ret;
1759 adev->gfx.config.max_gprs = 256;
1760 adev->gfx.config.max_gs_threads = 32;
1761 adev->gfx.config.max_hw_contexts = 8;
1762
1763 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1764 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1765 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1766 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1767 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1768 break;
1769 case CHIP_POLARIS10:
1770 ret = amdgpu_atombios_get_gfx_info(adev);
1771 if (ret)
1772 return ret;
1773 adev->gfx.config.max_gprs = 256;
1774 adev->gfx.config.max_gs_threads = 32;
1775 adev->gfx.config.max_hw_contexts = 8;
1776
1777 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1778 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1779 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1780 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1781 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1782 break;
1783 case CHIP_TONGA:
1784 adev->gfx.config.max_shader_engines = 4;
1785 adev->gfx.config.max_tile_pipes = 8;
1786 adev->gfx.config.max_cu_per_sh = 8;
1787 adev->gfx.config.max_sh_per_se = 1;
1788 adev->gfx.config.max_backends_per_se = 2;
1789 adev->gfx.config.max_texture_channel_caches = 8;
1790 adev->gfx.config.max_gprs = 256;
1791 adev->gfx.config.max_gs_threads = 32;
1792 adev->gfx.config.max_hw_contexts = 8;
1793
1794 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1795 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1796 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1797 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1798 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1799 break;
1800 case CHIP_CARRIZO:
1801 adev->gfx.config.max_shader_engines = 1;
1802 adev->gfx.config.max_tile_pipes = 2;
1803 adev->gfx.config.max_sh_per_se = 1;
1804 adev->gfx.config.max_backends_per_se = 2;
1805 adev->gfx.config.max_cu_per_sh = 8;
1806 adev->gfx.config.max_texture_channel_caches = 2;
1807 adev->gfx.config.max_gprs = 256;
1808 adev->gfx.config.max_gs_threads = 32;
1809 adev->gfx.config.max_hw_contexts = 8;
1810
1811 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1816 break;
1817 case CHIP_STONEY:
1818 adev->gfx.config.max_shader_engines = 1;
1819 adev->gfx.config.max_tile_pipes = 2;
1820 adev->gfx.config.max_sh_per_se = 1;
1821 adev->gfx.config.max_backends_per_se = 1;
1822 adev->gfx.config.max_cu_per_sh = 3;
1823 adev->gfx.config.max_texture_channel_caches = 2;
1824 adev->gfx.config.max_gprs = 256;
1825 adev->gfx.config.max_gs_threads = 16;
1826 adev->gfx.config.max_hw_contexts = 8;
1827
1828 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1829 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1830 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1831 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1832 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1833 break;
1834 default:
1835 adev->gfx.config.max_shader_engines = 2;
1836 adev->gfx.config.max_tile_pipes = 4;
1837 adev->gfx.config.max_cu_per_sh = 2;
1838 adev->gfx.config.max_sh_per_se = 1;
1839 adev->gfx.config.max_backends_per_se = 2;
1840 adev->gfx.config.max_texture_channel_caches = 4;
1841 adev->gfx.config.max_gprs = 256;
1842 adev->gfx.config.max_gs_threads = 32;
1843 adev->gfx.config.max_hw_contexts = 8;
1844
1845 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1846 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1847 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1848 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1849 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1850 break;
1851 }
1852
1853 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1854 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1855 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1856
1857 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1858 adev->gfx.config.mem_max_burst_length_bytes = 256;
1859 if (adev->flags & AMD_IS_APU) {
1860 /* Get memory bank mapping mode. */
1861 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1862 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1863 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1864
1865 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1866 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1867 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1868
1869 /* Validate settings in case only one DIMM is installed. */
1870 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1871 dimm00_addr_map = 0;
1872 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1873 dimm01_addr_map = 0;
1874 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1875 dimm10_addr_map = 0;
1876 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1877 dimm11_addr_map = 0;
1878
1879 /* If the DIMM address map is 8GB, the row size should be 2KB; otherwise 1KB. */
1880 /* If row size(DIMM1) != row size(DIMM0), use the larger row size. */
1881 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1882 adev->gfx.config.mem_row_size_in_kb = 2;
1883 else
1884 adev->gfx.config.mem_row_size_in_kb = 1;
1885 } else {
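/* row size = 4 bytes per column * 2^(8 + NOOFCOLS) columns, in KB */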
1886 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1887 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1888 if (adev->gfx.config.mem_row_size_in_kb > 4)
1889 adev->gfx.config.mem_row_size_in_kb = 4;
1890 }
1891
1892 adev->gfx.config.shader_engine_tile_size = 32;
1893 adev->gfx.config.num_gpus = 1;
1894 adev->gfx.config.multi_gpu_tile_size = 64;
1895
1896 /* fix up row size */
1897 switch (adev->gfx.config.mem_row_size_in_kb) {
1898 case 1:
1899 default:
1900 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1901 break;
1902 case 2:
1903 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1904 break;
1905 case 4:
1906 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1907 break;
1908 }
1909 adev->gfx.config.gb_addr_config = gb_addr_config;
1910
1911 return 0;
1912 }
1913
1914 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1915 int mec, int pipe, int queue)
1916 {
1917 int r;
1918 unsigned irq_type;
1919 struct amdgpu_ring *ring;
1920 
1921 ring = &adev->gfx.compute_ring[ring_id];
1922
1923 /* mec0 is me1 */
1924 ring->me = mec + 1;
1925 ring->pipe = pipe;
1926 ring->queue = queue;
1927
1928 ring->ring_obj = NULL;
1929 ring->use_doorbell = true;
1930 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1931 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1932 + (ring_id * GFX8_MEC_HPD_SIZE);
1933 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1934
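/* each MEC pipe has its own EOP interrupt source; queues on a pipe share it */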
1935 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1936 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1937 + ring->pipe;
1938
1939 /* type-2 packets are deprecated on MEC, use type-3 instead */
1940 r = amdgpu_ring_init(adev, ring, 1024,
1941 &adev->gfx.eop_irq, irq_type);
1942 if (r)
1943 return r;
1944
1945
1946 return 0;
1947 }
1948
1949 static int gfx_v8_0_sw_init(void *handle)
1950 {
1951 int i, j, k, r, ring_id;
1952 struct amdgpu_ring *ring;
1953 struct amdgpu_kiq *kiq;
1954 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1955
1956 switch (adev->asic_type) {
1957 case CHIP_FIJI:
1958 case CHIP_TONGA:
1959 case CHIP_POLARIS11:
1960 case CHIP_POLARIS12:
1961 case CHIP_POLARIS10:
1962 case CHIP_CARRIZO:
1963 adev->gfx.mec.num_mec = 2;
1964 break;
1965 case CHIP_TOPAZ:
1966 case CHIP_STONEY:
1967 default:
1968 adev->gfx.mec.num_mec = 1;
1969 break;
1970 }
1971
1972 adev->gfx.mec.num_pipe_per_mec = 4;
1973 adev->gfx.mec.num_queue_per_pipe = 8;
1974
1975 /* KIQ event */
1976 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
1977 if (r)
1978 return r;
1979
1980 /* EOP event */
1981 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
1982 if (r)
1983 return r;
1984
1985 /* Privileged reg */
1986 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
1987 &adev->gfx.priv_reg_irq);
1988 if (r)
1989 return r;
1990
1991 /* Privileged inst */
1992 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
1993 &adev->gfx.priv_inst_irq);
1994 if (r)
1995 return r;
1996
1997 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1998
1999 gfx_v8_0_scratch_init(adev);
2000
2001 r = gfx_v8_0_init_microcode(adev);
2002 if (r) {
2003 DRM_ERROR("Failed to load gfx firmware!\n");
2004 return r;
2005 }
2006
2007 r = gfx_v8_0_rlc_init(adev);
2008 if (r) {
2009 DRM_ERROR("Failed to init rlc BOs!\n");
2010 return r;
2011 }
2012
2013 r = gfx_v8_0_mec_init(adev);
2014 if (r) {
2015 DRM_ERROR("Failed to init MEC BOs!\n");
2016 return r;
2017 }
2018
2019 /* set up the gfx ring */
2020 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2021 ring = &adev->gfx.gfx_ring[i];
2022 ring->ring_obj = NULL;
2023 sprintf(ring->name, "gfx");
2024 /* no gfx doorbells on iceland */
2025 if (adev->asic_type != CHIP_TOPAZ) {
2026 ring->use_doorbell = true;
2027 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2028 }
2029
2030 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2031 AMDGPU_CP_IRQ_GFX_EOP);
2032 if (r)
2033 return r;
2034 }
2035
2036
2037 /* set up the compute queues - allocate horizontally across pipes */
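/* the pipe index (k) varies in the innermost loop, so successive rings land
 * on different pipes before any single pipe is given a second queue
 */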
2038 ring_id = 0;
2039 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2040 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2041 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2042 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2043 continue;
2044
2045 r = gfx_v8_0_compute_ring_init(adev,
2046 ring_id,
2047 i, k, j);
2048 if (r)
2049 return r;
2050
2051 ring_id++;
2052 }
2053 }
2054 }
2055
2056 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2057 if (r) {
2058 DRM_ERROR("Failed to init KIQ BOs!\n");
2059 return r;
2060 }
2061
2062 kiq = &adev->gfx.kiq;
2063 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2064 if (r)
2065 return r;
2066
2067 /* create MQDs for all compute queues, and the KIQ for the SR-IOV case */
2068 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2069 if (r)
2070 return r;
2071
2072 /* reserve GDS, GWS and OA resource for gfx */
2073 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2074 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2075 &adev->gds.gds_gfx_bo, NULL, NULL);
2076 if (r)
2077 return r;
2078
2079 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2080 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2081 &adev->gds.gws_gfx_bo, NULL, NULL);
2082 if (r)
2083 return r;
2084
2085 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2086 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2087 &adev->gds.oa_gfx_bo, NULL, NULL);
2088 if (r)
2089 return r;
2090
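/* 32KB of CE (constant engine) RAM */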
2091 adev->gfx.ce_ram_size = 0x8000;
2092
2093 r = gfx_v8_0_gpu_early_init(adev);
2094 if (r)
2095 return r;
2096
2097 return 0;
2098 }
2099
2100 static int gfx_v8_0_sw_fini(void *handle)
2101 {
2102 int i;
2103 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2104
2105 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2106 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2107 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2108
2109 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2110 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2111 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2112 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2113
2114 amdgpu_gfx_compute_mqd_sw_fini(adev);
2115 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2116 amdgpu_gfx_kiq_fini(adev);
2117 amdgpu_bo_free_kernel(&adev->virt.csa_obj, &adev->virt.csa_vmid0_addr, NULL);
2118
2119 gfx_v8_0_mec_fini(adev);
2120 gfx_v8_0_rlc_fini(adev);
2121 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2122 &adev->gfx.rlc.clear_state_gpu_addr,
2123 (void **)&adev->gfx.rlc.cs_ptr);
2124 if ((adev->asic_type == CHIP_CARRIZO) ||
2125 (adev->asic_type == CHIP_STONEY)) {
2126 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2127 &adev->gfx.rlc.cp_table_gpu_addr,
2128 (void **)&adev->gfx.rlc.cp_table_ptr);
2129 }
2130 gfx_v8_0_free_microcode(adev);
2131
2132 return 0;
2133 }
2134
2135 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2136 {
2137 uint32_t *modearray, *mod2array;
2138 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2139 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2140 u32 reg_offset;
2141
2142 modearray = adev->gfx.config.tile_mode_array;
2143 mod2array = adev->gfx.config.macrotile_mode_array;
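/* tile_mode_array[] is written to GB_TILE_MODE0 + i and
 * macrotile_mode_array[] to GB_MACROTILE_MODE0 + i; offsets skipped in the
 * write loops below appear to be reserved on the given ASIC
 */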
2144
2145 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2146 modearray[reg_offset] = 0;
2147
2148 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2149 mod2array[reg_offset] = 0;
2150
2151 switch (adev->asic_type) {
2152 case CHIP_TOPAZ:
2153 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2154 PIPE_CONFIG(ADDR_SURF_P2) |
2155 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2156 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2157 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2158 PIPE_CONFIG(ADDR_SURF_P2) |
2159 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2160 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2161 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2162 PIPE_CONFIG(ADDR_SURF_P2) |
2163 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2164 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2165 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2166 PIPE_CONFIG(ADDR_SURF_P2) |
2167 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2168 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2169 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2170 PIPE_CONFIG(ADDR_SURF_P2) |
2171 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2172 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2173 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2174 PIPE_CONFIG(ADDR_SURF_P2) |
2175 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2176 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2177 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2178 PIPE_CONFIG(ADDR_SURF_P2) |
2179 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2180 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2181 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2182 PIPE_CONFIG(ADDR_SURF_P2));
2183 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2184 PIPE_CONFIG(ADDR_SURF_P2) |
2185 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2186 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2187 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2188 PIPE_CONFIG(ADDR_SURF_P2) |
2189 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2190 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2191 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2192 PIPE_CONFIG(ADDR_SURF_P2) |
2193 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2194 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2195 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2196 PIPE_CONFIG(ADDR_SURF_P2) |
2197 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2198 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2199 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2200 PIPE_CONFIG(ADDR_SURF_P2) |
2201 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2202 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2203 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2204 PIPE_CONFIG(ADDR_SURF_P2) |
2205 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2206 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2207 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2208 PIPE_CONFIG(ADDR_SURF_P2) |
2209 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2210 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2211 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2212 PIPE_CONFIG(ADDR_SURF_P2) |
2213 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2214 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2215 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2216 PIPE_CONFIG(ADDR_SURF_P2) |
2217 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2219 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2220 PIPE_CONFIG(ADDR_SURF_P2) |
2221 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2222 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2223 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2224 PIPE_CONFIG(ADDR_SURF_P2) |
2225 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2226 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2227 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2228 PIPE_CONFIG(ADDR_SURF_P2) |
2229 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2231 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2232 PIPE_CONFIG(ADDR_SURF_P2) |
2233 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2234 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2235 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2236 PIPE_CONFIG(ADDR_SURF_P2) |
2237 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2238 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2239 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2240 PIPE_CONFIG(ADDR_SURF_P2) |
2241 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2242 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2243 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2244 PIPE_CONFIG(ADDR_SURF_P2) |
2245 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2247 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2248 PIPE_CONFIG(ADDR_SURF_P2) |
2249 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2251 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2252 PIPE_CONFIG(ADDR_SURF_P2) |
2253 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2254 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2255
2256 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2257 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2258 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2259 NUM_BANKS(ADDR_SURF_8_BANK));
2260 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2263 NUM_BANKS(ADDR_SURF_8_BANK));
2264 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2265 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2266 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2267 NUM_BANKS(ADDR_SURF_8_BANK));
2268 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2269 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2270 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2271 NUM_BANKS(ADDR_SURF_8_BANK));
2272 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2275 NUM_BANKS(ADDR_SURF_8_BANK));
2276 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2277 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2278 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2279 NUM_BANKS(ADDR_SURF_8_BANK));
2280 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2281 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2282 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2283 NUM_BANKS(ADDR_SURF_8_BANK));
2284 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2285 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2286 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2287 NUM_BANKS(ADDR_SURF_16_BANK));
2288 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2289 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2290 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2291 NUM_BANKS(ADDR_SURF_16_BANK));
2292 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2293 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2294 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2295 NUM_BANKS(ADDR_SURF_16_BANK));
2296 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2297 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2298 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2299 NUM_BANKS(ADDR_SURF_16_BANK));
2300 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2301 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2302 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2303 NUM_BANKS(ADDR_SURF_16_BANK));
2304 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2306 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2307 NUM_BANKS(ADDR_SURF_16_BANK));
2308 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2309 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2310 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2311 NUM_BANKS(ADDR_SURF_8_BANK));
2312
2313 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2314 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2315 reg_offset != 23)
2316 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2317
2318 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2319 if (reg_offset != 7)
2320 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2321
2322 break;
2323 case CHIP_FIJI:
2324 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2325 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2327 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2328 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2331 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2332 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2333 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2335 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2336 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2338 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2339 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2340 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2341 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2343 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2344 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2345 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2347 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2348 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2349 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2351 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2352 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2353 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2354 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2355 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2356 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2357 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2358 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2359 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2361 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2362 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2363 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2365 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2366 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2367 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2369 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2370 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2371 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2372 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2373 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2374 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2375 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2377 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2378 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2381 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2382 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2383 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2385 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2386 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2387 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2389 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2390 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2391 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2392 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2393 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2394 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2395 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2397 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2398 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2399 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2401 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2402 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2403 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2405 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2406 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2407 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2409 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2410 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2411 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2413 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2414 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2415 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2416 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2417 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2418 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2419 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2421 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2422 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2423 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2425 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2426 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2427 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2429 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2430 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2431 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2433 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2435 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2436 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2440 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2442 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2443 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2444 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2446
2447 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2449 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2450 NUM_BANKS(ADDR_SURF_8_BANK));
2451 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2452 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2453 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2454 NUM_BANKS(ADDR_SURF_8_BANK));
2455 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2457 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2458 NUM_BANKS(ADDR_SURF_8_BANK));
2459 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2460 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2461 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2462 NUM_BANKS(ADDR_SURF_8_BANK));
2463 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2466 NUM_BANKS(ADDR_SURF_8_BANK));
2467 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2469 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2470 NUM_BANKS(ADDR_SURF_8_BANK));
2471 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2472 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2473 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2474 NUM_BANKS(ADDR_SURF_8_BANK));
2475 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2478 NUM_BANKS(ADDR_SURF_8_BANK));
2479 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2481 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2482 NUM_BANKS(ADDR_SURF_8_BANK));
2483 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2484 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2485 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2486 NUM_BANKS(ADDR_SURF_8_BANK));
2487 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2489 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2490 NUM_BANKS(ADDR_SURF_8_BANK));
2491 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2493 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2494 NUM_BANKS(ADDR_SURF_8_BANK));
2495 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2496 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2497 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2498 NUM_BANKS(ADDR_SURF_8_BANK));
2499 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2501 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2502 NUM_BANKS(ADDR_SURF_4_BANK));
2503
2504 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2505 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2506
2507 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2508 if (reg_offset != 7)
2509 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2510
2511 break;
2512 case CHIP_TONGA:
2513 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2516 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2517 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2520 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2521 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2524 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2525 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2528 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2529 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2532 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2533 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2536 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2537 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2538 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2540 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2541 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2543 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2544 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2545 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2546 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2547 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2548 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2549 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2550 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2551 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2553 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2554 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2555 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2556 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2557 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2558 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2559 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2560 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2561 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2562 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2563 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2564 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2567 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2568 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2571 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2572 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2573 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2574 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2575 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2576 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2578 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2579 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2580 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2581 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2582 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2583 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2584 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2585 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2586 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2587 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2588 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2590 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2591 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2592 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2594 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2595 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2596 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2598 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2599 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2600 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2602 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2603 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2604 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2605 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2606 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2607 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2608 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2610 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2611 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2612 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2613 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2614 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2615 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2616 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2617 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2618 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2619 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2620 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2622 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2623 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2626 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2627 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2628 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2630 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2631 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2632 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2633 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2634 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2635
2636 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2638 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2639 NUM_BANKS(ADDR_SURF_16_BANK));
2640 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2642 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2643 NUM_BANKS(ADDR_SURF_16_BANK));
2644 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2646 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2647 NUM_BANKS(ADDR_SURF_16_BANK));
2648 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2650 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2651 NUM_BANKS(ADDR_SURF_16_BANK));
2652 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2653 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2654 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2655 NUM_BANKS(ADDR_SURF_16_BANK));
2656 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2658 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2659 NUM_BANKS(ADDR_SURF_16_BANK));
2660 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2662 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2663 NUM_BANKS(ADDR_SURF_16_BANK));
2664 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2666 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2667 NUM_BANKS(ADDR_SURF_16_BANK));
2668 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2670 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2671 NUM_BANKS(ADDR_SURF_16_BANK));
2672 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2673 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2674 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2675 NUM_BANKS(ADDR_SURF_16_BANK));
2676 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2678 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2679 NUM_BANKS(ADDR_SURF_16_BANK));
2680 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2682 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2683 NUM_BANKS(ADDR_SURF_8_BANK));
2684 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2685 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2686 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2687 NUM_BANKS(ADDR_SURF_4_BANK));
2688 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2689 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2690 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2691 NUM_BANKS(ADDR_SURF_4_BANK));
2692
2693 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2694 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2695
2696 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2697 if (reg_offset != 7)
2698 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2699
2700 break;
2701 case CHIP_POLARIS11:
2702 case CHIP_POLARIS12:
2703 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2704 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2706 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2707 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2710 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2711 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2712 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2714 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2715 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2718 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2719 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2720 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2722 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2723 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2724 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2726 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2727 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2730 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2731 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2732 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2734 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2735 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2736 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2737 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2738 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2739 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2740 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2741 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2743 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2744 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2745 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2746 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2747 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2748 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2749 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2750 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2751 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2752 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2753 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2754 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2755 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2756 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2757 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2758 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2759 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2760 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2761 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2762 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2764 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2765 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2766 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2767 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2768 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2769 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2770 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2772 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2773 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2774 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2776 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2777 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2778 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2779 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2780 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2781 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2782 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2784 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2785 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2786 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2788 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2789 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2790 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2791 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2792 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2793 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2794 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2796 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2797 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2798 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2800 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2801 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2802 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2803 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2804 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2805 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2806 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2808 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2809 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2810 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2812 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2813 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2814 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2816 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2817 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2818 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2819 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2820 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2821 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2822 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2824 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2825
2826 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2828 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2829 NUM_BANKS(ADDR_SURF_16_BANK));
2830
2831 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2833 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2834 NUM_BANKS(ADDR_SURF_16_BANK));
2835
2836 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2838 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839 NUM_BANKS(ADDR_SURF_16_BANK));
2840
2841 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2843 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2844 NUM_BANKS(ADDR_SURF_16_BANK));
2845
2846 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2847 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2848 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2849 NUM_BANKS(ADDR_SURF_16_BANK));
2850
2851 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2852 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2853 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2854 NUM_BANKS(ADDR_SURF_16_BANK));
2855
2856 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2859 NUM_BANKS(ADDR_SURF_16_BANK));
2860
2861 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2862 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2863 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2864 NUM_BANKS(ADDR_SURF_16_BANK));
2865
2866 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2867 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2868 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2869 NUM_BANKS(ADDR_SURF_16_BANK));
2870
2871 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2872 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2873 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2874 NUM_BANKS(ADDR_SURF_16_BANK));
2875
2876 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2878 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2879 NUM_BANKS(ADDR_SURF_16_BANK));
2880
2881 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2882 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2883 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2884 NUM_BANKS(ADDR_SURF_16_BANK));
2885
2886 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2887 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2888 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2889 NUM_BANKS(ADDR_SURF_8_BANK));
2890
2891 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2892 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2893 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2894 NUM_BANKS(ADDR_SURF_4_BANK));
2895
2896 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2897 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2898
2899 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2900 if (reg_offset != 7)
2901 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2902
2903 break;
2904 case CHIP_POLARIS10:
2905 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2908 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2909 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2912 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2913 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2916 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2917 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2920 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2921 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2922 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2924 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2925 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2928 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2929 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2932 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2933 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2935 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2936 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2937 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2938 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2939 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2940 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2941 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2943 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2944 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2945 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2946 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2947 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2949 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2950 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2951 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2953 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2954 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2955 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2956 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2960 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2962 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2963 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2964 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2966 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2967 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2968 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2969 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2970 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2971 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2972 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2973 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2974 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2975 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2976 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2977 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2978 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2979 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2980 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2982 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2983 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2984 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2986 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2987 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2988 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2989 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2990 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2991 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2992 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2993 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2994 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2995 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2996 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2997 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2998 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2999 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3000 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3001 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3002 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3003 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3004 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3005 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3006 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3007 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3008 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3009 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3010 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3011 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3012 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3014 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3015 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3016 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3018 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3019 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3020 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3021 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3022 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3023 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3024 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3025 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3026 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3027
3028 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3030 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031 NUM_BANKS(ADDR_SURF_16_BANK));
3032
3033 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3035 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3036 NUM_BANKS(ADDR_SURF_16_BANK));
3037
3038 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3040 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3041 NUM_BANKS(ADDR_SURF_16_BANK));
3042
3043 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3045 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3046 NUM_BANKS(ADDR_SURF_16_BANK));
3047
3048 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3049 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3050 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3051 NUM_BANKS(ADDR_SURF_16_BANK));
3052
3053 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3054 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3055 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3056 NUM_BANKS(ADDR_SURF_16_BANK));
3057
3058 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3060 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3061 NUM_BANKS(ADDR_SURF_16_BANK));
3062
3063 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3064 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3065 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3066 NUM_BANKS(ADDR_SURF_16_BANK));
3067
3068 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3070 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3071 NUM_BANKS(ADDR_SURF_16_BANK));
3072
3073 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3075 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3076 NUM_BANKS(ADDR_SURF_16_BANK));
3077
3078 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3079 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3080 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3081 NUM_BANKS(ADDR_SURF_16_BANK));
3082
3083 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3085 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3086 NUM_BANKS(ADDR_SURF_8_BANK));
3087
3088 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3090 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3091 NUM_BANKS(ADDR_SURF_4_BANK));
3092
3093 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3096 NUM_BANKS(ADDR_SURF_4_BANK));
3097
3098 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3099 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3100
3101 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3102 if (reg_offset != 7)
3103 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3104
3105 break;
3106 case CHIP_STONEY:
3107 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108 PIPE_CONFIG(ADDR_SURF_P2) |
3109 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3110 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3111 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3112 PIPE_CONFIG(ADDR_SURF_P2) |
3113 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3114 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3115 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3116 PIPE_CONFIG(ADDR_SURF_P2) |
3117 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3118 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3119 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3120 PIPE_CONFIG(ADDR_SURF_P2) |
3121 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3122 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3123 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3124 PIPE_CONFIG(ADDR_SURF_P2) |
3125 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3126 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3127 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3128 PIPE_CONFIG(ADDR_SURF_P2) |
3129 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3130 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3131 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3132 PIPE_CONFIG(ADDR_SURF_P2) |
3133 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3134 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3135 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3136 PIPE_CONFIG(ADDR_SURF_P2));
3137 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3138 PIPE_CONFIG(ADDR_SURF_P2) |
3139 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3141 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3142 PIPE_CONFIG(ADDR_SURF_P2) |
3143 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3144 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3145 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3146 PIPE_CONFIG(ADDR_SURF_P2) |
3147 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3148 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3149 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3150 PIPE_CONFIG(ADDR_SURF_P2) |
3151 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3152 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3153 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3154 PIPE_CONFIG(ADDR_SURF_P2) |
3155 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3156 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3157 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3158 PIPE_CONFIG(ADDR_SURF_P2) |
3159 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3160 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3161 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3162 PIPE_CONFIG(ADDR_SURF_P2) |
3163 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3164 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3165 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3166 PIPE_CONFIG(ADDR_SURF_P2) |
3167 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3168 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3169 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3170 PIPE_CONFIG(ADDR_SURF_P2) |
3171 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3172 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3173 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3174 PIPE_CONFIG(ADDR_SURF_P2) |
3175 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3176 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3177 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3178 PIPE_CONFIG(ADDR_SURF_P2) |
3179 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3180 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3181 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3182 PIPE_CONFIG(ADDR_SURF_P2) |
3183 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3184 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3185 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3186 PIPE_CONFIG(ADDR_SURF_P2) |
3187 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3188 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3189 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3190 PIPE_CONFIG(ADDR_SURF_P2) |
3191 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3192 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3193 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3194 PIPE_CONFIG(ADDR_SURF_P2) |
3195 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3197 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3198 PIPE_CONFIG(ADDR_SURF_P2) |
3199 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3200 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3201 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3202 PIPE_CONFIG(ADDR_SURF_P2) |
3203 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3204 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3205 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3206 PIPE_CONFIG(ADDR_SURF_P2) |
3207 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3209
3210 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3211 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3212 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3213 NUM_BANKS(ADDR_SURF_8_BANK));
3214 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3215 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3216 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3217 NUM_BANKS(ADDR_SURF_8_BANK));
3218 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3219 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3220 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3221 NUM_BANKS(ADDR_SURF_8_BANK));
3222 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3223 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3224 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3225 NUM_BANKS(ADDR_SURF_8_BANK));
3226 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3227 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3228 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3229 NUM_BANKS(ADDR_SURF_8_BANK));
3230 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3231 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3232 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3233 NUM_BANKS(ADDR_SURF_8_BANK));
3234 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3235 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3236 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3237 NUM_BANKS(ADDR_SURF_8_BANK));
3238 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3239 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3240 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241 NUM_BANKS(ADDR_SURF_16_BANK));
3242 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3245 NUM_BANKS(ADDR_SURF_16_BANK));
3246 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3247 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3248 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3249 NUM_BANKS(ADDR_SURF_16_BANK));
3250 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3251 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3252 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3253 NUM_BANKS(ADDR_SURF_16_BANK));
3254 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3257 NUM_BANKS(ADDR_SURF_16_BANK));
3258 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3259 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3260 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3261 NUM_BANKS(ADDR_SURF_16_BANK));
3262 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3263 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3264 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3265 NUM_BANKS(ADDR_SURF_8_BANK));
3266
3267 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3268 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3269 reg_offset != 23)
3270 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3271
3272 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3273 if (reg_offset != 7)
3274 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3275
3276 break;
3277 default:
3278 dev_warn(adev->dev,
3279 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3280 adev->asic_type);
3281
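/* deliberate fallthrough: unknown chips get the CHIP_CARRIZO tables below */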
3282 case CHIP_CARRIZO:
3283 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3284 PIPE_CONFIG(ADDR_SURF_P2) |
3285 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3286 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3287 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3288 PIPE_CONFIG(ADDR_SURF_P2) |
3289 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3290 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3291 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292 PIPE_CONFIG(ADDR_SURF_P2) |
3293 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3294 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3295 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3296 PIPE_CONFIG(ADDR_SURF_P2) |
3297 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3298 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3299 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3300 PIPE_CONFIG(ADDR_SURF_P2) |
3301 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3302 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3303 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3304 PIPE_CONFIG(ADDR_SURF_P2) |
3305 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3306 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3307 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3308 PIPE_CONFIG(ADDR_SURF_P2) |
3309 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3310 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3311 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3312 PIPE_CONFIG(ADDR_SURF_P2));
3313 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3314 PIPE_CONFIG(ADDR_SURF_P2) |
3315 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3317 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3318 PIPE_CONFIG(ADDR_SURF_P2) |
3319 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3321 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3322 PIPE_CONFIG(ADDR_SURF_P2) |
3323 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3325 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3326 PIPE_CONFIG(ADDR_SURF_P2) |
3327 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3329 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3330 PIPE_CONFIG(ADDR_SURF_P2) |
3331 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3332 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3333 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3334 PIPE_CONFIG(ADDR_SURF_P2) |
3335 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3336 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3337 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3338 PIPE_CONFIG(ADDR_SURF_P2) |
3339 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3341 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3342 PIPE_CONFIG(ADDR_SURF_P2) |
3343 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3345 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3346 PIPE_CONFIG(ADDR_SURF_P2) |
3347 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3348 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3349 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3350 PIPE_CONFIG(ADDR_SURF_P2) |
3351 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3352 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3353 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3354 PIPE_CONFIG(ADDR_SURF_P2) |
3355 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3357 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3358 PIPE_CONFIG(ADDR_SURF_P2) |
3359 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3361 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3362 PIPE_CONFIG(ADDR_SURF_P2) |
3363 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3364 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3365 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3366 PIPE_CONFIG(ADDR_SURF_P2) |
3367 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3369 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3370 PIPE_CONFIG(ADDR_SURF_P2) |
3371 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3373 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3374 PIPE_CONFIG(ADDR_SURF_P2) |
3375 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3376 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3377 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3378 PIPE_CONFIG(ADDR_SURF_P2) |
3379 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3380 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3381 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3382 PIPE_CONFIG(ADDR_SURF_P2) |
3383 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3385
3386 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3387 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3388 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3389 NUM_BANKS(ADDR_SURF_8_BANK));
3390 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3391 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3392 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3393 NUM_BANKS(ADDR_SURF_8_BANK));
3394 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3395 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3396 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3397 NUM_BANKS(ADDR_SURF_8_BANK));
3398 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3399 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3400 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3401 NUM_BANKS(ADDR_SURF_8_BANK));
3402 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3403 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3404 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3405 NUM_BANKS(ADDR_SURF_8_BANK));
3406 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3407 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3408 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3409 NUM_BANKS(ADDR_SURF_8_BANK));
3410 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3413 NUM_BANKS(ADDR_SURF_8_BANK));
3414 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3417 NUM_BANKS(ADDR_SURF_16_BANK));
3418 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3419 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3420 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3421 NUM_BANKS(ADDR_SURF_16_BANK));
3422 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3423 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3424 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3425 NUM_BANKS(ADDR_SURF_16_BANK));
3426 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3427 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3428 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3429 NUM_BANKS(ADDR_SURF_16_BANK));
3430 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3431 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3432 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3433 NUM_BANKS(ADDR_SURF_16_BANK));
3434 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3435 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3436 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3437 NUM_BANKS(ADDR_SURF_16_BANK));
3438 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3439 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3440 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3441 NUM_BANKS(ADDR_SURF_8_BANK));
3442
3443 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3444 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3445 reg_offset != 23)
3446 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3447
3448 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3449 if (reg_offset != 7)
3450 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3451
3452 break;
3453 }
3454 }
3455
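/*
 * Program GRBM_GFX_INDEX so that subsequent register accesses target a
 * specific shader engine (SE), shader array (SH) and instance, or are
 * broadcast to all of them when 0xffffffff is passed for that field.
 */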
3456 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3457 u32 se_num, u32 sh_num, u32 instance)
3458 {
3459 u32 data;
3460
3461 if (instance == 0xffffffff)
3462 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3463 else
3464 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3465
3466 if (se_num == 0xffffffff)
3467 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3468 else
3469 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3470
3471 if (sh_num == 0xffffffff)
3472 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3473 else
3474 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3475
3476 WREG32(mmGRBM_GFX_INDEX, data);
3477 }
3478
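/*
 * Return a bitmap of the render backends (RBs) active on the currently
 * selected SE/SH: the harvest fuses (CC_*) and user disables (GC_USER_*)
 * are OR'ed, inverted, and masked to the per-SH backend count. As an
 * illustrative example, with 4 backends per SE and 1 SH per SE, a
 * disable field of 0b0010 yields an active bitmap of 0b1101.
 */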
3479 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3480 {
3481 u32 data, mask;
3482
3483 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3484 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3485
3486 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3487
3488 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3489 adev->gfx.config.max_sh_per_se);
3490
3491 return (~data) & mask;
3492 }
3493
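/*
 * Per-ASIC raster config values for the fully enabled RB layout;
 * gfx_v8_0_setup_rb() uses these directly when no RBs are harvested and
 * otherwise hands them to gfx_v8_0_write_harvested_raster_configs().
 */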
3494 static void
3495 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3496 {
3497 switch (adev->asic_type) {
3498 case CHIP_FIJI:
3499 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3500 RB_XSEL2(1) | PKR_MAP(2) |
3501 PKR_XSEL(1) | PKR_YSEL(1) |
3502 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3503 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3504 SE_PAIR_YSEL(2);
3505 break;
3506 case CHIP_TONGA:
3507 case CHIP_POLARIS10:
3508 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3509 SE_XSEL(1) | SE_YSEL(1);
3510 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3511 SE_PAIR_YSEL(2);
3512 break;
3513 case CHIP_TOPAZ:
3514 case CHIP_CARRIZO:
3515 *rconf |= RB_MAP_PKR0(2);
3516 *rconf1 |= 0x0;
3517 break;
3518 case CHIP_POLARIS11:
3519 case CHIP_POLARIS12:
3520 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3521 SE_XSEL(1) | SE_YSEL(1);
3522 *rconf1 |= 0x0;
3523 break;
3524 case CHIP_STONEY:
3525 *rconf |= 0x0;
3526 *rconf1 |= 0x0;
3527 break;
3528 default:
3529 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3530 break;
3531 }
3532 }
3533
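/*
 * Rebuild the raster config for a harvested part: se_mask[n] holds the
 * active-RB bits belonging to shader engine n. For example, with 4 RBs
 * per SE and rb_mask 0x3f, se_mask[] becomes {0x0f, 0x30, 0x00, 0x00},
 * and SE/packer/RB mappings are steered away from the disabled RBs.
 */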
3534 static void
3535 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3536 u32 raster_config, u32 raster_config_1,
3537 unsigned rb_mask, unsigned num_rb)
3538 {
3539 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3540 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3541 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3542 unsigned rb_per_se = num_rb / num_se;
3543 unsigned se_mask[4];
3544 unsigned se;
3545
3546 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3547 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3548 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3549 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3550
3551 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3552 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3553 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3554
3555 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3556 (!se_mask[2] && !se_mask[3]))) {
3557 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3558
3559 if (!se_mask[0] && !se_mask[1]) {
3560 raster_config_1 |=
3561 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3562 } else {
3563 raster_config_1 |=
3564 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3565 }
3566 }
3567
3568 for (se = 0; se < num_se; se++) {
3569 unsigned raster_config_se = raster_config;
3570 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3571 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3572 int idx = (se / 2) * 2;
3573
3574 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3575 raster_config_se &= ~SE_MAP_MASK;
3576
3577 if (!se_mask[idx]) {
3578 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3579 } else {
3580 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3581 }
3582 }
3583
3584 pkr0_mask &= rb_mask;
3585 pkr1_mask &= rb_mask;
3586 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3587 raster_config_se &= ~PKR_MAP_MASK;
3588
3589 if (!pkr0_mask) {
3590 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3591 } else {
3592 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3593 }
3594 }
3595
3596 if (rb_per_se >= 2) {
3597 unsigned rb0_mask = 1 << (se * rb_per_se);
3598 unsigned rb1_mask = rb0_mask << 1;
3599
3600 rb0_mask &= rb_mask;
3601 rb1_mask &= rb_mask;
3602 if (!rb0_mask || !rb1_mask) {
3603 raster_config_se &= ~RB_MAP_PKR0_MASK;
3604
3605 if (!rb0_mask) {
3606 raster_config_se |=
3607 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3608 } else {
3609 raster_config_se |=
3610 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3611 }
3612 }
3613
3614 if (rb_per_se > 2) {
3615 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3616 rb1_mask = rb0_mask << 1;
3617 rb0_mask &= rb_mask;
3618 rb1_mask &= rb_mask;
3619 if (!rb0_mask || !rb1_mask) {
3620 raster_config_se &= ~RB_MAP_PKR1_MASK;
3621
3622 if (!rb0_mask) {
3623 raster_config_se |=
3624 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3625 } else {
3626 raster_config_se |=
3627 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3628 }
3629 }
3630 }
3631 }
3632
3633 /* GRBM_GFX_INDEX has a different offset on VI */
3634 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3635 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3636 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3637 }
3638
3639 /* GRBM_GFX_INDEX has a different offset on VI */
3640 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3641 }
3642
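/*
 * Walk every SE/SH, collect the active-RB bitmaps into one global mask,
 * program the raster config accordingly, and cache the per-SE/SH register
 * values so they can later be reported to userspace.
 */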
3643 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3644 {
3645 int i, j;
3646 u32 data;
3647 u32 raster_config = 0, raster_config_1 = 0;
3648 u32 active_rbs = 0;
3649 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3650 adev->gfx.config.max_sh_per_se;
3651 unsigned num_rb_pipes;
3652
3653 mutex_lock(&adev->grbm_idx_mutex);
3654 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3655 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3656 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3657 data = gfx_v8_0_get_rb_active_bitmap(adev);
3658 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3659 rb_bitmap_width_per_sh);
3660 }
3661 }
3662 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3663
3664 adev->gfx.config.backend_enable_mask = active_rbs;
3665 adev->gfx.config.num_rbs = hweight32(active_rbs);
3666
3667 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3668 adev->gfx.config.max_shader_engines, 16);
3669
3670 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3671
3672 if (!adev->gfx.config.backend_enable_mask ||
3673 adev->gfx.config.num_rbs >= num_rb_pipes) {
3674 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3675 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3676 } else {
3677 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3678 adev->gfx.config.backend_enable_mask,
3679 num_rb_pipes);
3680 }
3681
3682 /* cache the values for userspace */
3683 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3684 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3685 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3686 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3687 RREG32(mmCC_RB_BACKEND_DISABLE);
3688 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3689 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3690 adev->gfx.config.rb_config[i][j].raster_config =
3691 RREG32(mmPA_SC_RASTER_CONFIG);
3692 adev->gfx.config.rb_config[i][j].raster_config_1 =
3693 RREG32(mmPA_SC_RASTER_CONFIG_1);
3694 }
3695 }
3696 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3697 mutex_unlock(&adev->grbm_idx_mutex);
3698 }
3699
3700 /**
3701 * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
3702 *
3703 * @adev: amdgpu_device pointer
3704 *
3705 * Initialize compute vmid sh_mem registers
3706 *
3707 */
3708 #define DEFAULT_SH_MEM_BASES (0x6000)
3709 #define FIRST_COMPUTE_VMID (8)
3710 #define LAST_COMPUTE_VMID (16)
3711 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3712 {
3713 int i;
3714 uint32_t sh_mem_config;
3715 uint32_t sh_mem_bases;
3716
3717 /*
3718 * Configure apertures:
3719 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3720 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3721 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3722 */
3723 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
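/*
 * Assuming the GFX8 SH_MEM_BASES layout (private base in the low half,
 * shared base in the high half), this packs to 0x60006000, i.e. both
 * apertures at 0x6000 << 48 = 0x60000000'00000000 as described above.
 */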
3724
3725 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3726 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3727 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3728 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3729 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3730 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3731
3732 mutex_lock(&adev->srbm_mutex);
3733 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3734 vi_srbm_select(adev, 0, 0, 0, i);
3735 /* CP and shaders */
3736 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
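/* APE1 base above its limit (1 > 0) leaves the APE1 aperture disabled */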
3737 WREG32(mmSH_MEM_APE1_BASE, 1);
3738 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3739 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3740 }
3741 vi_srbm_select(adev, 0, 0, 0, 0);
3742 mutex_unlock(&adev->srbm_mutex);
3743 }
3744
3745 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3746 {
3747 switch (adev->asic_type) {
3748 default:
3749 adev->gfx.config.double_offchip_lds_buf = 1;
3750 break;
3751 case CHIP_CARRIZO:
3752 case CHIP_STONEY:
3753 adev->gfx.config.double_offchip_lds_buf = 0;
3754 break;
3755 }
3756 }
3757
3758 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3759 {
3760 u32 tmp, sh_static_mem_cfg;
3761 int i;
3762
3763 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3764 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3765 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3766 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3767
3768 gfx_v8_0_tiling_mode_table_init(adev);
3769 gfx_v8_0_setup_rb(adev);
3770 gfx_v8_0_get_cu_info(adev);
3771 gfx_v8_0_config_init(adev);
3772
3773 /* XXX SH_MEM regs */
3774 /* where to put LDS, scratch, GPUVM in FSA64 space */
3775 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3776 SWIZZLE_ENABLE, 1);
3777 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3778 ELEMENT_SIZE, 1);
3779 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3780 INDEX_STRIDE, 3);
3781 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3782
3783 mutex_lock(&adev->srbm_mutex);
3784 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3785 vi_srbm_select(adev, 0, 0, 0, i);
3786 /* CP and shaders */
3787 if (i == 0) {
3788 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3789 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3790 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3791 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3792 WREG32(mmSH_MEM_CONFIG, tmp);
3793 WREG32(mmSH_MEM_BASES, 0);
3794 } else {
3795 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3796 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3797 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3798 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3799 WREG32(mmSH_MEM_CONFIG, tmp);
3800 tmp = adev->mc.shared_aperture_start >> 48;
3801 WREG32(mmSH_MEM_BASES, tmp);
3802 }
3803
3804 WREG32(mmSH_MEM_APE1_BASE, 1);
3805 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3806 }
3807 vi_srbm_select(adev, 0, 0, 0, 0);
3808 mutex_unlock(&adev->srbm_mutex);
3809
3810 gfx_v8_0_init_compute_vmid(adev);
3811
3812 mutex_lock(&adev->grbm_idx_mutex);
3813 /*
3814 * make sure that the following register writes are broadcast
3815 * to all the shaders
3816 */
3817 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3818
3819 WREG32(mmPA_SC_FIFO_SIZE,
3820 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3821 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3822 (adev->gfx.config.sc_prim_fifo_size_backend <<
3823 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3824 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3825 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3826 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3827 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3828
3829 tmp = RREG32(mmSPI_ARB_PRIORITY);
3830 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3831 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3832 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3833 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3834 WREG32(mmSPI_ARB_PRIORITY, tmp);
3835
3836 mutex_unlock(&adev->grbm_idx_mutex);
3837
3838 }
3839
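/*
 * Poll until the RLC serdes masters report idle, first the CU masters for
 * every SE/SH and then the global non-CU masters, giving up after
 * adev->usec_timeout microseconds.
 */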
3840 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3841 {
3842 u32 i, j, k;
3843 u32 mask;
3844
3845 mutex_lock(&adev->grbm_idx_mutex);
3846 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3847 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3848 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3849 for (k = 0; k < adev->usec_timeout; k++) {
3850 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3851 break;
3852 udelay(1);
3853 }
3854 if (k == adev->usec_timeout) {
3855 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3856 0xffffffff, 0xffffffff);
3857 mutex_unlock(&adev->grbm_idx_mutex);
3858 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3859 i, j);
3860 return;
3861 }
3862 }
3863 }
3864 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3865 mutex_unlock(&adev->grbm_idx_mutex);
3866
3867 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3868 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3869 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3870 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3871 for (k = 0; k < adev->usec_timeout; k++) {
3872 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3873 break;
3874 udelay(1);
3875 }
3876 }
3877
3878 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3879 bool enable)
3880 {
3881 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3882
3883 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3884 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3885 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3886 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3887
3888 WREG32(mmCP_INT_CNTL_RING0, tmp);
3889 }
3890
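/*
 * Point the RLC at the clear state indirect buffer: the 64-bit GPU
 * address is split across the HI/LO registers, with the low word masked
 * to 4-byte alignment.
 */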
3891 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3892 {
3893 /* csib */
3894 WREG32(mmRLC_CSIB_ADDR_HI,
3895 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3896 WREG32(mmRLC_CSIB_ADDR_LO,
3897 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3898 WREG32(mmRLC_CSIB_LENGTH,
3899 adev->gfx.rlc.clear_state_size);
3900 }
3901
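/*
 * Scan the RLC indirect register list. Judging from the walk below, the
 * registers come in triples whose third dword is an index value, and each
 * block of triples ends with an 0xFFFFFFFF marker. Index values are
 * deduplicated into unique_indices[] and replaced in-place by their slot
 * number, while ind_start_offsets[] records where each block begins.
 */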
3902 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3903 int ind_offset,
3904 int list_size,
3905 int *unique_indices,
3906 int *indices_count,
3907 int max_indices,
3908 int *ind_start_offsets,
3909 int *offset_count,
3910 int max_offset)
3911 {
3912 int indices;
3913 bool new_entry = true;
3914
3915 for (; ind_offset < list_size; ind_offset++) {
3916
3917 if (new_entry) {
3918 new_entry = false;
3919 ind_start_offsets[*offset_count] = ind_offset;
3920 *offset_count = *offset_count + 1;
3921 BUG_ON(*offset_count >= max_offset);
3922 }
3923
3924 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3925 new_entry = true;
3926 continue;
3927 }
3928
3929 ind_offset += 2;
3930
3931 /* look for the matching index */
3932 for (indices = 0;
3933 indices < *indices_count;
3934 indices++) {
3935 if (unique_indices[indices] ==
3936 register_list_format[ind_offset])
3937 break;
3938 }
3939
3940 if (indices >= *indices_count) {
3941 unique_indices[*indices_count] =
3942 register_list_format[ind_offset];
3943 indices = *indices_count;
3944 *indices_count = *indices_count + 1;
3945 BUG_ON(*indices_count >= max_indices);
3946 }
3947
3948 register_list_format[ind_offset] = indices;
3949 }
3950 }
3951
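/*
 * Program the RLC save/restore machinery: the direct register list goes
 * into save/restore ARAM, the index-rewritten indirect list and its
 * bookkeeping go into GPM scratch, and each unique index register is
 * written to an RLC_SRM_INDEX_CNTL_ADDR/DATA pair.
 */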
3952 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3953 {
3954 int i, temp, data;
3955 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3956 int indices_count = 0;
3957 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3958 int offset_count = 0;
3959
3960 int list_size;
3961 unsigned int *register_list_format =
3962 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3963 if (!register_list_format)
3964 return -ENOMEM;
3965 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3966 adev->gfx.rlc.reg_list_format_size_bytes);
3967
3968 gfx_v8_0_parse_ind_reg_list(register_list_format,
3969 RLC_FormatDirectRegListLength,
3970 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3971 unique_indices,
3972 &indices_count,
3973 ARRAY_SIZE(unique_indices),
3974 indirect_start_offsets,
3975 &offset_count,
3976 ARRAY_SIZE(indirect_start_offsets));
3977
3978 /* save and restore list */
3979 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3980
3981 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3982 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3983 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3984
3985 /* indirect list */
3986 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3987 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3988 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3989
3990 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3991 list_size = list_size >> 1;
3992 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3993 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3994
3995 /* starting offsets of the indirect register blocks */
3996 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3997 adev->gfx.rlc.starting_offsets_start);
3998 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3999 WREG32(mmRLC_GPM_SCRATCH_DATA,
4000 indirect_start_offsets[i]);
4001
4002 /* unique indices */
4003 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4004 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4005 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4006 if (unique_indices[i] != 0) {
4007 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4008 WREG32(data + i, unique_indices[i] >> 20);
4009 }
4010 }
4011 kfree(register_list_format);
4012
4013 return 0;
4014 }
4015
4016 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4017 {
4018 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4019 }
4020
4021 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4022 {
4023 uint32_t data;
4024
4025 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4026
4027 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4028 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4029 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4030 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4031 WREG32(mmRLC_PG_DELAY, data);
4032
4033 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4034 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4035
4036 }
4037
4038 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4039 bool enable)
4040 {
4041 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4042 }
4043
4044 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4045 bool enable)
4046 {
4047 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4048 }
4049
4050 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4051 {
4052 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4053 }
4054
4055 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4056 {
4057 if ((adev->asic_type == CHIP_CARRIZO) ||
4058 (adev->asic_type == CHIP_STONEY)) {
4059 gfx_v8_0_init_csb(adev);
4060 gfx_v8_0_init_save_restore_list(adev);
4061 gfx_v8_0_enable_save_restore_machine(adev);
4062 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4063 gfx_v8_0_init_power_gating(adev);
4064 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4065 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4066 (adev->asic_type == CHIP_POLARIS12)) {
4067 gfx_v8_0_init_csb(adev);
4068 gfx_v8_0_init_save_restore_list(adev);
4069 gfx_v8_0_enable_save_restore_machine(adev);
4070 gfx_v8_0_init_power_gating(adev);
4071 }
4072
4073 }
4074
4075 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4076 {
4077 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4078
4079 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4080 gfx_v8_0_wait_for_rlc_serdes(adev);
4081 }
4082
4083 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4084 {
4085 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4086 udelay(50);
4087
4088 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4089 udelay(50);
4090 }
4091
4092 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4093 {
4094 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4095
4096 /* APUs such as Carrizo enable the CP interrupt only after the CP is initialized */
4097 if (!(adev->flags & AMD_IS_APU))
4098 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4099
4100 udelay(50);
4101 }
4102
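/*
 * Legacy (non-SMU) RLC microcode load. The repeated UCODE_DATA writes
 * after zeroing UCODE_ADDR suggest an auto-incrementing data port; the
 * trailing UCODE_ADDR write records the firmware version.
 */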
4103 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4104 {
4105 const struct rlc_firmware_header_v2_0 *hdr;
4106 const __le32 *fw_data;
4107 unsigned i, fw_size;
4108
4109 if (!adev->gfx.rlc_fw)
4110 return -EINVAL;
4111
4112 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4113 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4114
4115 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4116 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4117 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4118
4119 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4120 for (i = 0; i < fw_size; i++)
4121 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4122 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4123
4124 return 0;
4125 }
4126
4127 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4128 {
4129 int r;
4130 u32 tmp;
4131
4132 gfx_v8_0_rlc_stop(adev);
4133
4134 /* disable CG */
4135 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4136 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4137 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4138 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4139 if (adev->asic_type == CHIP_POLARIS11 ||
4140 adev->asic_type == CHIP_POLARIS10 ||
4141 adev->asic_type == CHIP_POLARIS12) {
4142 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4143 tmp &= ~0x3;
4144 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4145 }
4146
4147 /* disable PG */
4148 WREG32(mmRLC_PG_CNTL, 0);
4149
4150 gfx_v8_0_rlc_reset(adev);
4151 gfx_v8_0_init_pg(adev);
4152
4153
4154 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4155 /* legacy rlc firmware loading */
4156 r = gfx_v8_0_rlc_load_microcode(adev);
4157 if (r)
4158 return r;
4159 }
4160
4161 gfx_v8_0_rlc_start(adev);
4162
4163 return 0;
4164 }
4165
4166 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4167 {
4168 int i;
4169 u32 tmp = RREG32(mmCP_ME_CNTL);
4170
4171 if (enable) {
4172 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4173 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4174 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4175 } else {
4176 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4177 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4178 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4179 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4180 adev->gfx.gfx_ring[i].ready = false;
4181 }
4182 WREG32(mmCP_ME_CNTL, tmp);
4183 udelay(50);
4184 }
4185
4186 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4187 {
4188 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4189 const struct gfx_firmware_header_v1_0 *ce_hdr;
4190 const struct gfx_firmware_header_v1_0 *me_hdr;
4191 const __le32 *fw_data;
4192 unsigned i, fw_size;
4193
4194 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4195 return -EINVAL;
4196
4197 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4198 adev->gfx.pfp_fw->data;
4199 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4200 adev->gfx.ce_fw->data;
4201 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4202 adev->gfx.me_fw->data;
4203
4204 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4205 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4206 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4207
4208 gfx_v8_0_cp_gfx_enable(adev, false);
4209
4210 /* PFP */
4211 fw_data = (const __le32 *)
4212 (adev->gfx.pfp_fw->data +
4213 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4214 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4215 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4216 for (i = 0; i < fw_size; i++)
4217 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4218 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4219
4220 /* CE */
4221 fw_data = (const __le32 *)
4222 (adev->gfx.ce_fw->data +
4223 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4224 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4225 WREG32(mmCP_CE_UCODE_ADDR, 0);
4226 for (i = 0; i < fw_size; i++)
4227 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4228 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4229
4230 /* ME */
4231 fw_data = (const __le32 *)
4232 (adev->gfx.me_fw->data +
4233 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4234 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4235 WREG32(mmCP_ME_RAM_WADDR, 0);
4236 for (i = 0; i < fw_size; i++)
4237 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4238 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4239
4240 return 0;
4241 }
4242
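/*
 * Dword count of the clear-state PM4 stream built by
 * gfx_v8_0_cp_gfx_start(): 2 (begin preamble) + 3 (context control) +
 * (2 + reg_count) per SECT_CONTEXT extent + 4 (raster configs) +
 * 2 (end preamble) + 2 (clear state).
 */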
4243 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4244 {
4245 u32 count = 0;
4246 const struct cs_section_def *sect = NULL;
4247 const struct cs_extent_def *ext = NULL;
4248
4249 /* begin clear state */
4250 count += 2;
4251 /* context control state */
4252 count += 3;
4253
4254 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4255 for (ext = sect->section; ext->extent != NULL; ++ext) {
4256 if (sect->id == SECT_CONTEXT)
4257 count += 2 + ext->reg_count;
4258 else
4259 return 0;
4260 }
4261 }
4262 /* pa_sc_raster_config/pa_sc_raster_config1 */
4263 count += 4;
4264 /* end clear state */
4265 count += 2;
4266 /* clear state */
4267 count += 2;
4268
4269 return count;
4270 }
4271
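/*
 * Bring up the gfx CP and emit the initial PM4 stream: clear-state
 * preamble, context control, the vi_cs_data context registers, the
 * per-ASIC PA_SC_RASTER_CONFIG pair, and the CE partition bases.
 */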
4272 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4273 {
4274 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4275 const struct cs_section_def *sect = NULL;
4276 const struct cs_extent_def *ext = NULL;
4277 int r, i;
4278
4279 /* init the CP */
4280 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4281 WREG32(mmCP_ENDIAN_SWAP, 0);
4282 WREG32(mmCP_DEVICE_ID, 1);
4283
4284 gfx_v8_0_cp_gfx_enable(adev, true);
4285
4286 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4287 if (r) {
4288 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4289 return r;
4290 }
4291
4292 /* clear state buffer */
4293 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4294 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4295
4296 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4297 amdgpu_ring_write(ring, 0x80000000);
4298 amdgpu_ring_write(ring, 0x80000000);
4299
4300 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4301 for (ext = sect->section; ext->extent != NULL; ++ext) {
4302 if (sect->id == SECT_CONTEXT) {
4303 amdgpu_ring_write(ring,
4304 PACKET3(PACKET3_SET_CONTEXT_REG,
4305 ext->reg_count));
4306 amdgpu_ring_write(ring,
4307 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4308 for (i = 0; i < ext->reg_count; i++)
4309 amdgpu_ring_write(ring, ext->extent[i]);
4310 }
4311 }
4312 }
4313
4314 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4315 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4316 switch (adev->asic_type) {
4317 case CHIP_TONGA:
4318 case CHIP_POLARIS10:
4319 amdgpu_ring_write(ring, 0x16000012);
4320 amdgpu_ring_write(ring, 0x0000002A);
4321 break;
4322 case CHIP_POLARIS11:
4323 case CHIP_POLARIS12:
4324 amdgpu_ring_write(ring, 0x16000012);
4325 amdgpu_ring_write(ring, 0x00000000);
4326 break;
4327 case CHIP_FIJI:
4328 amdgpu_ring_write(ring, 0x3a00161a);
4329 amdgpu_ring_write(ring, 0x0000002e);
4330 break;
4331 case CHIP_CARRIZO:
4332 amdgpu_ring_write(ring, 0x00000002);
4333 amdgpu_ring_write(ring, 0x00000000);
4334 break;
4335 case CHIP_TOPAZ:
4336 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4337 0x00000000 : 0x00000002);
4338 amdgpu_ring_write(ring, 0x00000000);
4339 break;
4340 case CHIP_STONEY:
4341 amdgpu_ring_write(ring, 0x00000000);
4342 amdgpu_ring_write(ring, 0x00000000);
4343 break;
4344 default:
4345 BUG();
4346 }
4347
4348 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4349 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4350
4351 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4352 amdgpu_ring_write(ring, 0);
4353
4354 /* init the CE partitions */
4355 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4356 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4357 amdgpu_ring_write(ring, 0x8000);
4358 amdgpu_ring_write(ring, 0x8000);
4359
4360 amdgpu_ring_commit(ring);
4361
4362 return 0;
4363 }
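
/*
 * gfx_v8_0_set_cpg_door_bell - configure the CP gfx ring doorbell
 *
 * Enables or disables the RB doorbell based on ring->use_doorbell. The
 * doorbell range registers are only programmed on dGPUs (APUs return
 * early), and Iceland is skipped entirely since it has no gfx doorbells.
 */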
4364 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4365 {
4366 u32 tmp;
4367 /* no gfx doorbells on iceland */
4368 if (adev->asic_type == CHIP_TOPAZ)
4369 return;
4370
4371 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4372
4373 if (ring->use_doorbell) {
4374 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4375 DOORBELL_OFFSET, ring->doorbell_index);
4376 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4377 DOORBELL_HIT, 0);
4378 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4379 DOORBELL_EN, 1);
4380 } else {
4381 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4382 }
4383
4384 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4385
4386 if (adev->flags & AMD_IS_APU)
4387 return;
4388
4389 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4390 DOORBELL_RANGE_LOWER,
4391 AMDGPU_DOORBELL_GFX_RING0);
4392 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4393
4394 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4395 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4396 }
4397
4398 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4399 {
4400 struct amdgpu_ring *ring;
4401 u32 tmp;
4402 u32 rb_bufsz;
4403 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4404 int r;
4405
4406 /* Set the write pointer delay */
4407 WREG32(mmCP_RB_WPTR_DELAY, 0);
4408
4409 /* set the RB to use vmid 0 */
4410 WREG32(mmCP_RB_VMID, 0);
4411
4412 /* Set ring buffer size */
4413 ring = &adev->gfx.gfx_ring[0];
4414 rb_bufsz = order_base_2(ring->ring_size / 8);
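	/* e.g. a 4KB ring holds 1024 dwords and gives rb_bufsz = 9; the
	 * field encodes a ring of 2^(RB_BUFSZ + 1) dwords
	 */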
4415 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4416 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4417 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4418 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4419 #ifdef __BIG_ENDIAN
4420 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4421 #endif
4422 WREG32(mmCP_RB0_CNTL, tmp);
4423
4424 /* Initialize the ring buffer's read and write pointers */
4425 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4426 ring->wptr = 0;
4427 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4428
4429 /* set the wb address whether it's enabled or not */
4430 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4431 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4432 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4433
4434 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4435 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4436 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4437 mdelay(1);
4438 WREG32(mmCP_RB0_CNTL, tmp);
4439
4440 rb_addr = ring->gpu_addr >> 8;
4441 WREG32(mmCP_RB0_BASE, rb_addr);
4442 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4443
4444 gfx_v8_0_set_cpg_door_bell(adev, ring);
4445 /* start the ring */
4446 amdgpu_ring_clear_ring(ring);
4447 r = gfx_v8_0_cp_gfx_start(adev);
if (r)
return r;
4448 ring->ready = true;
4449 r = amdgpu_ring_test_ring(ring);
4450 if (r)
4451 ring->ready = false;
4452
4453 return r;
4454 }
4455
4456 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4457 {
4458 int i;
4459
4460 if (enable) {
4461 WREG32(mmCP_MEC_CNTL, 0);
4462 } else {
4463 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4464 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4465 adev->gfx.compute_ring[i].ready = false;
4466 adev->gfx.kiq.ring.ready = false;
4467 }
4468 udelay(50);
4469 }
4470
4471 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4472 {
4473 const struct gfx_firmware_header_v1_0 *mec_hdr;
4474 const __le32 *fw_data;
4475 unsigned i, fw_size;
4476
4477 if (!adev->gfx.mec_fw)
4478 return -EINVAL;
4479
4480 gfx_v8_0_cp_compute_enable(adev, false);
4481
4482 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4483 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4484
4485 fw_data = (const __le32 *)
4486 (adev->gfx.mec_fw->data +
4487 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4488 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4489
4490 /* MEC1 */
4491 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4492 for (i = 0; i < fw_size; i++)
4493 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4494 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4495
4496 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4497 if (adev->gfx.mec2_fw) {
4498 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4499
4500 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4501 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4502
4503 fw_data = (const __le32 *)
4504 (adev->gfx.mec2_fw->data +
4505 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4506 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4507
4508 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4509 for (i = 0; i < fw_size; i++)
4510 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4511 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4512 }
4513
4514 return 0;
4515 }
4516
4517 /* KIQ functions */
4518 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4519 {
4520 uint32_t tmp;
4521 struct amdgpu_device *adev = ring->adev;
4522
4523 /* tell RLC which queue is the KIQ */
4524 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4525 tmp &= 0xffffff00;
4526 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4527 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4528 tmp |= 0x80;
4529 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4530 }
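
/*
 * The low byte of RLC_CP_SCHEDULERS encodes the KIQ as
 * (me << 5) | (pipe << 3) | queue; the queue id is programmed first and
 * bit 7 (0x80) is then set with a second write to activate it.
 */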
4531
4532 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4533 {
4534 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4535 uint32_t scratch, tmp = 0;
4536 uint64_t queue_mask = 0;
4537 int r, i;
4538
4539 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4540 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4541 continue;
4542
4543 /* This situation may be hit in the future if a new HW
4544 * generation exposes more than 64 queues. If so, the
4545 * definition of queue_mask needs updating. */
4546 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4547 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4548 break;
4549 }
4550
4551 queue_mask |= (1ull << i);
4552 }
4553
4554 r = amdgpu_gfx_scratch_get(adev, &scratch);
4555 if (r) {
4556 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4557 return r;
4558 }
4559 WREG32(scratch, 0xCAFEDEAD);
4560
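	/* ring space accounting: SET_RESOURCES is 8 dwords and the closing
	 * SET_UCONFIG_REG scratch write is 3 (hence the fixed 11); each
	 * MAP_QUEUES packet below takes 7 dwords, so 8 per ring leaves a
	 * dword of slack per queue
	 */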
4561 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4562 if (r) {
4563 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4564 amdgpu_gfx_scratch_free(adev, scratch);
4565 return r;
4566 }
4567 /* set resources */
4568 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4569 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4570 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4571 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4572 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4573 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4574 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4575 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4576 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4577 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4578 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4579 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4580
4581 /* map queues */
4582 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4583 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
4584 amdgpu_ring_write(kiq_ring,
4585 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4586 amdgpu_ring_write(kiq_ring,
4587 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4588 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4589 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4590 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4591 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4592 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4593 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4594 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4595 }
4596 /* write to scratch for completion */
4597 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4598 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4599 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4600 amdgpu_ring_commit(kiq_ring);
4601
4602 for (i = 0; i < adev->usec_timeout; i++) {
4603 tmp = RREG32(scratch);
4604 if (tmp == 0xDEADBEEF)
4605 break;
4606 DRM_UDELAY(1);
4607 }
4608 if (i >= adev->usec_timeout) {
4609 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4610 scratch, tmp);
4611 r = -EINVAL;
4612 }
4613 amdgpu_gfx_scratch_free(adev, scratch);
4614
4615 return r;
4616 }
4617
4618 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4619 {
4620 int i, r = 0;
4621
4622 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4623 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4624 for (i = 0; i < adev->usec_timeout; i++) {
4625 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4626 break;
4627 udelay(1);
4628 }
4629 if (i == adev->usec_timeout)
4630 r = -ETIMEDOUT;
4631 }
4632 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4633 WREG32(mmCP_HQD_PQ_RPTR, 0);
4634 WREG32(mmCP_HQD_PQ_WPTR, 0);
4635
4636 return r;
4637 }
4638
4639 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4640 {
4641 struct amdgpu_device *adev = ring->adev;
4642 struct vi_mqd *mqd = ring->mqd_ptr;
4643 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4644 uint32_t tmp;
4645
4646 mqd->header = 0xC0310800;
4647 mqd->compute_pipelinestat_enable = 0x00000001;
4648 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4649 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4650 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4651 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4652 mqd->compute_misc_reserved = 0x00000003;
4653 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4654 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4655 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4656 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4657 eop_base_addr = ring->eop_gpu_addr >> 8;
4658 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4659 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4660
4661 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4662 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4663 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4664 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
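	/* e.g. GFX8_MEC_HPD_SIZE of 2048 bytes is 512 dwords, so EOP_SIZE = 8 */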
4665
4666 mqd->cp_hqd_eop_control = tmp;
4667
4668 /* enable doorbell? */
4669 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4670 CP_HQD_PQ_DOORBELL_CONTROL,
4671 DOORBELL_EN,
4672 ring->use_doorbell ? 1 : 0);
4673
4674 mqd->cp_hqd_pq_doorbell_control = tmp;
4675
4676 /* set the pointer to the MQD */
4677 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4678 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4679
4680 /* set MQD vmid to 0 */
4681 tmp = RREG32(mmCP_MQD_CONTROL);
4682 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4683 mqd->cp_mqd_control = tmp;
4684
4685 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4686 hqd_gpu_addr = ring->gpu_addr >> 8;
4687 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4688 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4689
4690 /* set up the HQD, this is similar to CP_RB0_CNTL */
4691 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4692 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4693 (order_base_2(ring->ring_size / 4) - 1));
4694 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4695 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4696 #ifdef __BIG_ENDIAN
4697 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4698 #endif
4699 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4700 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4701 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4702 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4703 mqd->cp_hqd_pq_control = tmp;
4704
4705 /* set the wb address whether it's enabled or not */
4706 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4707 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4708 mqd->cp_hqd_pq_rptr_report_addr_hi =
4709 upper_32_bits(wb_gpu_addr) & 0xffff;
4710
4711 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4712 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4713 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4714 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4715
4716 tmp = 0;
4717 /* enable the doorbell if requested */
4718 if (ring->use_doorbell) {
4719 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4720 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4721 DOORBELL_OFFSET, ring->doorbell_index);
4722
4723 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4724 DOORBELL_EN, 1);
4725 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4726 DOORBELL_SOURCE, 0);
4727 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4728 DOORBELL_HIT, 0);
4729 }
4730
4731 mqd->cp_hqd_pq_doorbell_control = tmp;
4732
4733 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4734 ring->wptr = 0;
4735 mqd->cp_hqd_pq_wptr = ring->wptr;
4736 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4737
4738 /* set the vmid for the queue */
4739 mqd->cp_hqd_vmid = 0;
4740
4741 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4742 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4743 mqd->cp_hqd_persistent_state = tmp;
4744
4745 /* set MTYPE */
4746 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4747 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4748 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4749 mqd->cp_hqd_ib_control = tmp;
4750
4751 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4752 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4753 mqd->cp_hqd_iq_timer = tmp;
4754
4755 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4756 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4757 mqd->cp_hqd_ctx_save_control = tmp;
4758
4759 /* defaults */
4760 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4761 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4762 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4763 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4764 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4765 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4766 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4767 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4768 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4769 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4770 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4771 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4772 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4773 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4774 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4775
4776 /* activate the queue */
4777 mqd->cp_hqd_active = 1;
4778
4779 return 0;
4780 }
4781
4782 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4783 struct vi_mqd *mqd)
4784 {
4785 uint32_t mqd_reg;
4786 uint32_t *mqd_data;
4787
4788 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4789 mqd_data = &mqd->cp_mqd_base_addr_lo;
4790
4791 /* disable wptr polling */
4792 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4793
4794 /* program all HQD registers */
4795 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4796 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4797
4798 /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4799 * This is safe since EOP RPTR==WPTR for any inactive HQD
4800 * on ASICs that do not support context-save.
4801 * EOP writes/reads can start anywhere in the ring.
4802 */
4803 if (adev->asic_type != CHIP_TONGA) {
4804 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4805 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4806 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4807 }
4808
4809 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4810 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4811
4812 /* activate the HQD */
4813 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4814 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4815
4816 return 0;
4817 }
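
/*
 * gfx_v8_0_mqd_commit() programs per-queue registers, so callers are
 * expected to hold srbm_mutex and select the target me/pipe/queue via
 * vi_srbm_select() around it, as gfx_v8_0_kiq_init_queue() below does.
 */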
4818
4819 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4820 {
4821 struct amdgpu_device *adev = ring->adev;
4822 struct vi_mqd *mqd = ring->mqd_ptr;
4823 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4824
4825 gfx_v8_0_kiq_setting(ring);
4826
4827 if (adev->in_gpu_reset) { /* for GPU_RESET case */
4828 /* reset MQD to a clean status */
4829 if (adev->gfx.mec.mqd_backup[mqd_idx])
4830 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4831
4832 /* reset ring buffer */
4833 ring->wptr = 0;
4834 amdgpu_ring_clear_ring(ring);
4835 mutex_lock(&adev->srbm_mutex);
4836 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4837 gfx_v8_0_mqd_commit(adev, mqd);
4838 vi_srbm_select(adev, 0, 0, 0, 0);
4839 mutex_unlock(&adev->srbm_mutex);
4840 } else {
4841 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4842 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4843 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4844 mutex_lock(&adev->srbm_mutex);
4845 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4846 gfx_v8_0_mqd_init(ring);
4847 gfx_v8_0_mqd_commit(adev, mqd);
4848 vi_srbm_select(adev, 0, 0, 0, 0);
4849 mutex_unlock(&adev->srbm_mutex);
4850
4851 if (adev->gfx.mec.mqd_backup[mqd_idx])
4852 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4853 }
4854
4855 return 0;
4856 }
4857
4858 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4859 {
4860 struct amdgpu_device *adev = ring->adev;
4861 struct vi_mqd *mqd = ring->mqd_ptr;
4862 int mqd_idx = ring - &adev->gfx.compute_ring[0];
4863
4864 if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
4865 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4866 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4867 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4868 mutex_lock(&adev->srbm_mutex);
4869 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4870 gfx_v8_0_mqd_init(ring);
4871 vi_srbm_select(adev, 0, 0, 0, 0);
4872 mutex_unlock(&adev->srbm_mutex);
4873
4874 if (adev->gfx.mec.mqd_backup[mqd_idx])
4875 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4876 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4877 /* reset MQD to a clean status */
4878 if (adev->gfx.mec.mqd_backup[mqd_idx])
4879 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4880 } else {
4881 amdgpu_ring_clear_ring(ring);
4882 }
4883 return 0;
4884 }
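
/*
 * Three paths above: a fresh init builds the MQD and backs it up, a GPU
 * reset restores the MQD from that backup, and a resume from suspend
 * reuses the live MQD and only clears the ring.
 */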
4885
4886 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4887 {
4888 if (adev->asic_type > CHIP_TONGA) {
4889 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4890 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4891 }
4892 /* enable doorbells */
4893 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4894 }
4895
4896 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4897 {
4898 struct amdgpu_ring *ring = NULL;
4899 int r = 0, i;
4900
4901 gfx_v8_0_cp_compute_enable(adev, true);
4902
4903 ring = &adev->gfx.kiq.ring;
4904
4905 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4906 if (unlikely(r != 0))
4907 goto done;
4908
4909 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4910 if (!r) {
4911 r = gfx_v8_0_kiq_init_queue(ring);
4912 amdgpu_bo_kunmap(ring->mqd_obj);
4913 ring->mqd_ptr = NULL;
4914 }
4915 amdgpu_bo_unreserve(ring->mqd_obj);
4916 if (r)
4917 goto done;
4918
4919 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4920 ring = &adev->gfx.compute_ring[i];
4921
4922 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4923 if (unlikely(r != 0))
4924 goto done;
4925 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4926 if (!r) {
4927 r = gfx_v8_0_kcq_init_queue(ring);
4928 amdgpu_bo_kunmap(ring->mqd_obj);
4929 ring->mqd_ptr = NULL;
4930 }
4931 amdgpu_bo_unreserve(ring->mqd_obj);
4932 if (r)
4933 goto done;
4934 }
4935
4936 gfx_v8_0_set_mec_doorbell_range(adev);
4937
4938 r = gfx_v8_0_kiq_kcq_enable(adev);
4939 if (r)
4940 goto done;
4941
4942 /* Test KIQ */
4943 ring = &adev->gfx.kiq.ring;
4944 ring->ready = true;
4945 r = amdgpu_ring_test_ring(ring);
4946 if (r) {
4947 ring->ready = false;
4948 goto done;
4949 }
4950
4951 /* Test KCQs */
4952 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4953 ring = &adev->gfx.compute_ring[i];
4954 if (adev->in_gpu_reset) {
4955 /* reset the ring buffer here to work around
4956 * compute ring test failures
4957 */
4958 ring->wptr = 0;
4959 amdgpu_ring_clear_ring(ring);
4960 }
4961 ring->ready = true;
4962 r = amdgpu_ring_test_ring(ring);
4963 if (r)
4964 ring->ready = false;
4965 }
4966
4967 done:
4968 return r;
4969 }
4970
4971 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4972 {
4973 int r;
4974
4975 if (!(adev->flags & AMD_IS_APU))
4976 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4977
4978 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4979 /* legacy firmware loading */
4980 r = gfx_v8_0_cp_gfx_load_microcode(adev);
4981 if (r)
4982 return r;
4983
4984 r = gfx_v8_0_cp_compute_load_microcode(adev);
4985 if (r)
4986 return r;
4987 }
4988
4989 r = gfx_v8_0_cp_gfx_resume(adev);
4990 if (r)
4991 return r;
4992
4993 r = gfx_v8_0_kiq_resume(adev);
4994 if (r)
4995 return r;
4996
4997 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4998
4999 return 0;
5000 }
5001
5002 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5003 {
5004 gfx_v8_0_cp_gfx_enable(adev, enable);
5005 gfx_v8_0_cp_compute_enable(adev, enable);
5006 }
5007
5008 static int gfx_v8_0_hw_init(void *handle)
5009 {
5010 int r;
5011 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5012
5013 gfx_v8_0_init_golden_registers(adev);
5014 gfx_v8_0_gpu_init(adev);
5015
5016 r = gfx_v8_0_rlc_resume(adev);
5017 if (r)
5018 return r;
5019
5020 r = gfx_v8_0_cp_resume(adev);
5021
5022 return r;
5023 }
5024
5025 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring)
5026 {
5027 struct amdgpu_device *adev = kiq_ring->adev;
5028 uint32_t scratch, tmp = 0;
5029 int r, i;
5030
5031 r = amdgpu_gfx_scratch_get(adev, &scratch);
5032 if (r) {
5033 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
5034 return r;
5035 }
5036 WREG32(scratch, 0xCAFEDEAD);
5037
5038 r = amdgpu_ring_alloc(kiq_ring, 10);
5039 if (r) {
5040 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
5041 amdgpu_gfx_scratch_free(adev, scratch);
5042 return r;
5043 }
5044
5045 /* unmap queues */
5046 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5047 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
5048 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5049 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5050 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5051 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5052 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5053 amdgpu_ring_write(kiq_ring, 0);
5054 amdgpu_ring_write(kiq_ring, 0);
5055 amdgpu_ring_write(kiq_ring, 0);
5056 /* write to scratch for completion */
5057 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
5058 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
5059 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
5060 amdgpu_ring_commit(kiq_ring);
5061
5062 for (i = 0; i < adev->usec_timeout; i++) {
5063 tmp = RREG32(scratch);
5064 if (tmp == 0xDEADBEEF)
5065 break;
5066 DRM_UDELAY(1);
5067 }
5068 if (i >= adev->usec_timeout) {
5069 DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
5070 r = -EINVAL;
5071 }
5072 amdgpu_gfx_scratch_free(adev, scratch);
5073 return r;
5074 }
5075
5076 static int gfx_v8_0_hw_fini(void *handle)
5077 {
5078 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5079 int i;
5080
5081 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5082 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5083
5084 /* disable KCQs so the CPC stops touching memory that is about to become invalid */
5085 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5086 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
5087
5088 if (amdgpu_sriov_vf(adev)) {
5089 pr_debug("For SRIOV client, shouldn't do anything.\n");
5090 return 0;
5091 }
5092 gfx_v8_0_cp_enable(adev, false);
5093 gfx_v8_0_rlc_stop(adev);
5094
5095 amdgpu_set_powergating_state(adev,
5096 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5097
5098 return 0;
5099 }
5100
5101 static int gfx_v8_0_suspend(void *handle)
5102 {
5103 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5104 adev->gfx.in_suspend = true;
5105 return gfx_v8_0_hw_fini(adev);
5106 }
5107
5108 static int gfx_v8_0_resume(void *handle)
5109 {
5110 int r;
5111 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5112
5113 r = gfx_v8_0_hw_init(adev);
5114 adev->gfx.in_suspend = false;
5115 return r;
5116 }
5117
5118 static bool gfx_v8_0_is_idle(void *handle)
5119 {
5120 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5121
5122 return !REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE);
5126 }
5127
5128 static int gfx_v8_0_wait_for_idle(void *handle)
5129 {
5130 unsigned i;
5131 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5132
5133 for (i = 0; i < adev->usec_timeout; i++) {
5134 if (gfx_v8_0_is_idle(handle))
5135 return 0;
5136
5137 udelay(1);
5138 }
5139 return -ETIMEDOUT;
5140 }
5141
5142 static bool gfx_v8_0_check_soft_reset(void *handle)
5143 {
5144 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5145 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5146 u32 tmp;
5147
5148 /* GRBM_STATUS */
5149 tmp = RREG32(mmGRBM_STATUS);
5150 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5151 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5152 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5153 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5154 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5155 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5156 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5157 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5158 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5159 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5160 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5161 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5162 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5163 }
5164
5165 /* GRBM_STATUS2 */
5166 tmp = RREG32(mmGRBM_STATUS2);
5167 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5168 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5169 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5170
5171 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5172 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5173 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5174 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5175 SOFT_RESET_CPF, 1);
5176 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5177 SOFT_RESET_CPC, 1);
5178 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5179 SOFT_RESET_CPG, 1);
5180 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5181 SOFT_RESET_GRBM, 1);
5182 }
5183
5184 /* SRBM_STATUS */
5185 tmp = RREG32(mmSRBM_STATUS);
5186 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5187 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5188 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5189 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5190 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5191 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5192
5193 if (grbm_soft_reset || srbm_soft_reset) {
5194 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5195 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5196 return true;
5197 } else {
5198 adev->gfx.grbm_soft_reset = 0;
5199 adev->gfx.srbm_soft_reset = 0;
5200 return false;
5201 }
5202 }
5203
5204 static int gfx_v8_0_pre_soft_reset(void *handle)
5205 {
5206 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5207 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5208
5209 if ((!adev->gfx.grbm_soft_reset) &&
5210 (!adev->gfx.srbm_soft_reset))
5211 return 0;
5212
5213 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5214 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5215
5216 /* stop the rlc */
5217 gfx_v8_0_rlc_stop(adev);
5218
5219 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5220 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5221 /* Disable GFX parsing/prefetching */
5222 gfx_v8_0_cp_gfx_enable(adev, false);
5223
5224 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5225 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5226 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5227 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5228 int i;
5229
5230 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5231 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5232
5233 mutex_lock(&adev->srbm_mutex);
5234 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5235 gfx_v8_0_deactivate_hqd(adev, 2);
5236 vi_srbm_select(adev, 0, 0, 0, 0);
5237 mutex_unlock(&adev->srbm_mutex);
5238 }
5239 /* Disable MEC parsing/prefetching */
5240 gfx_v8_0_cp_compute_enable(adev, false);
5241 }
5242
5243 return 0;
5244 }
5245
5246 static int gfx_v8_0_soft_reset(void *handle)
5247 {
5248 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5249 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5250 u32 tmp;
5251
5252 if ((!adev->gfx.grbm_soft_reset) &&
5253 (!adev->gfx.srbm_soft_reset))
5254 return 0;
5255
5256 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5257 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5258
5259 if (grbm_soft_reset || srbm_soft_reset) {
5260 tmp = RREG32(mmGMCON_DEBUG);
5261 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5262 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5263 WREG32(mmGMCON_DEBUG, tmp);
5264 udelay(50);
5265 }
5266
5267 if (grbm_soft_reset) {
5268 tmp = RREG32(mmGRBM_SOFT_RESET);
5269 tmp |= grbm_soft_reset;
5270 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5271 WREG32(mmGRBM_SOFT_RESET, tmp);
5272 tmp = RREG32(mmGRBM_SOFT_RESET);
5273
5274 udelay(50);
5275
5276 tmp &= ~grbm_soft_reset;
5277 WREG32(mmGRBM_SOFT_RESET, tmp);
5278 tmp = RREG32(mmGRBM_SOFT_RESET);
5279 }
5280
5281 if (srbm_soft_reset) {
5282 tmp = RREG32(mmSRBM_SOFT_RESET);
5283 tmp |= srbm_soft_reset;
5284 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5285 WREG32(mmSRBM_SOFT_RESET, tmp);
5286 tmp = RREG32(mmSRBM_SOFT_RESET);
5287
5288 udelay(50);
5289
5290 tmp &= ~srbm_soft_reset;
5291 WREG32(mmSRBM_SOFT_RESET, tmp);
5292 tmp = RREG32(mmSRBM_SOFT_RESET);
5293 }
5294
5295 if (grbm_soft_reset || srbm_soft_reset) {
5296 tmp = RREG32(mmGMCON_DEBUG);
5297 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5298 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5299 WREG32(mmGMCON_DEBUG, tmp);
5300 }
5301
5302 /* Wait a little for things to settle down */
5303 udelay(50);
5304
5305 return 0;
5306 }
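
/*
 * The sequence above follows the usual soft-reset pulse pattern: assert
 * the reset bits, read back to post the write, wait ~50us, then deassert
 * and read back again, with GMCON_DEBUG used to stall and clear GFX
 * traffic around the whole pulse.
 */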
5307
5308 static int gfx_v8_0_post_soft_reset(void *handle)
5309 {
5310 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5311 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5312
5313 if ((!adev->gfx.grbm_soft_reset) &&
5314 (!adev->gfx.srbm_soft_reset))
5315 return 0;
5316
5317 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5318 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5319
5320 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5321 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5322 gfx_v8_0_cp_gfx_resume(adev);
5323
5324 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5325 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5326 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5327 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5328 int i;
5329
5330 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5331 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5332
5333 mutex_lock(&adev->srbm_mutex);
5334 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5335 gfx_v8_0_deactivate_hqd(adev, 2);
5336 vi_srbm_select(adev, 0, 0, 0, 0);
5337 mutex_unlock(&adev->srbm_mutex);
5338 }
5339 gfx_v8_0_kiq_resume(adev);
5340 }
5341 gfx_v8_0_rlc_start(adev);
5342
5343 return 0;
5344 }
5345
5346 /**
5347 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5348 *
5349 * @adev: amdgpu_device pointer
5350 *
5351 * Returns a 64-bit snapshot of the GPU clock counter.
5353 */
5354 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5355 {
5356 uint64_t clock;
5357
5358 mutex_lock(&adev->gfx.gpu_clock_mutex);
5359 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5360 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5361 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5362 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5363 return clock;
5364 }
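
/*
 * Writing RLC_CAPTURE_GPU_CLOCK_COUNT latches the free-running counter so
 * that the LSB/MSB halves read back consistently; gpu_clock_mutex
 * serializes concurrent snapshots.
 */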
5365
5366 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5367 uint32_t vmid,
5368 uint32_t gds_base, uint32_t gds_size,
5369 uint32_t gws_base, uint32_t gws_size,
5370 uint32_t oa_base, uint32_t oa_size)
5371 {
5372 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5373 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5374
5375 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5376 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5377
5378 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5379 oa_size = oa_size >> AMDGPU_OA_SHIFT;
5380
5381 /* GDS Base */
5382 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5383 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5384 WRITE_DATA_DST_SEL(0)));
5385 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5386 amdgpu_ring_write(ring, 0);
5387 amdgpu_ring_write(ring, gds_base);
5388
5389 /* GDS Size */
5390 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5391 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5392 WRITE_DATA_DST_SEL(0)));
5393 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5394 amdgpu_ring_write(ring, 0);
5395 amdgpu_ring_write(ring, gds_size);
5396
5397 /* GWS */
5398 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5399 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5400 WRITE_DATA_DST_SEL(0)));
5401 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5402 amdgpu_ring_write(ring, 0);
5403 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5404
5405 /* OA */
5406 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5407 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5408 WRITE_DATA_DST_SEL(0)));
5409 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5410 amdgpu_ring_write(ring, 0);
5411 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5412 }
5413
5414 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5415 {
5416 WREG32(mmSQ_IND_INDEX,
5417 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5418 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5419 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5420 (SQ_IND_INDEX__FORCE_READ_MASK));
5421 return RREG32(mmSQ_IND_DATA);
5422 }
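
/*
 * Usage sketch (hypothetical helper, for illustration only): fetch one
 * wave's STATUS dword through the indirect SQ interface above.
 */
static inline uint32_t gfx_v8_0_wave_status_sketch(struct amdgpu_device *adev,
						   uint32_t simd, uint32_t wave)
{
	return wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
}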
5423
5424 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5425 uint32_t wave, uint32_t thread,
5426 uint32_t regno, uint32_t num, uint32_t *out)
5427 {
5428 WREG32(mmSQ_IND_INDEX,
5429 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5430 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5431 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5432 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5433 (SQ_IND_INDEX__FORCE_READ_MASK) |
5434 (SQ_IND_INDEX__AUTO_INCR_MASK));
5435 while (num--)
5436 *(out++) = RREG32(mmSQ_IND_DATA);
5437 }
5438
5439 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5440 {
5441 /* type 0 wave data */
5442 dst[(*no_fields)++] = 0;
5443 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5444 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5445 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5446 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5447 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5448 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5449 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5450 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5451 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5452 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5453 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5454 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5455 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5456 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5457 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5458 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5459 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5460 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5461 }
5462
5463 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5464 uint32_t wave, uint32_t start,
5465 uint32_t size, uint32_t *dst)
5466 {
5467 wave_read_regs(
5468 adev, simd, wave, 0,
5469 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5470 }
5471 
5473 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5474 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5475 .select_se_sh = &gfx_v8_0_select_se_sh,
5476 .read_wave_data = &gfx_v8_0_read_wave_data,
5477 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5478 };
5479
5480 static int gfx_v8_0_early_init(void *handle)
5481 {
5482 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5483
5484 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5485 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5486 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5487 gfx_v8_0_set_ring_funcs(adev);
5488 gfx_v8_0_set_irq_funcs(adev);
5489 gfx_v8_0_set_gds_init(adev);
5490 gfx_v8_0_set_rlc_funcs(adev);
5491
5492 return 0;
5493 }
5494
5495 static int gfx_v8_0_late_init(void *handle)
5496 {
5497 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5498 int r;
5499
5500 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5501 if (r)
5502 return r;
5503
5504 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5505 if (r)
5506 return r;
5507
5508 /* requires IBs, so do this in late init after the IB pool is initialized */
5509 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5510 if (r)
5511 return r;
5512
5513 amdgpu_set_powergating_state(adev,
5514 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5515
5516 return 0;
5517 }
5518
5519 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5520 bool enable)
5521 {
5522 if ((adev->asic_type == CHIP_POLARIS11) ||
5523 (adev->asic_type == CHIP_POLARIS12))
5524 /* Send msg to SMU via Powerplay */
5525 amdgpu_set_powergating_state(adev,
5526 AMD_IP_BLOCK_TYPE_SMC,
5527 enable ?
5528 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5529
5530 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5531 }
5532
5533 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5534 bool enable)
5535 {
5536 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5537 }
5538
5539 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5540 bool enable)
5541 {
5542 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5543 }
5544
5545 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5546 bool enable)
5547 {
5548 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5549 }
5550
5551 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5552 bool enable)
5553 {
5554 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5555
5556 /* Read any GFX register to wake up GFX. */
5557 if (!enable)
5558 RREG32(mmDB_RENDER_CONTROL);
5559 }
5560
5561 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5562 bool enable)
5563 {
5564 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5565 cz_enable_gfx_cg_power_gating(adev, true);
5566 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5567 cz_enable_gfx_pipeline_power_gating(adev, true);
5568 } else {
5569 cz_enable_gfx_cg_power_gating(adev, false);
5570 cz_enable_gfx_pipeline_power_gating(adev, false);
5571 }
5572 }
5573
5574 static int gfx_v8_0_set_powergating_state(void *handle,
5575 enum amd_powergating_state state)
5576 {
5577 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5578 bool enable = (state == AMD_PG_STATE_GATE);
5579
5580 if (amdgpu_sriov_vf(adev))
5581 return 0;
5582
5583 switch (adev->asic_type) {
5584 case CHIP_CARRIZO:
5585 case CHIP_STONEY:
5587 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5588 cz_enable_sck_slow_down_on_power_up(adev, true);
5589 cz_enable_sck_slow_down_on_power_down(adev, true);
5590 } else {
5591 cz_enable_sck_slow_down_on_power_up(adev, false);
5592 cz_enable_sck_slow_down_on_power_down(adev, false);
5593 }
5594 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5595 cz_enable_cp_power_gating(adev, true);
5596 else
5597 cz_enable_cp_power_gating(adev, false);
5598
5599 cz_update_gfx_cg_power_gating(adev, enable);
5600
5601 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5602 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5603 else
5604 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5605
5606 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5607 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5608 else
5609 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5610 break;
5611 case CHIP_POLARIS11:
5612 case CHIP_POLARIS12:
5613 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5614 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5615 else
5616 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5617
5618 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5619 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5620 else
5621 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5622
5623 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5624 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5625 else
5626 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5627 break;
5628 default:
5629 break;
5630 }
5631
5632 return 0;
5633 }
5634
5635 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5636 {
5637 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5638 int data;
5639
5640 if (amdgpu_sriov_vf(adev))
5641 *flags = 0;
5642
5643 /* AMD_CG_SUPPORT_GFX_MGCG */
5644 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5645 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5646 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5647
5648 /* AMD_CG_SUPPORT_GFX_CGCG */
5649 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5650 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5651 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5652
5653 /* AMD_CG_SUPPORT_GFX_CGLS */
5654 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5655 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5656
5657 /* AMD_CG_SUPPORT_GFX_CGTS */
5658 data = RREG32(mmCGTS_SM_CTRL_REG);
5659 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5660 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5661
5662 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5663 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5664 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5665
5666 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5667 data = RREG32(mmRLC_MEM_SLP_CNTL);
5668 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5669 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5670
5671 /* AMD_CG_SUPPORT_GFX_CP_LS */
5672 data = RREG32(mmCP_MEM_SLP_CNTL);
5673 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5674 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5675 }
5676
5677 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5678 uint32_t reg_addr, uint32_t cmd)
5679 {
5680 uint32_t data;
5681
5682 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5683
5684 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5685 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5686
5687 data = RREG32(mmRLC_SERDES_WR_CTRL);
5688 if (adev->asic_type == CHIP_STONEY)
5689 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5690 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5691 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5692 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5693 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5694 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5695 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5696 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5697 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5698 else
5699 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5700 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5701 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5702 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5703 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5704 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5705 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5706 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5707 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5708 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5709 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5710 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5711 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5712 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5713 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5714
5715 WREG32(mmRLC_SERDES_WR_CTRL, data);
5716 }
5717
5718 #define MSG_ENTER_RLC_SAFE_MODE 1
5719 #define MSG_EXIT_RLC_SAFE_MODE 0
5720 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5721 #define RLC_GPR_REG2__REQ__SHIFT 0
5722 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5723 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5724
5725 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5726 {
5727 u32 data;
5728 unsigned i;
5729
5730 data = RREG32(mmRLC_CNTL);
5731 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5732 return;
5733
5734 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5735 data |= RLC_SAFE_MODE__CMD_MASK;
5736 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5737 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5738 WREG32(mmRLC_SAFE_MODE, data);
5739
5740 for (i = 0; i < adev->usec_timeout; i++) {
5741 if ((RREG32(mmRLC_GPM_STAT) &
5742 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5743 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5744 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5745 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5746 break;
5747 udelay(1);
5748 }
5749
5750 for (i = 0; i < adev->usec_timeout; i++) {
5751 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5752 break;
5753 udelay(1);
5754 }
5755 adev->gfx.rlc.in_safe_mode = true;
5756 }
5757 }
5758
5759 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5760 {
5761 u32 data = 0;
5762 unsigned i;
5763
5764 data = RREG32(mmRLC_CNTL);
5765 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5766 return;
5767
5768 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5769 if (adev->gfx.rlc.in_safe_mode) {
5770 data |= RLC_SAFE_MODE__CMD_MASK;
5771 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5772 WREG32(mmRLC_SAFE_MODE, data);
5773 adev->gfx.rlc.in_safe_mode = false;
5774 }
5775 }
5776
5777 for (i = 0; i < adev->usec_timeout; i++) {
5778 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5779 break;
5780 udelay(1);
5781 }
5782 }
5783
5784 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5785 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5786 .exit_safe_mode = iceland_exit_rlc_safe_mode
5787 };
5788
5789 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5790 bool enable)
5791 {
5792 uint32_t temp, data;
5793
5794 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5795
5796 /* It is disabled by HW by default */
5797 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5798 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5799 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5800 /* 1 - RLC memory Light sleep */
5801 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5802
5803 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5804 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5805 }
5806
5807 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5808 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5809 if (adev->flags & AMD_IS_APU)
5810 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5811 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5812 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5813 else
5814 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5815 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5816 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5817 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5818
5819 if (temp != data)
5820 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5821
5822 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5823 gfx_v8_0_wait_for_rlc_serdes(adev);
5824
5825 /* 5 - clear mgcg override */
5826 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5827
5828 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5829 /* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */
5830 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5831 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5832 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5833 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5834 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5835 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5836 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5837 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5838 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5839 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5840 if (temp != data)
5841 WREG32(mmCGTS_SM_CTRL_REG, data);
5842 }
5843 udelay(50);
5844
5845 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5846 gfx_v8_0_wait_for_rlc_serdes(adev);
5847 } else {
5848 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5849 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5850 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5851 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5852 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5853 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5854 if (temp != data)
5855 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5856
5857 /* 2 - disable MGLS in RLC */
5858 data = RREG32(mmRLC_MEM_SLP_CNTL);
5859 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5860 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5861 WREG32(mmRLC_MEM_SLP_CNTL, data);
5862 }
5863
5864 /* 3 - disable MGLS in CP */
5865 data = RREG32(mmCP_MEM_SLP_CNTL);
5866 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5867 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5868 WREG32(mmCP_MEM_SLP_CNTL, data);
5869 }
5870
5871 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5872 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5873 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5874 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5875 if (temp != data)
5876 WREG32(mmCGTS_SM_CTRL_REG, data);
5877
5878 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5879 gfx_v8_0_wait_for_rlc_serdes(adev);
5880
5881 /* 6 - set mgcg override */
5882 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5883
5884 udelay(50);
5885
5886 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5887 gfx_v8_0_wait_for_rlc_serdes(adev);
5888 }
5889
5890 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5891 }
5892
5893 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5894 bool enable)
5895 {
5896 uint32_t temp, temp1, data, data1;
5897
5898 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5899
5900 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5901
5902 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5903 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5904 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5905 if (temp1 != data1)
5906 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5907
5908 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5909 gfx_v8_0_wait_for_rlc_serdes(adev);
5910
5911 /* 2 - clear cgcg override */
5912 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5913
5914 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5915 gfx_v8_0_wait_for_rlc_serdes(adev);
5916
5917 /* 3 - write cmd to set CGLS */
5918 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5919
5920 /* 4 - enable cgcg */
5921 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5922
5923 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5924 /* enable cgls*/
5925 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5926
5927 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5928 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5929
5930 if (temp1 != data1)
5931 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5932 } else {
5933 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5934 }
5935
5936 if (temp != data)
5937 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5938
5939 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5940 * Cmp_busy/GFX_Idle interrupts
5941 */
5942 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5943 } else {
5944 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5945 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5946
5947 /* set the CGCG/CGLS override bits */
5948 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5949 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5950 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5951 if (temp1 != data1)
5952 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5953
5954 /* read gfx register to wake up cgcg */
5955 RREG32(mmCB_CGTT_SCLK_CTRL);
5956 RREG32(mmCB_CGTT_SCLK_CTRL);
5957 RREG32(mmCB_CGTT_SCLK_CTRL);
5958 RREG32(mmCB_CGTT_SCLK_CTRL);
5959
5960 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5961 gfx_v8_0_wait_for_rlc_serdes(adev);
5962
5963 /* write cmd to Set CGCG Override */
5964 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5965
5966 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5967 gfx_v8_0_wait_for_rlc_serdes(adev);
5968
5969 /* write cmd to Clear CGLS */
5970 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5971
5972 /* disable cgcg, cgls should be disabled too. */
5973 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5974 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5975 if (temp != data)
5976 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5977 /* enable interrupts again for PG */
5978 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5979 }
5980
5981 gfx_v8_0_wait_for_rlc_serdes(adev);
5982
5983 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5984 }
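
/*
 * Ordering note: per the hardware requirement captured in the comments
 * below, MGCG/MGLS/TS must be up before CGCG/CGLS is enabled, and
 * CGCG/CGLS must come down first on the way out, so the two branches
 * simply invert the call order.
 */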
5985 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5986 bool enable)
5987 {
5988 if (enable) {
5989 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5990 * === MGCG + MGLS + TS(CG/LS) ===
5991 */
5992 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5993 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5994 } else {
5995 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5996 * === CGCG + CGLS ===
5997 */
5998 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5999 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6000 }
6001 return 0;
6002 }
6003
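/*
 * On Tonga (and Polaris below), clock gating is not programmed through
 * RLC registers directly; instead a message per block is built with
 * PP_CG_MSG_ID() and handed to the SMU via
 * amdgpu_dpm_set_clockgating_by_smu(). A minimal sketch of one such
 * message, using only symbols that appear in this file:
 *
 *	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, PP_BLOCK_GFX_CG,
 *			      PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
 *			      PP_STATE_CG | PP_STATE_LS);
 */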
6004 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6005 enum amd_clockgating_state state)
6006 {
6007 uint32_t msg_id, pp_state = 0;
6008 uint32_t pp_support_state = 0;
6009
6010 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6011 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6012 pp_support_state = PP_STATE_SUPPORT_LS;
6013 pp_state = PP_STATE_LS;
6014 }
6015 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6016 pp_support_state |= PP_STATE_SUPPORT_CG;
6017 pp_state |= PP_STATE_CG;
6018 }
6019 if (state == AMD_CG_STATE_UNGATE)
6020 pp_state = 0;
6021
6022 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6023 PP_BLOCK_GFX_CG,
6024 pp_support_state,
6025 pp_state);
6026 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6027 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6028 }
6029
6030 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6031 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6032 pp_support_state = PP_STATE_SUPPORT_LS;
6033 pp_state = PP_STATE_LS;
6034 }
6035
6036 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6037 pp_support_state |= PP_STATE_SUPPORT_CG;
6038 pp_state |= PP_STATE_CG;
6039 }
6040
6041 if (state == AMD_CG_STATE_UNGATE)
6042 pp_state = 0;
6043
6044 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6045 PP_BLOCK_GFX_MG,
6046 pp_support_state,
6047 pp_state);
6048 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6049 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6050 }
6051
6052 return 0;
6053 }
6054
6055 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6056 enum amd_clockgating_state state)
6057 {
6059 uint32_t msg_id, pp_state = 0;
6060 uint32_t pp_support_state = 0;
6061
6062 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6063 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6064 pp_support_state = PP_STATE_SUPPORT_LS;
6065 pp_state = PP_STATE_LS;
6066 }
6067 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6068 pp_support_state |= PP_STATE_SUPPORT_CG;
6069 pp_state |= PP_STATE_CG;
6070 }
6071 if (state == AMD_CG_STATE_UNGATE)
6072 pp_state = 0;
6073
6074 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6075 PP_BLOCK_GFX_CG,
6076 pp_support_state,
6077 pp_state);
6078 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6079 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6080 }
6081
6082 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6083 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6084 pp_support_state = PP_STATE_SUPPORT_LS;
6085 pp_state = PP_STATE_LS;
6086 }
6087 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6088 pp_support_state |= PP_STATE_SUPPORT_CG;
6089 pp_state |= PP_STATE_CG;
6090 }
6091 if (state == AMD_CG_STATE_UNGATE)
6092 pp_state = 0;
6093
6094 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6095 PP_BLOCK_GFX_3D,
6096 pp_support_state,
6097 pp_state);
6098 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6099 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6100 }
6101
6102 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6103 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6104 pp_support_state = PP_STATE_SUPPORT_LS;
6105 pp_state = PP_STATE_LS;
6106 }
6107
6108 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6109 pp_support_state |= PP_STATE_SUPPORT_CG;
6110 pp_state |= PP_STATE_CG;
6111 }
6112
6113 if (state == AMD_CG_STATE_UNGATE)
6114 pp_state = 0;
6115
6116 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6117 PP_BLOCK_GFX_MG,
6118 pp_support_state,
6119 pp_state);
6120 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6121 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6122 }
6123
6124 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6125 pp_support_state = PP_STATE_SUPPORT_LS;
6126
6127 if (state == AMD_CG_STATE_UNGATE)
6128 pp_state = 0;
6129 else
6130 pp_state = PP_STATE_LS;
6131
6132 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6133 PP_BLOCK_GFX_RLC,
6134 pp_support_state,
6135 pp_state);
6136 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6137 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6138 }
6139
6140 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6141 pp_support_state = PP_STATE_SUPPORT_LS;
6142
6143 if (state == AMD_CG_STATE_UNGATE)
6144 pp_state = 0;
6145 else
6146 pp_state = PP_STATE_LS;
6147 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6148 PP_BLOCK_GFX_CP,
6149 pp_support_state,
6150 pp_state);
6151 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6152 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6153 }
6154
6155 return 0;
6156 }
6157
6158 static int gfx_v8_0_set_clockgating_state(void *handle,
6159 enum amd_clockgating_state state)
6160 {
6161 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6162
6163 if (amdgpu_sriov_vf(adev))
6164 return 0;
6165
6166 switch (adev->asic_type) {
6167 case CHIP_FIJI:
6168 case CHIP_CARRIZO:
6169 case CHIP_STONEY:
6170 gfx_v8_0_update_gfx_clock_gating(adev,
6171 state == AMD_CG_STATE_GATE);
6172 break;
6173 case CHIP_TONGA:
6174 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6175 break;
6176 case CHIP_POLARIS10:
6177 case CHIP_POLARIS11:
6178 case CHIP_POLARIS12:
6179 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6180 break;
6181 default:
6182 break;
6183 }
6184 return 0;
6185 }
6186
6187 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6188 {
6189 return ring->adev->wb.wb[ring->rptr_offs];
6190 }
6191
6192 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6193 {
6194 struct amdgpu_device *adev = ring->adev;
6195
6196 if (ring->use_doorbell)
6197 /* XXX check if swapping is necessary on BE */
6198 return ring->adev->wb.wb[ring->wptr_offs];
6199 else
6200 return RREG32(mmCP_RB0_WPTR);
6201 }
6202
6203 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6204 {
6205 struct amdgpu_device *adev = ring->adev;
6206
6207 if (ring->use_doorbell) {
6208 /* XXX check if swapping is necessary on BE */
6209 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6210 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6211 } else {
6212 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6213 (void)RREG32(mmCP_RB0_WPTR);
6214 }
6215 }
6216
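/*
 * HDP flush: a single WAIT_REG_MEM packet with operation 1
 * (write, wait, write) writes GPU_HDP_FLUSH_REQ and then polls
 * GPU_HDP_FLUSH_DONE for the matching ref bits. Compute rings use a
 * per-pipe CP2/CP6 done bit depending on the MEC; the gfx ring uses
 * CP0 and runs the packet on the PFP.
 */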
6217 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6218 {
6219 u32 ref_and_mask, reg_mem_engine;
6220
6221 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6222 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6223 switch (ring->me) {
6224 case 1:
6225 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6226 break;
6227 case 2:
6228 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6229 break;
6230 default:
6231 return;
6232 }
6233 reg_mem_engine = 0;
6234 } else {
6235 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6236 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6237 }
6238
6239 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6240 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6241 WAIT_REG_MEM_FUNCTION(3) | /* == */
6242 reg_mem_engine));
6243 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6244 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6245 amdgpu_ring_write(ring, ref_and_mask);
6246 amdgpu_ring_write(ring, ref_and_mask);
6247 amdgpu_ring_write(ring, 0x20); /* poll interval */
6248 }
6249
6250 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6251 {
6252 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6253 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6254 EVENT_INDEX(4));
6255
6256 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6257 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6258 EVENT_INDEX(0));
6259 }
6260
6262 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6263 {
6264 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6265 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6266 WRITE_DATA_DST_SEL(0) |
6267 WR_CONFIRM));
6268 amdgpu_ring_write(ring, mmHDP_DEBUG0);
6269 amdgpu_ring_write(ring, 0);
6270 amdgpu_ring_write(ring, 1);
6272 }
6273
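/*
 * Emit a gfx IB: CE IBs use INDIRECT_BUFFER_CONST, everything else a
 * plain INDIRECT_BUFFER. The control dword packs the IB length with
 * vm_id in bits 24 and up. Under SR-IOV, preemptible IBs get PRE_ENB
 * set, and DE IBs additionally emit the DE metadata so the preempted
 * state can be restored.
 */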
6274 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6275 struct amdgpu_ib *ib,
6276 unsigned vm_id, bool ctx_switch)
6277 {
6278 u32 header, control = 0;
6279
6280 if (ib->flags & AMDGPU_IB_FLAG_CE)
6281 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6282 else
6283 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6284
6285 control |= ib->length_dw | (vm_id << 24);
6286
6287 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6288 control |= INDIRECT_BUFFER_PRE_ENB(1);
6289
6290 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6291 gfx_v8_0_ring_emit_de_meta(ring);
6292 }
6293
6294 amdgpu_ring_write(ring, header);
6295 amdgpu_ring_write(ring,
6296 #ifdef __BIG_ENDIAN
6297 (2 << 0) |
6298 #endif
6299 (ib->gpu_addr & 0xFFFFFFFC));
6300 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6301 amdgpu_ring_write(ring, control);
6302 }
6303
6304 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6305 struct amdgpu_ib *ib,
6306 unsigned vm_id, bool ctx_switch)
6307 {
6308 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6309
6310 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6311 amdgpu_ring_write(ring,
6312 #ifdef __BIG_ENDIAN
6313 (2 << 0) |
6314 #endif
6315 (ib->gpu_addr & 0xFFFFFFFC));
6316 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6317 amdgpu_ring_write(ring, control);
6318 }
6319
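/*
 * Gfx fence: EVENT_WRITE_EOP flushes the TC/TCL1 caches and writes the
 * seq value at @addr; DATA_SEL picks a 64-bit (2) or 32-bit (1) write
 * and INT_SEL(2) raises an interrupt once the write confirm comes
 * back. An illustrative call (hypothetical values, not from this
 * file):
 *
 *	gfx_v8_0_ring_emit_fence_gfx(ring, fence_gpu_addr, seq,
 *				     AMDGPU_FENCE_FLAG_INT);
 */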
6320 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6321 u64 seq, unsigned flags)
6322 {
6323 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6324 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6325
6326 /* EVENT_WRITE_EOP - flush caches, send int */
6327 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6328 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6329 EOP_TC_ACTION_EN |
6330 EOP_TC_WB_ACTION_EN |
6331 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6332 EVENT_INDEX(5)));
6333 amdgpu_ring_write(ring, addr & 0xfffffffc);
6334 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6335 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6336 amdgpu_ring_write(ring, lower_32_bits(seq));
6337 amdgpu_ring_write(ring, upper_32_bits(seq));
6339 }
6340
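/*
 * Pipeline sync: stall the ring until the most recent fence seq shows
 * up in memory, using WAIT_REG_MEM in memory space with function 3
 * (equal). On the gfx ring the wait runs on the PFP, so command
 * fetching itself stalls; compute rings wait on the ME.
 */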
6341 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6342 {
6343 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6344 uint32_t seq = ring->fence_drv.sync_seq;
6345 uint64_t addr = ring->fence_drv.gpu_addr;
6346
6347 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6348 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6349 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6350 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6351 amdgpu_ring_write(ring, addr & 0xfffffffc);
6352 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6353 amdgpu_ring_write(ring, seq);
6354 amdgpu_ring_write(ring, 0xffffffff);
6355 amdgpu_ring_write(ring, 4); /* poll interval */
6356 }
6357
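/*
 * VM flush: contexts 0-7 and 8-15 live in separate register banks,
 * hence the VM_CONTEXT0/VM_CONTEXT8 split. After the page-table base
 * update, VM_INVALIDATE_REQUEST kicks the TLB invalidate and a
 * WAIT_REG_MEM on the same register acts as an ordering point; gfx
 * rings finish with PFP_SYNC_ME to avoid stale PFP reads.
 */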
6358 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6359 unsigned vm_id, uint64_t pd_addr)
6360 {
6361 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6362
6363 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6364 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6365 WRITE_DATA_DST_SEL(0) |
6366 WR_CONFIRM));
6367 if (vm_id < 8) {
6368 amdgpu_ring_write(ring,
6369 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6370 } else {
6371 amdgpu_ring_write(ring,
6372 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6373 }
6374 amdgpu_ring_write(ring, 0);
6375 amdgpu_ring_write(ring, pd_addr >> 12);
6376
6377 /* bits 0-15 are the VM contexts0-15 */
6378 /* invalidate the cache */
6379 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6380 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6381 WRITE_DATA_DST_SEL(0)));
6382 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6383 amdgpu_ring_write(ring, 0);
6384 amdgpu_ring_write(ring, 1 << vm_id);
6385
6386 /* wait for the invalidate to complete */
6387 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6388 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6389 WAIT_REG_MEM_FUNCTION(0) | /* always */
6390 WAIT_REG_MEM_ENGINE(0))); /* me */
6391 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6392 amdgpu_ring_write(ring, 0);
6393 amdgpu_ring_write(ring, 0); /* ref */
6394 amdgpu_ring_write(ring, 0); /* mask */
6395 amdgpu_ring_write(ring, 0x20); /* poll interval */
6396
6397 /* compute doesn't have PFP */
6398 if (usepfp) {
6399 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6400 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6401 amdgpu_ring_write(ring, 0x0);
6402 }
6403 }
6404
6405 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6406 {
6407 return ring->adev->wb.wb[ring->wptr_offs];
6408 }
6409
6410 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6411 {
6412 struct amdgpu_device *adev = ring->adev;
6413
6414 /* XXX check if swapping is necessary on BE */
6415 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6416 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6417 }
6418
6419 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6420 bool acquire)
6421 {
6422 struct amdgpu_device *adev = ring->adev;
6423 int pipe_num, tmp, reg;
6424 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6425
6426 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6427
6428 /* first me only has 2 entries, GFX and HP3D */
6429 if (ring->me > 0)
6430 pipe_num -= 2;
6431
6432 reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6433 tmp = RREG32(reg);
6434 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6435 WREG32(reg, tmp);
6436 }
6437
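/*
 * Pipe reservation: one bit per (me, pipe) in pipe_reserve_bitmap.
 * While any reservation is held, every pipe without one is dropped to
 * the minimum SPI_WCL_PIPE_PERCENT value; once the bitmap empties, all
 * gfx and compute pipes get their full budget back.
 */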
6438 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6439 struct amdgpu_ring *ring,
6440 bool acquire)
6441 {
6442 int i, pipe;
6443 bool reserve;
6444 struct amdgpu_ring *iring;
6445
6446 mutex_lock(&adev->gfx.pipe_reserve_mutex);
6447 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6448 if (acquire)
6449 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6450 else
6451 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6452
6453 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6454 /* Clear all reservations - everyone reacquires all resources */
6455 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6456 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6457 true);
6458
6459 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6460 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6461 true);
6462 } else {
6463 /* Lower all pipes without a current reservation */
6464 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6465 iring = &adev->gfx.gfx_ring[i];
6466 pipe = amdgpu_gfx_queue_to_bit(adev,
6467 iring->me,
6468 iring->pipe,
6469 0);
6470 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6471 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6472 }
6473
6474 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6475 iring = &adev->gfx.compute_ring[i];
6476 pipe = amdgpu_gfx_queue_to_bit(adev,
6477 iring->me,
6478 iring->pipe,
6479 0);
6480 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6481 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6482 }
6483 }
6484
6485 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6486 }
6487
6488 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6489 struct amdgpu_ring *ring,
6490 bool acquire)
6491 {
6492 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6493 uint32_t queue_priority = acquire ? 0xf : 0x0;
6494
6495 mutex_lock(&adev->srbm_mutex);
6496 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6497
6498 WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6499 WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6500
6501 vi_srbm_select(adev, 0, 0, 0, 0);
6502 mutex_unlock(&adev->srbm_mutex);
6503 }

6504 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6505 enum amd_sched_priority priority)
6506 {
6507 struct amdgpu_device *adev = ring->adev;
6508 bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW;
6509
6510 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6511 return;
6512
6513 gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6514 gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6515 }
6516
6517 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6518 u64 addr, u64 seq,
6519 unsigned flags)
6520 {
6521 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6522 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6523
6524 /* RELEASE_MEM - flush caches, send int */
6525 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6526 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6527 EOP_TC_ACTION_EN |
6528 EOP_TC_WB_ACTION_EN |
6529 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6530 EVENT_INDEX(5)));
6531 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6532 amdgpu_ring_write(ring, addr & 0xfffffffc);
6533 amdgpu_ring_write(ring, upper_32_bits(addr));
6534 amdgpu_ring_write(ring, lower_32_bits(seq));
6535 amdgpu_ring_write(ring, upper_32_bits(seq));
6536 }
6537
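/*
 * KIQ fence: the seq writeback slot is only 32 bits wide, hence the
 * BUG_ON for 64-bit fences. The interrupt is triggered by writing
 * CPC_INT_STATUS (src_id 178 per the comment below) rather than via an
 * EOP event.
 */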
6538 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6539 u64 seq, unsigned int flags)
6540 {
6541 /* we only allocate 32 bits for each fence seq wb address */
6542 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6543
6544 /* write fence seq to the "addr" */
6545 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6546 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6547 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6548 amdgpu_ring_write(ring, lower_32_bits(addr));
6549 amdgpu_ring_write(ring, upper_32_bits(addr));
6550 amdgpu_ring_write(ring, lower_32_bits(seq));
6551
6552 if (flags & AMDGPU_FENCE_FLAG_INT) {
6553 /* set register to trigger INT */
6554 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6555 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6556 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6557 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6558 amdgpu_ring_write(ring, 0);
6559 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6560 }
6561 }
6562
6563 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6564 {
6565 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6566 amdgpu_ring_write(ring, 0);
6567 }
6568
6569 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6570 {
6571 uint32_t dw2 = 0;
6572
6573 if (amdgpu_sriov_vf(ring->adev))
6574 gfx_v8_0_ring_emit_ce_meta(ring);
6575
6576 dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
6577 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6578 gfx_v8_0_ring_emit_vgt_flush(ring);
6579 /* set load_global_config & load_global_uconfig */
6580 dw2 |= 0x8001;
6581 /* set load_cs_sh_regs */
6582 dw2 |= 0x01000000;
6583 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6584 dw2 |= 0x10002;
6585
6586 /* set load_ce_ram if a preamble is present */
6587 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6588 dw2 |= 0x10000000;
6589 } else {
6590 /* still load_ce_ram if this is the first time a preamble is
6591 * presented, even though no context switch happens.
6592 */
6593 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6594 dw2 |= 0x10000000;
6595 }
6596
6597 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6598 amdgpu_ring_write(ring, dw2);
6599 amdgpu_ring_write(ring, 0);
6600 }
6601
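/*
 * Conditional execution: init_cond_exec emits COND_EXEC with the dummy
 * count 0x55aa55aa and returns its ring offset; patch_cond_exec later
 * rewrites that slot with the real number of DWs to skip, accounting
 * for ring wrap-around when the write pointer has passed the end of
 * the buffer.
 */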
6602 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6603 {
6604 unsigned ret;
6605
6606 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6607 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6608 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6609 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6610 ret = ring->wptr & ring->buf_mask;
6611 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6612 return ret;
6613 }
6614
6615 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6616 {
6617 unsigned cur;
6618
6619 BUG_ON(offset > ring->buf_mask);
6620 BUG_ON(ring->ring[offset] != 0x55aa55aa);
6621
6622 cur = (ring->wptr & ring->buf_mask) - 1;
6623 if (likely(cur > offset))
6624 ring->ring[offset] = cur - offset;
6625 else
6626 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6627 }
6628
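/*
 * KIQ register read: COPY_DATA from the register (src 0) into memory
 * (dst 5) at the virt.reg_val_offs slot of the writeback page, with
 * write confirm so the host can poll the slot afterwards.
 */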
6629 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6630 {
6631 struct amdgpu_device *adev = ring->adev;
6632
6633 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6634 amdgpu_ring_write(ring, 0 | /* src: register */
6635 (5 << 8) | /* dst: memory */
6636 (1 << 20)); /* write confirm */
6637 amdgpu_ring_write(ring, reg);
6638 amdgpu_ring_write(ring, 0);
6639 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6640 adev->virt.reg_val_offs * 4));
6641 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6642 adev->virt.reg_val_offs * 4));
6643 }
6644
6645 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6646 uint32_t val)
6647 {
6648 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6649 amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
6650 amdgpu_ring_write(ring, reg);
6651 amdgpu_ring_write(ring, 0);
6652 amdgpu_ring_write(ring, val);
6653 }
6654
6655 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6656 enum amdgpu_interrupt_state state)
6657 {
6658 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6659 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6660 }
6661
6662 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6663 int me, int pipe,
6664 enum amdgpu_interrupt_state state)
6665 {
6666 u32 mec_int_cntl, mec_int_cntl_reg;
6667
6668 /*
6669 * amdgpu controls only the first MEC. That's why this function only
6670 * handles the setting of interrupts for this specific MEC. All other
6671 * pipes' interrupts are set by amdkfd.
6672 */
6673
6674 if (me == 1) {
6675 switch (pipe) {
6676 case 0:
6677 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6678 break;
6679 case 1:
6680 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6681 break;
6682 case 2:
6683 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6684 break;
6685 case 3:
6686 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6687 break;
6688 default:
6689 DRM_DEBUG("invalid pipe %d\n", pipe);
6690 return;
6691 }
6692 } else {
6693 DRM_DEBUG("invalid me %d\n", me);
6694 return;
6695 }
6696
6697 switch (state) {
6698 case AMDGPU_IRQ_STATE_DISABLE:
6699 mec_int_cntl = RREG32(mec_int_cntl_reg);
6700 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6701 WREG32(mec_int_cntl_reg, mec_int_cntl);
6702 break;
6703 case AMDGPU_IRQ_STATE_ENABLE:
6704 mec_int_cntl = RREG32(mec_int_cntl_reg);
6705 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6706 WREG32(mec_int_cntl_reg, mec_int_cntl);
6707 break;
6708 default:
6709 break;
6710 }
6711 }
6712
6713 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6714 struct amdgpu_irq_src *source,
6715 unsigned type,
6716 enum amdgpu_interrupt_state state)
6717 {
6718 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6719 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6720
6721 return 0;
6722 }
6723
6724 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6725 struct amdgpu_irq_src *source,
6726 unsigned type,
6727 enum amdgpu_interrupt_state state)
6728 {
6729 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6730 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6731
6732 return 0;
6733 }
6734
6735 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6736 struct amdgpu_irq_src *src,
6737 unsigned type,
6738 enum amdgpu_interrupt_state state)
6739 {
6740 switch (type) {
6741 case AMDGPU_CP_IRQ_GFX_EOP:
6742 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6743 break;
6744 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6745 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6746 break;
6747 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6748 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6749 break;
6750 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6751 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6752 break;
6753 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6754 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6755 break;
6756 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6757 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6758 break;
6759 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6760 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6761 break;
6762 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6763 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6764 break;
6765 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6766 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6767 break;
6768 default:
6769 break;
6770 }
6771 return 0;
6772 }
6773
6774 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6775 struct amdgpu_irq_src *source,
6776 struct amdgpu_iv_entry *entry)
6777 {
6778 int i;
6779 u8 me_id, pipe_id, queue_id;
6780 struct amdgpu_ring *ring;
6781
6782 DRM_DEBUG("IH: CP EOP\n");
6783 me_id = (entry->ring_id & 0x0c) >> 2;
6784 pipe_id = (entry->ring_id & 0x03) >> 0;
6785 queue_id = (entry->ring_id & 0x70) >> 4;
6786
6787 switch (me_id) {
6788 case 0:
6789 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6790 break;
6791 case 1:
6792 case 2:
6793 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6794 ring = &adev->gfx.compute_ring[i];
6795 /* Per-queue interrupt is supported for MEC starting from VI,
6796 * but it can only be enabled/disabled per pipe rather than per queue.
6797 */
6798 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6799 amdgpu_fence_process(ring);
6800 }
6801 break;
6802 }
6803 return 0;
6804 }
6805
6806 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6807 struct amdgpu_irq_src *source,
6808 struct amdgpu_iv_entry *entry)
6809 {
6810 DRM_ERROR("Illegal register access in command stream\n");
6811 schedule_work(&adev->reset_work);
6812 return 0;
6813 }
6814
6815 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6816 struct amdgpu_irq_src *source,
6817 struct amdgpu_iv_entry *entry)
6818 {
6819 DRM_ERROR("Illegal instruction in command stream\n");
6820 schedule_work(&adev->reset_work);
6821 return 0;
6822 }
6823
6824 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6825 struct amdgpu_irq_src *src,
6826 unsigned int type,
6827 enum amdgpu_interrupt_state state)
6828 {
6829 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6830
6831 switch (type) {
6832 case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6833 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
6834 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6835 if (ring->me == 1)
6836 WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
6837 ring->pipe,
6838 GENERIC2_INT_ENABLE,
6839 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6840 else
6841 WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
6842 ring->pipe,
6843 GENERIC2_INT_ENABLE,
6844 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6845 break;
6846 default:
6847 BUG(); /* KIQ only supports GENERIC2_INT for now */
6848 break;
6849 }
6850 return 0;
6851 }
6852
6853 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6854 struct amdgpu_irq_src *source,
6855 struct amdgpu_iv_entry *entry)
6856 {
6857 u8 me_id, pipe_id, queue_id;
6858 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6859
6860 me_id = (entry->ring_id & 0x0c) >> 2;
6861 pipe_id = (entry->ring_id & 0x03) >> 0;
6862 queue_id = (entry->ring_id & 0x70) >> 4;
6863 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6864 me_id, pipe_id, queue_id);
6865
6866 amdgpu_fence_process(ring);
6867 return 0;
6868 }
6869
6870 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6871 .name = "gfx_v8_0",
6872 .early_init = gfx_v8_0_early_init,
6873 .late_init = gfx_v8_0_late_init,
6874 .sw_init = gfx_v8_0_sw_init,
6875 .sw_fini = gfx_v8_0_sw_fini,
6876 .hw_init = gfx_v8_0_hw_init,
6877 .hw_fini = gfx_v8_0_hw_fini,
6878 .suspend = gfx_v8_0_suspend,
6879 .resume = gfx_v8_0_resume,
6880 .is_idle = gfx_v8_0_is_idle,
6881 .wait_for_idle = gfx_v8_0_wait_for_idle,
6882 .check_soft_reset = gfx_v8_0_check_soft_reset,
6883 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6884 .soft_reset = gfx_v8_0_soft_reset,
6885 .post_soft_reset = gfx_v8_0_post_soft_reset,
6886 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6887 .set_powergating_state = gfx_v8_0_set_powergating_state,
6888 .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6889 };
6890
6891 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6892 .type = AMDGPU_RING_TYPE_GFX,
6893 .align_mask = 0xff,
6894 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6895 .support_64bit_ptrs = false,
6896 .get_rptr = gfx_v8_0_ring_get_rptr,
6897 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6898 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6899 .emit_frame_size = /* maximum 215 dw if counting 16 IBs in */
6900 5 + /* COND_EXEC */
6901 7 + /* PIPELINE_SYNC */
6902 19 + /* VM_FLUSH */
6903 8 + /* FENCE for VM_FLUSH */
6904 20 + /* GDS switch */
6905 4 + /* double SWITCH_BUFFER,
6906 the first COND_EXEC jumps to the place just
6907 prior to this double SWITCH_BUFFER */
6908 5 + /* COND_EXEC */
6909 7 + /* HDP_flush */
6910 4 + /* VGT_flush */
6911 14 + /* CE_META */
6912 31 + /* DE_META */
6913 3 + /* CNTX_CTRL */
6914 5 + /* HDP_INVL */
6915 8 + 8 + /* FENCE x2 */
6916 2, /* SWITCH_BUFFER */
6917 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6918 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6919 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6920 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6921 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6922 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6923 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6924 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6925 .test_ring = gfx_v8_0_ring_test_ring,
6926 .test_ib = gfx_v8_0_ring_test_ib,
6927 .insert_nop = amdgpu_ring_insert_nop,
6928 .pad_ib = amdgpu_ring_generic_pad_ib,
6929 .emit_switch_buffer = gfx_v8_ring_emit_sb,
6930 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6931 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6932 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6933 };
6934
6935 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6936 .type = AMDGPU_RING_TYPE_COMPUTE,
6937 .align_mask = 0xff,
6938 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6939 .support_64bit_ptrs = false,
6940 .get_rptr = gfx_v8_0_ring_get_rptr,
6941 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6942 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6943 .emit_frame_size =
6944 20 + /* gfx_v8_0_ring_emit_gds_switch */
6945 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6946 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6947 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6948 17 + /* gfx_v8_0_ring_emit_vm_flush */
6949 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6950 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6951 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6952 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6953 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6954 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6955 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6956 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6957 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6958 .test_ring = gfx_v8_0_ring_test_ring,
6959 .test_ib = gfx_v8_0_ring_test_ib,
6960 .insert_nop = amdgpu_ring_insert_nop,
6961 .pad_ib = amdgpu_ring_generic_pad_ib,
6962 .set_priority = gfx_v8_0_ring_set_priority_compute,
6963 };
6964
6965 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6966 .type = AMDGPU_RING_TYPE_KIQ,
6967 .align_mask = 0xff,
6968 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6969 .support_64bit_ptrs = false,
6970 .get_rptr = gfx_v8_0_ring_get_rptr,
6971 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6972 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6973 .emit_frame_size =
6974 20 + /* gfx_v8_0_ring_emit_gds_switch */
6975 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6976 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6977 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6978 17 + /* gfx_v8_0_ring_emit_vm_flush */
6979 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6980 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6981 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6982 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6983 .test_ring = gfx_v8_0_ring_test_ring,
6984 .test_ib = gfx_v8_0_ring_test_ib,
6985 .insert_nop = amdgpu_ring_insert_nop,
6986 .pad_ib = amdgpu_ring_generic_pad_ib,
6987 .emit_rreg = gfx_v8_0_ring_emit_rreg,
6988 .emit_wreg = gfx_v8_0_ring_emit_wreg,
6989 };
6990
6991 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6992 {
6993 int i;
6994
6995 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6996
6997 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6998 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6999
7000 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7001 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7002 }
7003
7004 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7005 .set = gfx_v8_0_set_eop_interrupt_state,
7006 .process = gfx_v8_0_eop_irq,
7007 };
7008
7009 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7010 .set = gfx_v8_0_set_priv_reg_fault_state,
7011 .process = gfx_v8_0_priv_reg_irq,
7012 };
7013
7014 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7015 .set = gfx_v8_0_set_priv_inst_fault_state,
7016 .process = gfx_v8_0_priv_inst_irq,
7017 };
7018
7019 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7020 .set = gfx_v8_0_kiq_set_interrupt_state,
7021 .process = gfx_v8_0_kiq_irq,
7022 };
7023
7024 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7025 {
7026 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7027 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7028
7029 adev->gfx.priv_reg_irq.num_types = 1;
7030 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7031
7032 adev->gfx.priv_inst_irq.num_types = 1;
7033 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7034
7035 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7036 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7037 }
7038
7039 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7040 {
7041 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7042 }
7043
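/*
 * GDS defaults: total GDS memory is read back from GDS_VMID0_SIZE and
 * split into gfx and CS partitions, 4KB each for 64KB parts and 1KB
 * each otherwise, with matching GWS/OA splits.
 */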
7044 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7045 {
7046 /* init asic gds info */
7047 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7048 adev->gds.gws.total_size = 64;
7049 adev->gds.oa.total_size = 16;
7050
7051 if (adev->gds.mem.total_size == 64 * 1024) {
7052 adev->gds.mem.gfx_partition_size = 4096;
7053 adev->gds.mem.cs_partition_size = 4096;
7054
7055 adev->gds.gws.gfx_partition_size = 4;
7056 adev->gds.gws.cs_partition_size = 4;
7057
7058 adev->gds.oa.gfx_partition_size = 4;
7059 adev->gds.oa.cs_partition_size = 1;
7060 } else {
7061 adev->gds.mem.gfx_partition_size = 1024;
7062 adev->gds.mem.cs_partition_size = 1024;
7063
7064 adev->gds.gws.gfx_partition_size = 16;
7065 adev->gds.gws.cs_partition_size = 16;
7066
7067 adev->gds.oa.gfx_partition_size = 4;
7068 adev->gds.oa.cs_partition_size = 4;
7069 }
7070 }
7071
7072 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7073 u32 bitmap)
7074 {
7075 u32 data;
7076
7077 if (!bitmap)
7078 return;
7079
7080 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7081 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7082
7083 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7084 }
7085
7086 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7087 {
7088 u32 data, mask;
7089
7090 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7091 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7092
7093 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7094
7095 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7096 }
7097
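/*
 * CU info: walk every SE/SH pair, apply any user-requested inactive-CU
 * mask, and record the active bitmap. The first ao_cu_num active CUs
 * per SH are additionally counted as always-on (APUs reserve 2, dGPUs
 * use max_cu_per_sh).
 */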
7098 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7099 {
7100 int i, j, k, counter, active_cu_number = 0;
7101 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7102 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7103 unsigned disable_masks[4 * 2];
7104 u32 ao_cu_num;
7105
7106 memset(cu_info, 0, sizeof(*cu_info));
7107
7108 if (adev->flags & AMD_IS_APU)
7109 ao_cu_num = 2;
7110 else
7111 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7112
7113 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7114
7115 mutex_lock(&adev->grbm_idx_mutex);
7116 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7117 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7118 mask = 1;
7119 ao_bitmap = 0;
7120 counter = 0;
7121 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7122 if (i < 4 && j < 2)
7123 gfx_v8_0_set_user_cu_inactive_bitmap(
7124 adev, disable_masks[i * 2 + j]);
7125 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7126 cu_info->bitmap[i][j] = bitmap;
7127
7128 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7129 if (bitmap & mask) {
7130 if (counter < ao_cu_num)
7131 ao_bitmap |= mask;
7132 counter++;
7133 }
7134 mask <<= 1;
7135 }
7136 active_cu_number += counter;
7137 if (i < 2 && j < 2)
7138 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7139 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7140 }
7141 }
7142 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7143 mutex_unlock(&adev->grbm_idx_mutex);
7144
7145 cu_info->number = active_cu_number;
7146 cu_info->ao_cu_mask = ao_cu_mask;
7147 }
7148
7149 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7150 {
7151 .type = AMD_IP_BLOCK_TYPE_GFX,
7152 .major = 8,
7153 .minor = 0,
7154 .rev = 0,
7155 .funcs = &gfx_v8_0_ip_funcs,
7156 };
7157
7158 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7159 {
7160 .type = AMD_IP_BLOCK_TYPE_GFX,
7161 .major = 8,
7162 .minor = 1,
7163 .rev = 0,
7164 .funcs = &gfx_v8_0_ip_funcs,
7165 };
7166
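/*
 * CE/DE metadata for SR-IOV world switch. The CSA appears to sit two
 * 4KB pages below the top of the reserved VA area, with the GDS backup
 * page right after it; the payload layout depends on whether chained
 * IBs are supported, hence the unions below.
 */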
7167 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7168 {
7169 uint64_t ce_payload_addr;
7170 int cnt_ce;
7171 union {
7172 struct vi_ce_ib_state regular;
7173 struct vi_ce_ib_state_chained_ib chained;
7174 } ce_payload = {};
7175
7176 if (ring->adev->virt.chained_ib_support) {
7177 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7178 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7179 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7180 } else {
7181 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7182 offsetof(struct vi_gfx_meta_data, ce_payload);
7183 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7184 }
7185
7186 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7187 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7188 WRITE_DATA_DST_SEL(8) |
7189 WR_CONFIRM) |
7190 WRITE_DATA_CACHE_POLICY(0));
7191 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7192 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7193 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7194 }
7195
7196 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7197 {
7198 uint64_t de_payload_addr, gds_addr, csa_addr;
7199 int cnt_de;
7200 union {
7201 struct vi_de_ib_state regular;
7202 struct vi_de_ib_state_chained_ib chained;
7203 } de_payload = {};
7204
7205 csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
7206 gds_addr = csa_addr + 4096;
7207 if (ring->adev->virt.chained_ib_support) {
7208 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7209 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7210 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7211 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7212 } else {
7213 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7214 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7215 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7216 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7217 }
7218
7219 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7220 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7221 WRITE_DATA_DST_SEL(8) |
7222 WR_CONFIRM) |
7223 WRITE_DATA_CACHE_POLICY(0));
7224 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7225 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7226 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7227 }