/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

#define ARRAY_MODE(x)		((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)		((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)		((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)		((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)		((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)		((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)		((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

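/*
 * Golden register tables. Each entry is a {register, AND mask, OR value}
 * triplet; amdgpu_program_register_sequence() reads the register, masks it
 * and ORs in the value to install the per-ASIC "golden" settings.
 */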
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

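/* Apply the golden register settings for the detected ASIC. */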
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

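/* Set up the CP scratch registers used by the ring and IB tests below. */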
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

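/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, emit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, then poll until
 * the CP has executed the packet or the timeout expires.
 */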
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

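/*
 * IB test: the same scratch register handshake as the ring test, but the
 * write is submitted through an indirect buffer and completion is
 * synchronized with a fence.
 */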
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

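/* Release all gfx firmware images requested by gfx_v8_0_init_microcode(). */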
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

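/*
 * Request and validate the PFP/ME/CE/RLC/MEC(2) firmware images for the
 * detected ASIC, cache their version and feature numbers, and register
 * them for SMU-based loading where applicable.
 */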
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released with feature version 46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else {
		adev->virt.chained_ib_support = false;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the CP jump table (JT) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

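/* Emit the clear state buffer (CSB) contents from the RLC clear state data. */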
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

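/*
 * Copy the per-engine (CE/PFP/ME/MEC and, on Carrizo, MEC2) jump tables
 * from the firmware images into the RLC cp_table buffer used for CP
 * power gating.
 */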
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

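/*
 * Allocate and fill the RLC buffers: the clear state block in VRAM and,
 * on Carrizo/Stoney, the CP jump table plus GDS backup area.
 */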
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

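/*
 * Allocate and zero the MEC HPD EOP buffer in GTT: one GFX8_MEC_HPD_SIZE
 * slot for each compute ring acquired from the MEC.
 */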
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

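/*
 * Raw GCN machine code for the GPR initialization shaders used by the EDC
 * workaround below; they write every VGPR/SGPR and finish with s_barrier
 * and s_endpgm (0xbf8a0000, 0xbf810000).
 */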
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

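/* EDC single/double bit error counter registers used by the GPR workaround. */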
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};

1487 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1488 {
1489 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1490 struct amdgpu_ib ib;
1491 struct dma_fence *f = NULL;
1492 int r, i;
1493 u32 tmp;
1494 unsigned total_size, vgpr_offset, sgpr_offset;
1495 u64 gpu_addr;
1496
1497 /* only supported on CZ */
1498 if (adev->asic_type != CHIP_CARRIZO)
1499 return 0;
1500
1501 /* bail if the compute ring is not ready */
1502 if (!ring->ready)
1503 return 0;
1504
1505 tmp = RREG32(mmGB_EDC_MODE);
1506 WREG32(mmGB_EDC_MODE, 0);
1507
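/*
 * IB size budget per dispatch, in dwords: each {reg, value} pair costs
 * 3 (SET_SH_REG header + offset + data), the PGM_LO/HI write costs 4
 * (header + offset + two data dwords), DISPATCH_DIRECT costs 5 and the
 * CS-partial-flush EVENT_WRITE costs 2 -- hence the (n/2)*3 + 4 + 5 + 2
 * terms below, multiplied by 4 to convert dwords to bytes.
 */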
1508 total_size =
1509 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1510 total_size +=
1511 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1512 total_size +=
1513 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1514 total_size = ALIGN(total_size, 256);
1515 vgpr_offset = total_size;
1516 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1517 sgpr_offset = total_size;
1518 total_size += sizeof(sgpr_init_compute_shader);
1519
1520 /* allocate an indirect buffer to put the commands in */
1521 memset(&ib, 0, sizeof(ib));
1522 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1523 if (r) {
1524 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1525 return r;
1526 }
1527
1528 /* load the compute shaders */
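/* ib.ptr is dword-indexed, so the byte offsets computed above are divided by 4 */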
1529 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1530 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1531
1532 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1533 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1534
1535 /* init the ib length to 0 */
1536 ib.length_dw = 0;
1537
1538 /* VGPR */
1539 /* write the register state for the compute dispatch */
1540 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1541 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1542 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1543 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1544 }
1545 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
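/* COMPUTE_PGM_LO/HI take the shader address in 256-byte units, hence the >> 8 */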
1546 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1547 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1548 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1549 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1550 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1551
1552 /* write dispatch packet */
1553 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1554 ib.ptr[ib.length_dw++] = 8; /* x */
1555 ib.ptr[ib.length_dw++] = 1; /* y */
1556 ib.ptr[ib.length_dw++] = 1; /* z */
1557 ib.ptr[ib.length_dw++] =
1558 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1559
1560 /* write CS partial flush packet */
1561 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1562 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1563
1564 /* SGPR1 */
1565 /* write the register state for the compute dispatch */
1566 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1567 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1568 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1569 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1570 }
1571 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1572 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1573 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1574 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1575 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1576 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1577
1578 /* write dispatch packet */
1579 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1580 ib.ptr[ib.length_dw++] = 8; /* x */
1581 ib.ptr[ib.length_dw++] = 1; /* y */
1582 ib.ptr[ib.length_dw++] = 1; /* z */
1583 ib.ptr[ib.length_dw++] =
1584 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1585
1586 /* write CS partial flush packet */
1587 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1588 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1589
1590 /* SGPR2 */
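/*
 * This pass reuses sgpr_offset, i.e. the same SGPR shader as the SGPR1
 * pass above; only the CU mask written via sgpr2_init_regs differs.
 */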
1591 /* write the register state for the compute dispatch */
1592 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1593 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1594 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1595 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1596 }
1597 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1598 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1599 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1600 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1601 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1602 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1603
1604 /* write dispatch packet */
1605 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1606 ib.ptr[ib.length_dw++] = 8; /* x */
1607 ib.ptr[ib.length_dw++] = 1; /* y */
1608 ib.ptr[ib.length_dw++] = 1; /* z */
1609 ib.ptr[ib.length_dw++] =
1610 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1611
1612 /* write CS partial flush packet */
1613 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1614 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1615
1616 /* schedule the ib on the ring */
1617 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1618 if (r) {
1619 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1620 goto fail;
1621 }
1622
1623 /* wait for the GPU to finish processing the IB */
1624 r = dma_fence_wait(f, false);
1625 if (r) {
1626 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1627 goto fail;
1628 }
1629
1630 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1631 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1632 WREG32(mmGB_EDC_MODE, tmp);
1633
1634 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1635 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1636 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1637
1638
1639 /* read back registers to clear the counters */
1640 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1641 RREG32(sec_ded_counter_registers[i]);
1642
1643 fail:
1644 amdgpu_ib_free(adev, &ib, NULL);
1645 dma_fence_put(f);
1646
1647 return r;
1648 }
1649
1650 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1651 {
1652 u32 gb_addr_config;
1653 u32 mc_shared_chmap, mc_arb_ramcfg;
1654 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1655 u32 tmp;
1656 int ret;
1657
1658 switch (adev->asic_type) {
1659 case CHIP_TOPAZ:
1660 adev->gfx.config.max_shader_engines = 1;
1661 adev->gfx.config.max_tile_pipes = 2;
1662 adev->gfx.config.max_cu_per_sh = 6;
1663 adev->gfx.config.max_sh_per_se = 1;
1664 adev->gfx.config.max_backends_per_se = 2;
1665 adev->gfx.config.max_texture_channel_caches = 2;
1666 adev->gfx.config.max_gprs = 256;
1667 adev->gfx.config.max_gs_threads = 32;
1668 adev->gfx.config.max_hw_contexts = 8;
1669
1670 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1671 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1672 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1673 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1674 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1675 break;
1676 case CHIP_FIJI:
1677 adev->gfx.config.max_shader_engines = 4;
1678 adev->gfx.config.max_tile_pipes = 16;
1679 adev->gfx.config.max_cu_per_sh = 16;
1680 adev->gfx.config.max_sh_per_se = 1;
1681 adev->gfx.config.max_backends_per_se = 4;
1682 adev->gfx.config.max_texture_channel_caches = 16;
1683 adev->gfx.config.max_gprs = 256;
1684 adev->gfx.config.max_gs_threads = 32;
1685 adev->gfx.config.max_hw_contexts = 8;
1686
1687 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1688 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1689 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1690 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1691 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1692 break;
1693 case CHIP_POLARIS11:
1694 case CHIP_POLARIS12:
1695 ret = amdgpu_atombios_get_gfx_info(adev);
1696 if (ret)
1697 return ret;
1698 adev->gfx.config.max_gprs = 256;
1699 adev->gfx.config.max_gs_threads = 32;
1700 adev->gfx.config.max_hw_contexts = 8;
1701
1702 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1703 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1704 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1705 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1706 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1707 break;
1708 case CHIP_POLARIS10:
1709 ret = amdgpu_atombios_get_gfx_info(adev);
1710 if (ret)
1711 return ret;
1712 adev->gfx.config.max_gprs = 256;
1713 adev->gfx.config.max_gs_threads = 32;
1714 adev->gfx.config.max_hw_contexts = 8;
1715
1716 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1717 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1718 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1719 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1720 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1721 break;
1722 case CHIP_TONGA:
1723 adev->gfx.config.max_shader_engines = 4;
1724 adev->gfx.config.max_tile_pipes = 8;
1725 adev->gfx.config.max_cu_per_sh = 8;
1726 adev->gfx.config.max_sh_per_se = 1;
1727 adev->gfx.config.max_backends_per_se = 2;
1728 adev->gfx.config.max_texture_channel_caches = 8;
1729 adev->gfx.config.max_gprs = 256;
1730 adev->gfx.config.max_gs_threads = 32;
1731 adev->gfx.config.max_hw_contexts = 8;
1732
1733 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1734 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1735 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1736 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1737 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1738 break;
1739 case CHIP_CARRIZO:
1740 adev->gfx.config.max_shader_engines = 1;
1741 adev->gfx.config.max_tile_pipes = 2;
1742 adev->gfx.config.max_sh_per_se = 1;
1743 adev->gfx.config.max_backends_per_se = 2;
1744 adev->gfx.config.max_cu_per_sh = 8;
1745 adev->gfx.config.max_texture_channel_caches = 2;
1746 adev->gfx.config.max_gprs = 256;
1747 adev->gfx.config.max_gs_threads = 32;
1748 adev->gfx.config.max_hw_contexts = 8;
1749
1750 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1751 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1752 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1753 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1754 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1755 break;
1756 case CHIP_STONEY:
1757 adev->gfx.config.max_shader_engines = 1;
1758 adev->gfx.config.max_tile_pipes = 2;
1759 adev->gfx.config.max_sh_per_se = 1;
1760 adev->gfx.config.max_backends_per_se = 1;
1761 adev->gfx.config.max_cu_per_sh = 3;
1762 adev->gfx.config.max_texture_channel_caches = 2;
1763 adev->gfx.config.max_gprs = 256;
1764 adev->gfx.config.max_gs_threads = 16;
1765 adev->gfx.config.max_hw_contexts = 8;
1766
1767 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1768 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1769 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1770 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1771 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1772 break;
1773 default:
1774 adev->gfx.config.max_shader_engines = 2;
1775 adev->gfx.config.max_tile_pipes = 4;
1776 adev->gfx.config.max_cu_per_sh = 2;
1777 adev->gfx.config.max_sh_per_se = 1;
1778 adev->gfx.config.max_backends_per_se = 2;
1779 adev->gfx.config.max_texture_channel_caches = 4;
1780 adev->gfx.config.max_gprs = 256;
1781 adev->gfx.config.max_gs_threads = 32;
1782 adev->gfx.config.max_hw_contexts = 8;
1783
1784 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1785 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1786 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1787 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1788 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1789 break;
1790 }
1791
1792 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1793 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1794 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1795
1796 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1797 adev->gfx.config.mem_max_burst_length_bytes = 256;
1798 if (adev->flags & AMD_IS_APU) {
1799 /* Get memory bank mapping mode. */
1800 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1801 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1802 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1803
1804 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1805 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1806 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1807
1808 /* Validate settings in case only one DIMM is installed. */
1809 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1810 dimm00_addr_map = 0;
1811 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1812 dimm01_addr_map = 0;
1813 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1814 dimm10_addr_map = 0;
1815 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1816 dimm11_addr_map = 0;
1817
1818 /* If the DIMM address map is 8GB, the row size should be 2KB; otherwise 1KB. */
1819 /* If ROW size(DIMM1) != ROW size(DIMM0), use the larger of the two. */
1820 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1821 adev->gfx.config.mem_row_size_in_kb = 2;
1822 else
1823 adev->gfx.config.mem_row_size_in_kb = 1;
1824 } else {
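/*
 * Row size in KB = 4 bytes/column * 2^(8 + NOOFCOLS) columns / 1024,
 * e.g. NOOFCOLS = 0 -> 1 KB, NOOFCOLS = 2 -> 4 KB (the clamp below
 * caps it at 4 KB).
 */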
1825 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1826 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1827 if (adev->gfx.config.mem_row_size_in_kb > 4)
1828 adev->gfx.config.mem_row_size_in_kb = 4;
1829 }
1830
1831 adev->gfx.config.shader_engine_tile_size = 32;
1832 adev->gfx.config.num_gpus = 1;
1833 adev->gfx.config.multi_gpu_tile_size = 64;
1834
1835 /* fix up row size */
1836 switch (adev->gfx.config.mem_row_size_in_kb) {
1837 case 1:
1838 default:
1839 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1840 break;
1841 case 2:
1842 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1843 break;
1844 case 4:
1845 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1846 break;
1847 }
1848 adev->gfx.config.gb_addr_config = gb_addr_config;
1849
1850 return 0;
1851 }
1852
1853 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1854 int mec, int pipe, int queue)
1855 {
1856 int r;
1857 unsigned irq_type;
1858 struct amdgpu_ring *ring;
1859
1860 ring = &adev->gfx.compute_ring[ring_id];
1861
1862 /* mec0 is me1 */
1863 ring->me = mec + 1;
1864 ring->pipe = pipe;
1865 ring->queue = queue;
1866
1867 ring->ring_obj = NULL;
1868 ring->use_doorbell = true;
1869 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1870 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1871 + (ring_id * GFX8_MEC_HPD_SIZE);
1872 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1873
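/*
 * Each MEC pipe has its own EOP interrupt source, so map (me, pipe)
 * onto the linear AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP.. range.
 */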
1874 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1875 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1876 + ring->pipe;
1877
1878 /* type-2 packets are deprecated on MEC, use type-3 instead */
1879 r = amdgpu_ring_init(adev, ring, 1024,
1880 &adev->gfx.eop_irq, irq_type);
1881 if (r)
1882 return r;
1883
1884
1885 return 0;
1886 }
1887
1888 static int gfx_v8_0_sw_init(void *handle)
1889 {
1890 int i, j, k, r, ring_id;
1891 struct amdgpu_ring *ring;
1892 struct amdgpu_kiq *kiq;
1893 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1894
1895 switch (adev->asic_type) {
1896 case CHIP_FIJI:
1897 case CHIP_TONGA:
1898 case CHIP_POLARIS11:
1899 case CHIP_POLARIS12:
1900 case CHIP_POLARIS10:
1901 case CHIP_CARRIZO:
1902 adev->gfx.mec.num_mec = 2;
1903 break;
1904 case CHIP_TOPAZ:
1905 case CHIP_STONEY:
1906 default:
1907 adev->gfx.mec.num_mec = 1;
1908 break;
1909 }
1910
1911 adev->gfx.mec.num_pipe_per_mec = 4;
1912 adev->gfx.mec.num_queue_per_pipe = 8;
1913
1914 /* KIQ event */
1915 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
1916 if (r)
1917 return r;
1918
1919 /* EOP Event */
1920 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
1921 if (r)
1922 return r;
1923
1924 /* Privileged reg */
1925 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
1926 &adev->gfx.priv_reg_irq);
1927 if (r)
1928 return r;
1929
1930 /* Privileged inst */
1931 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
1932 &adev->gfx.priv_inst_irq);
1933 if (r)
1934 return r;
1935
1936 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1937
1938 gfx_v8_0_scratch_init(adev);
1939
1940 r = gfx_v8_0_init_microcode(adev);
1941 if (r) {
1942 DRM_ERROR("Failed to load gfx firmware!\n");
1943 return r;
1944 }
1945
1946 r = gfx_v8_0_rlc_init(adev);
1947 if (r) {
1948 DRM_ERROR("Failed to init rlc BOs!\n");
1949 return r;
1950 }
1951
1952 r = gfx_v8_0_mec_init(adev);
1953 if (r) {
1954 DRM_ERROR("Failed to init MEC BOs!\n");
1955 return r;
1956 }
1957
1958 /* set up the gfx ring */
1959 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1960 ring = &adev->gfx.gfx_ring[i];
1961 ring->ring_obj = NULL;
1962 sprintf(ring->name, "gfx");
1963 /* no gfx doorbells on iceland */
1964 if (adev->asic_type != CHIP_TOPAZ) {
1965 ring->use_doorbell = true;
1966 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1967 }
1968
1969 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
1970 AMDGPU_CP_IRQ_GFX_EOP);
1971 if (r)
1972 return r;
1973 }
1974
1975
1976 /* set up the compute queues - allocate horizontally across pipes */
1977 ring_id = 0;
1978 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1979 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1980 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1981 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1982 continue;
1983
1984 r = gfx_v8_0_compute_ring_init(adev,
1985 ring_id,
1986 i, k, j);
1987 if (r)
1988 return r;
1989
1990 ring_id++;
1991 }
1992 }
1993 }
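/*
 * With num_pipe_per_mec = 4 the loops above hand out rings across pipes
 * first: ring 0 -> (mec 0, pipe 0, queue 0), ring 1 -> (mec 0, pipe 1,
 * queue 0), ..., ring 4 -> (mec 0, pipe 0, queue 1), and so on, skipping
 * any queue that amdgpu_gfx_is_mec_queue_enabled() filters out (which
 * shifts the mapping accordingly).
 */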
1994
1995 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
1996 if (r) {
1997 DRM_ERROR("Failed to init KIQ BOs!\n");
1998 return r;
1999 }
2000
2001 kiq = &adev->gfx.kiq;
2002 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2003 if (r)
2004 return r;
2005
2006 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2007 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2008 if (r)
2009 return r;
2010
2011 /* reserve GDS, GWS and OA resource for gfx */
2012 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2013 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2014 &adev->gds.gds_gfx_bo, NULL, NULL);
2015 if (r)
2016 return r;
2017
2018 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2019 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2020 &adev->gds.gws_gfx_bo, NULL, NULL);
2021 if (r)
2022 return r;
2023
2024 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2025 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2026 &adev->gds.oa_gfx_bo, NULL, NULL);
2027 if (r)
2028 return r;
2029
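/* constant engine (CE) RAM size: 0x8000 bytes = 32 KB */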
2030 adev->gfx.ce_ram_size = 0x8000;
2031
2032 r = gfx_v8_0_gpu_early_init(adev);
2033 if (r)
2034 return r;
2035
2036 return 0;
2037 }
2038
2039 static int gfx_v8_0_sw_fini(void *handle)
2040 {
2041 int i;
2042 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2043
2044 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2045 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2046 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2047
2048 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2049 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2050 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2051 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2052
2053 amdgpu_gfx_compute_mqd_sw_fini(adev);
2054 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2055 amdgpu_gfx_kiq_fini(adev);
2056
2057 gfx_v8_0_mec_fini(adev);
2058 gfx_v8_0_rlc_fini(adev);
2059 gfx_v8_0_free_microcode(adev);
2060
2061 return 0;
2062 }
2063
2064 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2065 {
2066 uint32_t *modearray, *mod2array;
2067 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2068 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2069 u32 reg_offset;
2070
2071 modearray = adev->gfx.config.tile_mode_array;
2072 mod2array = adev->gfx.config.macrotile_mode_array;
2073
2074 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2075 modearray[reg_offset] = 0;
2076
2077 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2078 mod2array[reg_offset] = 0;
2079
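/*
 * Each modearray entry packs the GB_TILE_MODE* fields with the shift
 * macros defined at the top of this file; for example Topaz's
 * modearray[0] expands to ARRAY_MODE = 2D_TILED_THIN1, PIPE_CONFIG = P2,
 * TILE_SPLIT = 64B, MICRO_TILE_MODE_NEW = DEPTH, OR'd together and
 * written verbatim to mmGB_TILE_MODE0. mod2array fills the
 * GB_MACROTILE_MODE* registers the same way.
 */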
2080 switch (adev->asic_type) {
2081 case CHIP_TOPAZ:
2082 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2083 PIPE_CONFIG(ADDR_SURF_P2) |
2084 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2085 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2086 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2087 PIPE_CONFIG(ADDR_SURF_P2) |
2088 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2089 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2090 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2091 PIPE_CONFIG(ADDR_SURF_P2) |
2092 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2093 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2094 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2095 PIPE_CONFIG(ADDR_SURF_P2) |
2096 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2097 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2098 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099 PIPE_CONFIG(ADDR_SURF_P2) |
2100 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2101 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2102 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2103 PIPE_CONFIG(ADDR_SURF_P2) |
2104 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2105 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2106 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2107 PIPE_CONFIG(ADDR_SURF_P2) |
2108 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2109 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2110 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2111 PIPE_CONFIG(ADDR_SURF_P2));
2112 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2113 PIPE_CONFIG(ADDR_SURF_P2) |
2114 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2115 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2116 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2117 PIPE_CONFIG(ADDR_SURF_P2) |
2118 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2119 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2120 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2121 PIPE_CONFIG(ADDR_SURF_P2) |
2122 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2123 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2124 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2125 PIPE_CONFIG(ADDR_SURF_P2) |
2126 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2127 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2128 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2129 PIPE_CONFIG(ADDR_SURF_P2) |
2130 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2131 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2132 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2133 PIPE_CONFIG(ADDR_SURF_P2) |
2134 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2135 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2136 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2137 PIPE_CONFIG(ADDR_SURF_P2) |
2138 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2139 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2140 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2141 PIPE_CONFIG(ADDR_SURF_P2) |
2142 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2143 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2144 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2145 PIPE_CONFIG(ADDR_SURF_P2) |
2146 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2147 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2148 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2149 PIPE_CONFIG(ADDR_SURF_P2) |
2150 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2151 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2152 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2153 PIPE_CONFIG(ADDR_SURF_P2) |
2154 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2155 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2156 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2157 PIPE_CONFIG(ADDR_SURF_P2) |
2158 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2159 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2160 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2161 PIPE_CONFIG(ADDR_SURF_P2) |
2162 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2163 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2164 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2165 PIPE_CONFIG(ADDR_SURF_P2) |
2166 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2167 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2168 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2169 PIPE_CONFIG(ADDR_SURF_P2) |
2170 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2171 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2172 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2173 PIPE_CONFIG(ADDR_SURF_P2) |
2174 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2175 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2176 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2177 PIPE_CONFIG(ADDR_SURF_P2) |
2178 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2179 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2180 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2181 PIPE_CONFIG(ADDR_SURF_P2) |
2182 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2183 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2184
2185 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2186 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2187 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2188 NUM_BANKS(ADDR_SURF_8_BANK));
2189 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2190 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2191 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2192 NUM_BANKS(ADDR_SURF_8_BANK));
2193 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2194 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2195 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2196 NUM_BANKS(ADDR_SURF_8_BANK));
2197 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2199 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2200 NUM_BANKS(ADDR_SURF_8_BANK));
2201 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2202 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2203 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2204 NUM_BANKS(ADDR_SURF_8_BANK));
2205 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2206 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2207 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2208 NUM_BANKS(ADDR_SURF_8_BANK));
2209 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2210 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2211 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2212 NUM_BANKS(ADDR_SURF_8_BANK));
2213 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2214 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2215 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2216 NUM_BANKS(ADDR_SURF_16_BANK));
2217 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2218 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2219 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2220 NUM_BANKS(ADDR_SURF_16_BANK));
2221 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2222 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2224 NUM_BANKS(ADDR_SURF_16_BANK));
2225 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2226 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2227 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2228 NUM_BANKS(ADDR_SURF_16_BANK));
2229 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2230 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2231 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2232 NUM_BANKS(ADDR_SURF_16_BANK));
2233 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2234 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2235 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2236 NUM_BANKS(ADDR_SURF_16_BANK));
2237 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2239 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240 NUM_BANKS(ADDR_SURF_8_BANK));
2241
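/*
 * Entries 7, 12, 17 and 23 are never populated for Topaz (on the other
 * ASICs in this switch they hold PRT modes using a wider P4 pipe
 * config), so leave those registers at their defaults -- an observation
 * from the other cases, not documented here.
 */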
2242 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2243 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2244 reg_offset != 23)
2245 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2246
2247 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2248 if (reg_offset != 7)
2249 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2250
2251 break;
2252 case CHIP_FIJI:
2253 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2254 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2255 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2256 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2257 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2259 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2260 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2261 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2262 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2263 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2264 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2265 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2266 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2267 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2268 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2269 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2271 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2272 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2273 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2274 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2275 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2276 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2277 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2278 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2279 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2280 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2281 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2282 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2283 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2284 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2285 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2286 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2287 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2288 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2289 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2291 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2292 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2293 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2295 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2296 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2297 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2299 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2300 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2301 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2303 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2304 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2305 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2307 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2308 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2309 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2311 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2312 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2313 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2315 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2316 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2318 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2319 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2320 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2321 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2323 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2324 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2326 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2327 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2328 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2330 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2331 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2332 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2335 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2336 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2338 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2339 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2340 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2342 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2343 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2344 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2345 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2347 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2348 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2350 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2351 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2352 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2354 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2355 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2356 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2359 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2360 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2364 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2367 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2368 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2370 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2371 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2372 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2373 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2374 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2375
2376 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2378 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2379 NUM_BANKS(ADDR_SURF_8_BANK));
2380 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2381 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2382 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2383 NUM_BANKS(ADDR_SURF_8_BANK));
2384 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2386 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2387 NUM_BANKS(ADDR_SURF_8_BANK));
2388 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2390 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2391 NUM_BANKS(ADDR_SURF_8_BANK));
2392 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2394 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2395 NUM_BANKS(ADDR_SURF_8_BANK));
2396 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2398 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2399 NUM_BANKS(ADDR_SURF_8_BANK));
2400 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2401 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2402 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2403 NUM_BANKS(ADDR_SURF_8_BANK));
2404 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2406 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2407 NUM_BANKS(ADDR_SURF_8_BANK));
2408 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2410 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2411 NUM_BANKS(ADDR_SURF_8_BANK));
2412 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2414 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2415 NUM_BANKS(ADDR_SURF_8_BANK));
2416 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2418 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2419 NUM_BANKS(ADDR_SURF_8_BANK));
2420 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2421 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2422 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2423 NUM_BANKS(ADDR_SURF_8_BANK));
2424 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2426 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2427 NUM_BANKS(ADDR_SURF_8_BANK));
2428 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2430 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2431 NUM_BANKS(ADDR_SURF_4_BANK));
2432
2433 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2434 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2435
2436 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2437 if (reg_offset != 7)
2438 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2439
2440 break;
2441 case CHIP_TONGA:
2442 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2444 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2445 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2446 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2448 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2449 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2450 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2452 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2453 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2454 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2455 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2456 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2457 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2458 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2459 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2460 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2461 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2462 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2463 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2464 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2465 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2466 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2467 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2468 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2469 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2471 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2472 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2473 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2474 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2475 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2476 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2477 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2478 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2481 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2482 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2484 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2485 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2486 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2488 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2489 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2490 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2492 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2493 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2494 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2496 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2497 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2498 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2499 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2500 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2501 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2502 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2503 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2504 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2505 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2506 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2507 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2508 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2509 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2510 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2511 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2512 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2513 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2515 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2516 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2517 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2519 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2520 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2521 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2523 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2524 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2525 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2527 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2528 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2529 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2531 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2532 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2533 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2534 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2535 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2536 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2537 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2539 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2540 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2541 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2542 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2543 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2544 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2545 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2548 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2553 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2555 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2556 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2557 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2559 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2560 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2561 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2562 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2563 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2564
2565 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2567 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2568 NUM_BANKS(ADDR_SURF_16_BANK));
2569 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2571 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2572 NUM_BANKS(ADDR_SURF_16_BANK));
2573 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2575 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2576 NUM_BANKS(ADDR_SURF_16_BANK));
2577 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2579 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2580 NUM_BANKS(ADDR_SURF_16_BANK));
2581 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2583 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2584 NUM_BANKS(ADDR_SURF_16_BANK));
2585 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2587 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2588 NUM_BANKS(ADDR_SURF_16_BANK));
2589 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2591 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2592 NUM_BANKS(ADDR_SURF_16_BANK));
2593 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2595 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2596 NUM_BANKS(ADDR_SURF_16_BANK));
2597 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2599 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2600 NUM_BANKS(ADDR_SURF_16_BANK));
2601 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2603 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2604 NUM_BANKS(ADDR_SURF_16_BANK));
2605 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2607 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2608 NUM_BANKS(ADDR_SURF_16_BANK));
2609 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2611 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2612 NUM_BANKS(ADDR_SURF_8_BANK));
2613 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2614 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2615 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2616 NUM_BANKS(ADDR_SURF_4_BANK));
2617 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2618 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2619 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2620 NUM_BANKS(ADDR_SURF_4_BANK));
2621
2622 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2623 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2624
2625 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2626 if (reg_offset != 7)
2627 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2628
2629 break;
2630 case CHIP_POLARIS11:
2631 case CHIP_POLARIS12:
2632 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2633 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2634 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2635 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2636 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2637 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2638 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2639 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2640 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2642 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2644 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2646 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2647 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2648 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2649 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2650 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2651 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2652 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2653 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2654 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2655 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2656 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2657 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2658 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2659 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2660 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2661 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2662 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2663 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2664 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2665 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2666 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2667 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2669 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2670 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2671 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2673 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2674 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2675 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2677 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2678 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2679 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2680 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2681 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2682 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2683 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2684 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2685 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2686 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2687 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2689 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2690 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2691 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2693 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2694 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2695 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2697 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2698 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2699 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2701 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2702 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2703 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2705 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2706 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2707 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2709 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2710 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2711 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2713 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2714 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2715 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2717 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2718 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2719 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2721 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2722 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2723 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2725 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2726 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2727 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2730 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2731 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2733 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2734 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2735 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2736 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2737 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2738 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2739 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2740 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2741 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2743 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2744 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2745 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2747 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2748 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2749 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2750 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2751 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2753 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2754
2755 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2757 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2758 NUM_BANKS(ADDR_SURF_16_BANK));
2759
2760 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2762 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2763 NUM_BANKS(ADDR_SURF_16_BANK));
2764
2765 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2766 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2767 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2768 NUM_BANKS(ADDR_SURF_16_BANK));
2769
2770 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2772 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2773 NUM_BANKS(ADDR_SURF_16_BANK));
2774
2775 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2776 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2777 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2778 NUM_BANKS(ADDR_SURF_16_BANK));
2779
2780 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2782 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783 NUM_BANKS(ADDR_SURF_16_BANK));
2784
2785 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2786 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2787 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2788 NUM_BANKS(ADDR_SURF_16_BANK));
2789
2790 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2791 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2792 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2793 NUM_BANKS(ADDR_SURF_16_BANK));
2794
2795 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2796 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2797 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2798 NUM_BANKS(ADDR_SURF_16_BANK));
2799
2800 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2801 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2802 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2803 NUM_BANKS(ADDR_SURF_16_BANK));
2804
2805 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2806 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2807 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2808 NUM_BANKS(ADDR_SURF_16_BANK));
2809
2810 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2811 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2812 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2813 NUM_BANKS(ADDR_SURF_16_BANK));
2814
2815 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2816 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2817 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2818 NUM_BANKS(ADDR_SURF_8_BANK));
2819
2820 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2822 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2823 NUM_BANKS(ADDR_SURF_4_BANK));
2824
2825 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2826 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2827
2828 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2829 if (reg_offset != 7)
2830 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2831
2832 break;
2833 case CHIP_POLARIS10:
2834 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2835 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2836 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2837 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2838 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2840 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2841 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2842 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2843 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2844 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2845 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2846 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2847 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2848 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2849 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2850 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2851 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2852 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2853 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2855 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2856 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2857 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2858 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2859 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2860 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2861 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2862 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2863 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2864 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2865 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2866 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2867 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2868 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2869 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2870 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2871 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2872 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2874 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2875 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2876 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2877 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2878 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2879 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2880 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2881 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2882 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2883 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2884 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2885 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2886 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2887 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2888 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2890 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2891 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2892 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2893 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2894 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2895 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2896 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2897 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2899 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2900 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2901 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2902 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2903 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2904 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2905 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2907 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2908 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2909 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2911 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2912 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2913 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2915 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2916 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2917 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2919 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2920 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2921 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2923 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2924 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2925 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2927 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2928 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2929 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2930 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2931 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2932 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2933 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2934 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2935 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2936 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2937 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2938 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2939 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2940 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2941 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2942 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2943 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2945 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2946 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2947 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2948 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2949 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2950 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2951 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2952 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2953 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2954 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2955 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2956
2957 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2958 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2959 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2960 NUM_BANKS(ADDR_SURF_16_BANK));
2961
2962 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2963 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2964 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2965 NUM_BANKS(ADDR_SURF_16_BANK));
2966
2967 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2969 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970 NUM_BANKS(ADDR_SURF_16_BANK));
2971
2972 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2974 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975 NUM_BANKS(ADDR_SURF_16_BANK));
2976
2977 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2978 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2979 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2980 NUM_BANKS(ADDR_SURF_16_BANK));
2981
2982 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2983 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2984 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2985 NUM_BANKS(ADDR_SURF_16_BANK));
2986
2987 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2990 NUM_BANKS(ADDR_SURF_16_BANK));
2991
2992 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2993 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2994 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995 NUM_BANKS(ADDR_SURF_16_BANK));
2996
2997 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2998 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2999 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3000 NUM_BANKS(ADDR_SURF_16_BANK));
3001
3002 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3003 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3004 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3005 NUM_BANKS(ADDR_SURF_16_BANK));
3006
3007 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3008 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3009 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3010 NUM_BANKS(ADDR_SURF_16_BANK));
3011
3012 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3015 NUM_BANKS(ADDR_SURF_8_BANK));
3016
3017 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3018 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3019 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3020 NUM_BANKS(ADDR_SURF_4_BANK));
3021
3022 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3023 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3024 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3025 NUM_BANKS(ADDR_SURF_4_BANK));
3026
3027 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3028 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3029
3030 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3031 if (reg_offset != 7)
3032 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3033
3034 break;
3035 case CHIP_STONEY:
3036 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3037 PIPE_CONFIG(ADDR_SURF_P2) |
3038 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3039 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3040 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3041 PIPE_CONFIG(ADDR_SURF_P2) |
3042 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3043 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3044 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3045 PIPE_CONFIG(ADDR_SURF_P2) |
3046 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3047 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3048 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3049 PIPE_CONFIG(ADDR_SURF_P2) |
3050 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3051 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3052 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3053 PIPE_CONFIG(ADDR_SURF_P2) |
3054 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3055 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3056 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3057 PIPE_CONFIG(ADDR_SURF_P2) |
3058 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3059 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3060 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3061 PIPE_CONFIG(ADDR_SURF_P2) |
3062 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3063 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3064 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3065 PIPE_CONFIG(ADDR_SURF_P2));
3066 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3067 PIPE_CONFIG(ADDR_SURF_P2) |
3068 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3069 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3070 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3071 PIPE_CONFIG(ADDR_SURF_P2) |
3072 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3073 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3074 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3075 PIPE_CONFIG(ADDR_SURF_P2) |
3076 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3077 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3078 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3079 PIPE_CONFIG(ADDR_SURF_P2) |
3080 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3081 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3082 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3083 PIPE_CONFIG(ADDR_SURF_P2) |
3084 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3085 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3086 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3087 PIPE_CONFIG(ADDR_SURF_P2) |
3088 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3089 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3090 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3091 PIPE_CONFIG(ADDR_SURF_P2) |
3092 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3093 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3094 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3095 PIPE_CONFIG(ADDR_SURF_P2) |
3096 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3097 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3098 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3099 PIPE_CONFIG(ADDR_SURF_P2) |
3100 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3101 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3102 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3103 PIPE_CONFIG(ADDR_SURF_P2) |
3104 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3105 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3106 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3107 PIPE_CONFIG(ADDR_SURF_P2) |
3108 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3109 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3110 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3111 PIPE_CONFIG(ADDR_SURF_P2) |
3112 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3113 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3114 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3115 PIPE_CONFIG(ADDR_SURF_P2) |
3116 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3117 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3118 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3119 PIPE_CONFIG(ADDR_SURF_P2) |
3120 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3121 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3122 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3123 PIPE_CONFIG(ADDR_SURF_P2) |
3124 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3125 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3126 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3127 PIPE_CONFIG(ADDR_SURF_P2) |
3128 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3129 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3130 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3131 PIPE_CONFIG(ADDR_SURF_P2) |
3132 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3133 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3134 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3135 PIPE_CONFIG(ADDR_SURF_P2) |
3136 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3137 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3138
3139 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3140 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3141 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3142 NUM_BANKS(ADDR_SURF_8_BANK));
3143 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3144 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3145 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3146 NUM_BANKS(ADDR_SURF_8_BANK));
3147 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150 NUM_BANKS(ADDR_SURF_8_BANK));
3151 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3152 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3153 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3154 NUM_BANKS(ADDR_SURF_8_BANK));
3155 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3156 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3157 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3158 NUM_BANKS(ADDR_SURF_8_BANK));
3159 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3162 NUM_BANKS(ADDR_SURF_8_BANK));
3163 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3164 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3165 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3166 NUM_BANKS(ADDR_SURF_8_BANK));
3167 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3170 NUM_BANKS(ADDR_SURF_16_BANK));
3171 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3172 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3173 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3174 NUM_BANKS(ADDR_SURF_16_BANK));
3175 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3176 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3177 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3178 NUM_BANKS(ADDR_SURF_16_BANK));
3179 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3182 NUM_BANKS(ADDR_SURF_16_BANK));
3183 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3184 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3185 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3186 NUM_BANKS(ADDR_SURF_16_BANK));
3187 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3188 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3189 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3190 NUM_BANKS(ADDR_SURF_16_BANK));
3191 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3192 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3193 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3194 NUM_BANKS(ADDR_SURF_8_BANK));
3195
3196 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3197 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3198 reg_offset != 23)
3199 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3200
3201 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3202 if (reg_offset != 7)
3203 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3204
3205 break;
3206 default:
3207 dev_warn(adev->dev,
3208 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3209 adev->asic_type);
3210
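/* fall through */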
3211 case CHIP_CARRIZO:
3212 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3213 PIPE_CONFIG(ADDR_SURF_P2) |
3214 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3215 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3216 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3217 PIPE_CONFIG(ADDR_SURF_P2) |
3218 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3219 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3220 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3221 PIPE_CONFIG(ADDR_SURF_P2) |
3222 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3223 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3224 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3225 PIPE_CONFIG(ADDR_SURF_P2) |
3226 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3227 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3228 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3229 PIPE_CONFIG(ADDR_SURF_P2) |
3230 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3231 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3232 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3233 PIPE_CONFIG(ADDR_SURF_P2) |
3234 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3235 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3236 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3237 PIPE_CONFIG(ADDR_SURF_P2) |
3238 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3239 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3240 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3241 PIPE_CONFIG(ADDR_SURF_P2));
3242 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3243 PIPE_CONFIG(ADDR_SURF_P2) |
3244 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3245 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3246 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3247 PIPE_CONFIG(ADDR_SURF_P2) |
3248 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3249 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3250 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3251 PIPE_CONFIG(ADDR_SURF_P2) |
3252 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3254 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3255 PIPE_CONFIG(ADDR_SURF_P2) |
3256 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3257 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3259 PIPE_CONFIG(ADDR_SURF_P2) |
3260 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3262 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3263 PIPE_CONFIG(ADDR_SURF_P2) |
3264 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3266 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3267 PIPE_CONFIG(ADDR_SURF_P2) |
3268 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3270 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3271 PIPE_CONFIG(ADDR_SURF_P2) |
3272 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3274 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3275 PIPE_CONFIG(ADDR_SURF_P2) |
3276 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3278 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3279 PIPE_CONFIG(ADDR_SURF_P2) |
3280 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3282 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3283 PIPE_CONFIG(ADDR_SURF_P2) |
3284 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3286 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3287 PIPE_CONFIG(ADDR_SURF_P2) |
3288 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3290 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3291 PIPE_CONFIG(ADDR_SURF_P2) |
3292 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3294 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3295 PIPE_CONFIG(ADDR_SURF_P2) |
3296 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3298 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3299 PIPE_CONFIG(ADDR_SURF_P2) |
3300 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3302 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3303 PIPE_CONFIG(ADDR_SURF_P2) |
3304 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3306 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3307 PIPE_CONFIG(ADDR_SURF_P2) |
3308 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3310 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3311 PIPE_CONFIG(ADDR_SURF_P2) |
3312 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3314
3315 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3316 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3317 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3318 NUM_BANKS(ADDR_SURF_8_BANK));
3319 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3320 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3321 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3322 NUM_BANKS(ADDR_SURF_8_BANK));
3323 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3325 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3326 NUM_BANKS(ADDR_SURF_8_BANK));
3327 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3328 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3329 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3330 NUM_BANKS(ADDR_SURF_8_BANK));
3331 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3332 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3333 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3334 NUM_BANKS(ADDR_SURF_8_BANK));
3335 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3336 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3337 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3338 NUM_BANKS(ADDR_SURF_8_BANK));
3339 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3341 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3342 NUM_BANKS(ADDR_SURF_8_BANK));
3343 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3346 NUM_BANKS(ADDR_SURF_16_BANK));
3347 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3350 NUM_BANKS(ADDR_SURF_16_BANK));
3351 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3354 NUM_BANKS(ADDR_SURF_16_BANK));
3355 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3358 NUM_BANKS(ADDR_SURF_16_BANK));
3359 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3362 NUM_BANKS(ADDR_SURF_16_BANK));
3363 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3366 NUM_BANKS(ADDR_SURF_16_BANK));
3367 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3370 NUM_BANKS(ADDR_SURF_8_BANK));
3371
3372 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3373 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3374 reg_offset != 23)
3375 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3376
3377 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3378 if (reg_offset != 7)
3379 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3380
3381 break;
3382 }
3383 }
3384
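/*
 * gfx_v8_0_select_se_sh - program GRBM_GFX_INDEX to steer register access
 *
 * Subsequent per-SE/SH register reads and writes target the selected
 * shader engine, shader array and instance; passing 0xffffffff for an
 * argument selects broadcast to all units of that type.
 */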
3385 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3386 u32 se_num, u32 sh_num, u32 instance)
3387 {
3388 u32 data;
3389
3390 if (instance == 0xffffffff)
3391 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3392 else
3393 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3394
3395 if (se_num == 0xffffffff)
3396 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3397 else
3398 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3399
3400 if (sh_num == 0xffffffff)
3401 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3402 else
3403 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3404
3405 WREG32(mmGRBM_GFX_INDEX, data);
3406 }
3407
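/*
 * gfx_v8_0_get_rb_active_bitmap - return the bitmap of active render
 * backends for the currently selected SE/SH, i.e. those disabled neither
 * by the harvest fuses (CC_RB_BACKEND_DISABLE) nor by the user override
 * (GC_USER_RB_BACKEND_DISABLE).
 */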
3408 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3409 {
3410 u32 data, mask;
3411
3412 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3413 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3414
3415 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3416
3417 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3418 adev->gfx.config.max_sh_per_se);
3419
3420 return (~data) & mask;
3421 }
3422
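/*
 * gfx_v8_0_raster_config - look up the default (unharvested)
 * PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1 values for the ASIC.
 */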
3423 static void
3424 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3425 {
3426 switch (adev->asic_type) {
3427 case CHIP_FIJI:
3428 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3429 RB_XSEL2(1) | PKR_MAP(2) |
3430 PKR_XSEL(1) | PKR_YSEL(1) |
3431 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3432 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3433 SE_PAIR_YSEL(2);
3434 break;
3435 case CHIP_TONGA:
3436 case CHIP_POLARIS10:
3437 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3438 SE_XSEL(1) | SE_YSEL(1);
3439 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3440 SE_PAIR_YSEL(2);
3441 break;
3442 case CHIP_TOPAZ:
3443 case CHIP_CARRIZO:
3444 *rconf |= RB_MAP_PKR0(2);
3445 *rconf1 |= 0x0;
3446 break;
3447 case CHIP_POLARIS11:
3448 case CHIP_POLARIS12:
3449 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3450 SE_XSEL(1) | SE_YSEL(1);
3451 *rconf1 |= 0x0;
3452 break;
3453 case CHIP_STONEY:
3454 *rconf |= 0x0;
3455 *rconf1 |= 0x0;
3456 break;
3457 default:
3458 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3459 break;
3460 }
3461 }
3462
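/*
 * gfx_v8_0_write_harvested_raster_configs - program per-SE raster configs
 *
 * When some render backends are harvested, adjust the raster config for
 * each shader engine so that the SE/PKR/RB mapping fields only reference
 * backends actually present in rb_mask, then write the result through
 * GRBM_GFX_INDEX selection.
 */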
3463 static void
3464 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3465 u32 raster_config, u32 raster_config_1,
3466 unsigned rb_mask, unsigned num_rb)
3467 {
3468 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3469 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3470 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3471 unsigned rb_per_se = num_rb / num_se;
3472 unsigned se_mask[4];
3473 unsigned se;
3474
3475 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3476 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3477 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3478 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3479
3480 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3481 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3482 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3483
3484 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3485 (!se_mask[2] && !se_mask[3]))) {
3486 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3487
3488 if (!se_mask[0] && !se_mask[1]) {
3489 raster_config_1 |=
3490 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3491 } else {
3492 raster_config_1 |=
3493 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3494 }
3495 }
3496
3497 for (se = 0; se < num_se; se++) {
3498 unsigned raster_config_se = raster_config;
3499 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3500 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3501 int idx = (se / 2) * 2;
3502
3503 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3504 raster_config_se &= ~SE_MAP_MASK;
3505
3506 if (!se_mask[idx]) {
3507 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3508 } else {
3509 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3510 }
3511 }
3512
3513 pkr0_mask &= rb_mask;
3514 pkr1_mask &= rb_mask;
3515 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3516 raster_config_se &= ~PKR_MAP_MASK;
3517
3518 if (!pkr0_mask) {
3519 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3520 } else {
3521 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3522 }
3523 }
3524
3525 if (rb_per_se >= 2) {
3526 unsigned rb0_mask = 1 << (se * rb_per_se);
3527 unsigned rb1_mask = rb0_mask << 1;
3528
3529 rb0_mask &= rb_mask;
3530 rb1_mask &= rb_mask;
3531 if (!rb0_mask || !rb1_mask) {
3532 raster_config_se &= ~RB_MAP_PKR0_MASK;
3533
3534 if (!rb0_mask) {
3535 raster_config_se |=
3536 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3537 } else {
3538 raster_config_se |=
3539 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3540 }
3541 }
3542
3543 if (rb_per_se > 2) {
3544 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3545 rb1_mask = rb0_mask << 1;
3546 rb0_mask &= rb_mask;
3547 rb1_mask &= rb_mask;
3548 if (!rb0_mask || !rb1_mask) {
3549 raster_config_se &= ~RB_MAP_PKR1_MASK;
3550
3551 if (!rb0_mask) {
3552 raster_config_se |=
3553 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3554 } else {
3555 raster_config_se |=
3556 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3557 }
3558 }
3559 }
3560 }
3561
3562 /* GRBM_GFX_INDEX has a different offset on VI */
3563 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3564 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3565 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3566 }
3567
3568 /* GRBM_GFX_INDEX has a different offset on VI */
3569 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3570 }
3571
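/*
 * gfx_v8_0_setup_rb - detect the active render backends, program the
 * raster configuration (harvested or not) accordingly, and cache the
 * per-SE/SH register values for later userspace queries.
 */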
3572 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3573 {
3574 int i, j;
3575 u32 data;
3576 u32 raster_config = 0, raster_config_1 = 0;
3577 u32 active_rbs = 0;
3578 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3579 adev->gfx.config.max_sh_per_se;
3580 unsigned num_rb_pipes;
3581
3582 mutex_lock(&adev->grbm_idx_mutex);
3583 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3584 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3585 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3586 data = gfx_v8_0_get_rb_active_bitmap(adev);
3587 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3588 rb_bitmap_width_per_sh);
3589 }
3590 }
3591 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3592
3593 adev->gfx.config.backend_enable_mask = active_rbs;
3594 adev->gfx.config.num_rbs = hweight32(active_rbs);
3595
3596 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3597 adev->gfx.config.max_shader_engines, 16);
3598
3599 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3600
3601 if (!adev->gfx.config.backend_enable_mask ||
3602 adev->gfx.config.num_rbs >= num_rb_pipes) {
3603 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3604 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3605 } else {
3606 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3607 adev->gfx.config.backend_enable_mask,
3608 num_rb_pipes);
3609 }
3610
3611 /* cache the values for userspace */
3612 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3613 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3614 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3615 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3616 RREG32(mmCC_RB_BACKEND_DISABLE);
3617 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3618 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3619 adev->gfx.config.rb_config[i][j].raster_config =
3620 RREG32(mmPA_SC_RASTER_CONFIG);
3621 adev->gfx.config.rb_config[i][j].raster_config_1 =
3622 RREG32(mmPA_SC_RASTER_CONFIG_1);
3623 }
3624 }
3625 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3626 mutex_unlock(&adev->grbm_idx_mutex);
3627 }
3628
3629 /**
3630  * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
3631 *
3632 * @adev: amdgpu_device pointer
3633 *
3634 * Initialize compute vmid sh_mem registers
3635 *
3636 */
3637 #define DEFAULT_SH_MEM_BASES (0x6000)
3638 #define FIRST_COMPUTE_VMID (8)
3639 #define LAST_COMPUTE_VMID (16)
3640 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3641 {
3642 int i;
3643 uint32_t sh_mem_config;
3644 uint32_t sh_mem_bases;
3645
3646 /*
3647 * Configure apertures:
3648 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3649 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3650 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3651 */
3652 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3653
3654 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3655 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3656 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3657 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3658 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3659 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3660
3661 mutex_lock(&adev->srbm_mutex);
3662 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3663 vi_srbm_select(adev, 0, 0, 0, i);
3664 /* CP and shaders */
3665 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3666 WREG32(mmSH_MEM_APE1_BASE, 1);
3667 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3668 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3669 }
3670 vi_srbm_select(adev, 0, 0, 0, 0);
3671 mutex_unlock(&adev->srbm_mutex);
3672 }
3673
3674 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3675 {
3676 switch (adev->asic_type) {
3677 default:
3678 adev->gfx.config.double_offchip_lds_buf = 1;
3679 break;
3680 case CHIP_CARRIZO:
3681 case CHIP_STONEY:
3682 adev->gfx.config.double_offchip_lds_buf = 0;
3683 break;
3684 }
3685 }
3686
3687 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3688 {
3689 u32 tmp, sh_static_mem_cfg;
3690 int i;
3691
3692 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3693 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3694 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3695 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3696
3697 gfx_v8_0_tiling_mode_table_init(adev);
3698 gfx_v8_0_setup_rb(adev);
3699 gfx_v8_0_get_cu_info(adev);
3700 gfx_v8_0_config_init(adev);
3701
3702 /* XXX SH_MEM regs */
3703 /* where to put LDS, scratch, GPUVM in FSA64 space */
3704 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3705 SWIZZLE_ENABLE, 1);
3706 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3707 ELEMENT_SIZE, 1);
3708 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3709 INDEX_STRIDE, 3);
3710 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3711
3712 mutex_lock(&adev->srbm_mutex);
3713 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3714 vi_srbm_select(adev, 0, 0, 0, i);
3715 /* CP and shaders */
3716 if (i == 0) {
3717 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3718 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3719 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3720 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3721 WREG32(mmSH_MEM_CONFIG, tmp);
3722 WREG32(mmSH_MEM_BASES, 0);
3723 } else {
3724 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3725 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3726 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3727 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3728 WREG32(mmSH_MEM_CONFIG, tmp);
3729 tmp = adev->mc.shared_aperture_start >> 48;
3730 WREG32(mmSH_MEM_BASES, tmp);
3731 }
3732
3733 WREG32(mmSH_MEM_APE1_BASE, 1);
3734 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3735 }
3736 vi_srbm_select(adev, 0, 0, 0, 0);
3737 mutex_unlock(&adev->srbm_mutex);
3738
3739 gfx_v8_0_init_compute_vmid(adev);
3740
3741 mutex_lock(&adev->grbm_idx_mutex);
3742 /*
3743  * make sure the following register writes are broadcast
3744  * to all shader engines and arrays
3745 */
3746 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3747
3748 WREG32(mmPA_SC_FIFO_SIZE,
3749 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3750 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3751 (adev->gfx.config.sc_prim_fifo_size_backend <<
3752 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3753 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3754 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3755 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3756 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3757
3758 tmp = RREG32(mmSPI_ARB_PRIORITY);
3759 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3760 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3761 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3762 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3763 WREG32(mmSPI_ARB_PRIORITY, tmp);
3764
3765 mutex_unlock(&adev->grbm_idx_mutex);
3766
3767 }
3768
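/*
 * gfx_v8_0_wait_for_rlc_serdes - poll, for up to adev->usec_timeout, until
 * the RLC SERDES CU masters on every SE/SH and the non-CU masters (SE, GC,
 * TC0/TC1) all report idle.
 */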
3769 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3770 {
3771 u32 i, j, k;
3772 u32 mask;
3773
3774 mutex_lock(&adev->grbm_idx_mutex);
3775 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3776 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3777 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3778 for (k = 0; k < adev->usec_timeout; k++) {
3779 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3780 break;
3781 udelay(1);
3782 }
3783 }
3784 }
3785 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3786 mutex_unlock(&adev->grbm_idx_mutex);
3787
3788 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3789 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3790 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3791 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3792 for (k = 0; k < adev->usec_timeout; k++) {
3793 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3794 break;
3795 udelay(1);
3796 }
3797 }
3798
3799 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3800 bool enable)
3801 {
3802 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3803
3804 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3805 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3806 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3807 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3808
3809 WREG32(mmCP_INT_CNTL_RING0, tmp);
3810 }
3811
3812 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3813 {
3814 /* csib */
3815 WREG32(mmRLC_CSIB_ADDR_HI,
3816 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3817 WREG32(mmRLC_CSIB_ADDR_LO,
3818 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3819 WREG32(mmRLC_CSIB_LENGTH,
3820 adev->gfx.rlc.clear_state_size);
3821 }
3822
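/*
 * gfx_v8_0_parse_ind_reg_list - preprocess the RLC indirect register list
 *
 * Records the starting offset of each list entry (entries are terminated
 * by 0xFFFFFFFF), collects the unique register indices into
 * unique_indices[], and rewrites the list in place so that each index
 * field refers to its slot in that table.
 */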
3823 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3824 int ind_offset,
3825 int list_size,
3826 int *unique_indices,
3827 int *indices_count,
3828 int max_indices,
3829 int *ind_start_offsets,
3830 int *offset_count,
3831 int max_offset)
3832 {
3833 int indices;
3834 bool new_entry = true;
3835
3836 for (; ind_offset < list_size; ind_offset++) {
3837
3838 if (new_entry) {
3839 new_entry = false;
3840 ind_start_offsets[*offset_count] = ind_offset;
3841 *offset_count = *offset_count + 1;
3842 BUG_ON(*offset_count >= max_offset);
3843 }
3844
3845 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3846 new_entry = true;
3847 continue;
3848 }
3849
3850 ind_offset += 2;
3851
3852 /* look for the matching index */
3853 for (indices = 0;
3854 indices < *indices_count;
3855 indices++) {
3856 if (unique_indices[indices] ==
3857 register_list_format[ind_offset])
3858 break;
3859 }
3860
3861 if (indices >= *indices_count) {
3862 unique_indices[*indices_count] =
3863 register_list_format[ind_offset];
3864 indices = *indices_count;
3865 *indices_count = *indices_count + 1;
3866 BUG_ON(*indices_count >= max_indices);
3867 }
3868
3869 register_list_format[ind_offset] = indices;
3870 }
3871 }
3872
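/*
 * gfx_v8_0_init_save_restore_list - upload the RLC save/restore data: the
 * direct register restore list into ARAM, the preprocessed indirect list
 * and its starting offsets into GPM scratch, and the unique register
 * indices into the SRM index control registers.
 */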
3873 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3874 {
3875 int i, temp, data;
3876 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3877 int indices_count = 0;
3878 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3879 int offset_count = 0;
3880
3881 int list_size;
3882 unsigned int *register_list_format =
3883 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3884 if (!register_list_format)
3885 return -ENOMEM;
3886 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3887 adev->gfx.rlc.reg_list_format_size_bytes);
3888
3889 gfx_v8_0_parse_ind_reg_list(register_list_format,
3890 RLC_FormatDirectRegListLength,
3891 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3892 unique_indices,
3893 &indices_count,
3894 sizeof(unique_indices) / sizeof(int),
3895 indirect_start_offsets,
3896 &offset_count,
3897 sizeof(indirect_start_offsets)/sizeof(int));
3898
3899 /* save and restore list */
3900 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3901
3902 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3903 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3904 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3905
3906 /* indirect list */
3907 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3908 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3909 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3910
3911 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3912 list_size = list_size >> 1;
3913 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3914 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3915
3916 /* write the starting offsets */
3917 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3918 adev->gfx.rlc.starting_offsets_start);
3919 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3920 WREG32(mmRLC_GPM_SCRATCH_DATA,
3921 indirect_start_offsets[i]);
3922
3923 /* unique indices */
3924 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3925 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3926 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3927 if (unique_indices[i] != 0) {
3928 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3929 WREG32(data + i, unique_indices[i] >> 20);
3930 }
3931 }
3932 kfree(register_list_format);
3933
3934 return 0;
3935 }
3936
3937 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3938 {
3939 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3940 }
3941
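/*
 * gfx_v8_0_init_power_gating - program the RLC power-gating delays and
 * the GFX idle threshold used by the auto power-gating logic.
 */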
3942 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3943 {
3944 uint32_t data;
3945
3946 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3947
3948 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3949 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3950 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3951 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3952 WREG32(mmRLC_PG_DELAY, data);
3953
3954 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3955 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
3956
3957 }
3958
3959 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3960 bool enable)
3961 {
3962 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
3963 }
3964
3965 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3966 bool enable)
3967 {
3968 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
3969 }
3970
3971 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3972 {
3973 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
3974 }
3975
3976 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3977 {
3978 if ((adev->asic_type == CHIP_CARRIZO) ||
3979 (adev->asic_type == CHIP_STONEY)) {
3980 gfx_v8_0_init_csb(adev);
3981 gfx_v8_0_init_save_restore_list(adev);
3982 gfx_v8_0_enable_save_restore_machine(adev);
3983 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3984 gfx_v8_0_init_power_gating(adev);
3985 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3986 } else if ((adev->asic_type == CHIP_POLARIS11) ||
3987 (adev->asic_type == CHIP_POLARIS12)) {
3988 gfx_v8_0_init_csb(adev);
3989 gfx_v8_0_init_save_restore_list(adev);
3990 gfx_v8_0_enable_save_restore_machine(adev);
3991 gfx_v8_0_init_power_gating(adev);
3992 }
3993
3994 }
3995
3996 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3997 {
3998 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
3999
4000 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4001 gfx_v8_0_wait_for_rlc_serdes(adev);
4002 }
4003
4004 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4005 {
4006 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4007 udelay(50);
4008
4009 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4010 udelay(50);
4011 }
4012
4013 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4014 {
4015 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4016
4017 /* on APUs such as Carrizo, the CP interrupt is enabled after CP init */
4018 if (!(adev->flags & AMD_IS_APU))
4019 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4020
4021 udelay(50);
4022 }
4023
4024 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4025 {
4026 const struct rlc_firmware_header_v2_0 *hdr;
4027 const __le32 *fw_data;
4028 unsigned i, fw_size;
4029
4030 if (!adev->gfx.rlc_fw)
4031 return -EINVAL;
4032
4033 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4034 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4035
4036 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4037 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4038 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4039
4040 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4041 for (i = 0; i < fw_size; i++)
4042 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4043 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4044
4045 return 0;
4046 }
4047
4048 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4049 {
4050 int r;
4051 u32 tmp;
4052
4053 gfx_v8_0_rlc_stop(adev);
4054
4055 /* disable CG */
4056 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4057 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4058 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4059 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4060 if (adev->asic_type == CHIP_POLARIS11 ||
4061 adev->asic_type == CHIP_POLARIS10 ||
4062 adev->asic_type == CHIP_POLARIS12) {
4063 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4064 tmp &= ~0x3;
4065 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4066 }
4067
4068 /* disable PG */
4069 WREG32(mmRLC_PG_CNTL, 0);
4070
4071 gfx_v8_0_rlc_reset(adev);
4072 gfx_v8_0_init_pg(adev);
4073
4074 if (!adev->pp_enabled) {
4075 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4076 /* legacy rlc firmware loading */
4077 r = gfx_v8_0_rlc_load_microcode(adev);
4078 if (r)
4079 return r;
4080 } else {
4081 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4082 AMDGPU_UCODE_ID_RLC_G);
4083 if (r)
4084 return -EINVAL;
4085 }
4086 }
4087
4088 gfx_v8_0_rlc_start(adev);
4089
4090 return 0;
4091 }
4092
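/*
 * gfx_v8_0_cp_gfx_enable - halt or un-halt the gfx CP micro engines
 * (ME, PFP and CE); when halting, also mark the gfx rings as not ready.
 */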
4093 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4094 {
4095 int i;
4096 u32 tmp = RREG32(mmCP_ME_CNTL);
4097
4098 if (enable) {
4099 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4100 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4101 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4102 } else {
4103 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4104 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4105 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4106 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4107 adev->gfx.gfx_ring[i].ready = false;
4108 }
4109 WREG32(mmCP_ME_CNTL, tmp);
4110 udelay(50);
4111 }
4112
4113 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4114 {
4115 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4116 const struct gfx_firmware_header_v1_0 *ce_hdr;
4117 const struct gfx_firmware_header_v1_0 *me_hdr;
4118 const __le32 *fw_data;
4119 unsigned i, fw_size;
4120
4121 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4122 return -EINVAL;
4123
4124 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4125 adev->gfx.pfp_fw->data;
4126 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4127 adev->gfx.ce_fw->data;
4128 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4129 adev->gfx.me_fw->data;
4130
4131 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4132 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4133 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4134
4135 gfx_v8_0_cp_gfx_enable(adev, false);
4136
4137 /* PFP */
4138 fw_data = (const __le32 *)
4139 (adev->gfx.pfp_fw->data +
4140 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4141 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4142 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4143 for (i = 0; i < fw_size; i++)
4144 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4145 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4146
4147 /* CE */
4148 fw_data = (const __le32 *)
4149 (adev->gfx.ce_fw->data +
4150 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4151 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4152 WREG32(mmCP_CE_UCODE_ADDR, 0);
4153 for (i = 0; i < fw_size; i++)
4154 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4155 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4156
4157 /* ME */
4158 fw_data = (const __le32 *)
4159 (adev->gfx.me_fw->data +
4160 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4161 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4162 WREG32(mmCP_ME_RAM_WADDR, 0);
4163 for (i = 0; i < fw_size; i++)
4164 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4165 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4166
4167 return 0;
4168 }
4169
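/*
 * gfx_v8_0_get_csb_size - number of dwords needed for the clear state
 * buffer commands; must mirror the packets emitted by
 * gfx_v8_0_cp_gfx_start() below.
 */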
4170 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4171 {
4172 u32 count = 0;
4173 const struct cs_section_def *sect = NULL;
4174 const struct cs_extent_def *ext = NULL;
4175
4176 /* begin clear state */
4177 count += 2;
4178 /* context control state */
4179 count += 3;
4180
4181 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4182 for (ext = sect->section; ext->extent != NULL; ++ext) {
4183 if (sect->id == SECT_CONTEXT)
4184 count += 2 + ext->reg_count;
4185 else
4186 return 0;
4187 }
4188 }
4189 /* pa_sc_raster_config/pa_sc_raster_config1 */
4190 count += 4;
4191 /* end clear state */
4192 count += 2;
4193 /* clear state */
4194 count += 2;
4195
4196 return count;
4197 }
4198
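/*
 * gfx_v8_0_cp_gfx_start - initialize the CP and emit the clear state
 * sequence on the gfx ring, including the per-ASIC
 * PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1 values and the CE partition
 * bases.
 */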
4199 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4200 {
4201 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4202 const struct cs_section_def *sect = NULL;
4203 const struct cs_extent_def *ext = NULL;
4204 int r, i;
4205
4206 /* init the CP */
4207 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4208 WREG32(mmCP_ENDIAN_SWAP, 0);
4209 WREG32(mmCP_DEVICE_ID, 1);
4210
4211 gfx_v8_0_cp_gfx_enable(adev, true);
4212
4213 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4214 if (r) {
4215 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4216 return r;
4217 }
4218
4219 /* clear state buffer */
4220 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4221 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4222
4223 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4224 amdgpu_ring_write(ring, 0x80000000);
4225 amdgpu_ring_write(ring, 0x80000000);
4226
4227 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4228 for (ext = sect->section; ext->extent != NULL; ++ext) {
4229 if (sect->id == SECT_CONTEXT) {
4230 amdgpu_ring_write(ring,
4231 PACKET3(PACKET3_SET_CONTEXT_REG,
4232 ext->reg_count));
4233 amdgpu_ring_write(ring,
4234 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4235 for (i = 0; i < ext->reg_count; i++)
4236 amdgpu_ring_write(ring, ext->extent[i]);
4237 }
4238 }
4239 }
4240
4241 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4242 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4243 switch (adev->asic_type) {
4244 case CHIP_TONGA:
4245 case CHIP_POLARIS10:
4246 amdgpu_ring_write(ring, 0x16000012);
4247 amdgpu_ring_write(ring, 0x0000002A);
4248 break;
4249 case CHIP_POLARIS11:
4250 case CHIP_POLARIS12:
4251 amdgpu_ring_write(ring, 0x16000012);
4252 amdgpu_ring_write(ring, 0x00000000);
4253 break;
4254 case CHIP_FIJI:
4255 amdgpu_ring_write(ring, 0x3a00161a);
4256 amdgpu_ring_write(ring, 0x0000002e);
4257 break;
4258 case CHIP_CARRIZO:
4259 amdgpu_ring_write(ring, 0x00000002);
4260 amdgpu_ring_write(ring, 0x00000000);
4261 break;
4262 case CHIP_TOPAZ:
4263 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4264 0x00000000 : 0x00000002);
4265 amdgpu_ring_write(ring, 0x00000000);
4266 break;
4267 case CHIP_STONEY:
4268 amdgpu_ring_write(ring, 0x00000000);
4269 amdgpu_ring_write(ring, 0x00000000);
4270 break;
4271 default:
4272 BUG();
4273 }
4274
4275 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4276 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4277
4278 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4279 amdgpu_ring_write(ring, 0);
4280
4281 /* init the CE partitions */
4282 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4283 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4284 amdgpu_ring_write(ring, 0x8000);
4285 amdgpu_ring_write(ring, 0x8000);
4286
4287 amdgpu_ring_commit(ring);
4288
4289 return 0;
4290 }

4291 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4292 {
4293 u32 tmp;
4294 /* no gfx doorbells on iceland */
4295 if (adev->asic_type == CHIP_TOPAZ)
4296 return;
4297
4298 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4299
4300 if (ring->use_doorbell) {
4301 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4302 DOORBELL_OFFSET, ring->doorbell_index);
4303 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4304 DOORBELL_HIT, 0);
4305 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4306 DOORBELL_EN, 1);
4307 } else {
4308 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4309 }
4310
4311 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4312
4313 if (adev->flags & AMD_IS_APU)
4314 return;
4315
4316 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4317 DOORBELL_RANGE_LOWER,
4318 AMDGPU_DOORBELL_GFX_RING0);
4319 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4320
4321 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4322 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4323 }
4324
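/*
 * gfx_v8_0_cp_gfx_resume - program the gfx ring buffer (size, base,
 * rptr/wptr writeback addresses and doorbell), then start and test the
 * ring.
 */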
4325 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4326 {
4327 struct amdgpu_ring *ring;
4328 u32 tmp;
4329 u32 rb_bufsz;
4330 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4331 int r;
4332
4333 /* Set the write pointer delay */
4334 WREG32(mmCP_RB_WPTR_DELAY, 0);
4335
4336 /* set the RB to use vmid 0 */
4337 WREG32(mmCP_RB_VMID, 0);
4338
4339 /* Set ring buffer size */
4340 ring = &adev->gfx.gfx_ring[0];
4341 rb_bufsz = order_base_2(ring->ring_size / 8);
4342 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4343 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4344 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4345 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4346 #ifdef __BIG_ENDIAN
4347 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4348 #endif
4349 WREG32(mmCP_RB0_CNTL, tmp);
4350
4351 /* Initialize the ring buffer's read and write pointers */
4352 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4353 ring->wptr = 0;
4354 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4355
4356 /* set the wb address whether it's enabled or not */
4357 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4358 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4359 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4360
4361 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4362 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4363 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4364 mdelay(1);
4365 WREG32(mmCP_RB0_CNTL, tmp);
4366
4367 rb_addr = ring->gpu_addr >> 8;
4368 WREG32(mmCP_RB0_BASE, rb_addr);
4369 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4370
4371 gfx_v8_0_set_cpg_door_bell(adev, ring);
4372 /* start the ring */
4373 amdgpu_ring_clear_ring(ring);
4374 gfx_v8_0_cp_gfx_start(adev);
4375 ring->ready = true;
4376 r = amdgpu_ring_test_ring(ring);
4377 if (r)
4378 ring->ready = false;
4379
4380 return r;
4381 }
4382
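/*
 * Halt or un-halt both compute micro engines (MEC1/MEC2). When
 * halting, also mark every compute ring and the KIQ as not ready so
 * no further submissions are attempted.
 */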
4383 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4384 {
4385 int i;
4386
4387 if (enable) {
4388 WREG32(mmCP_MEC_CNTL, 0);
4389 } else {
4390 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4391 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4392 adev->gfx.compute_ring[i].ready = false;
4393 adev->gfx.kiq.ring.ready = false;
4394 }
4395 udelay(50);
4396 }
4397
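/*
 * Legacy (non-SMU) MEC firmware load: halt the compute engines, then
 * stream the MEC1 (and, if present, MEC2) ucode image one dword at a
 * time through the CP_MEC_ME*_UCODE_ADDR/DATA register pair.
 */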
4398 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4399 {
4400 const struct gfx_firmware_header_v1_0 *mec_hdr;
4401 const __le32 *fw_data;
4402 unsigned i, fw_size;
4403
4404 if (!adev->gfx.mec_fw)
4405 return -EINVAL;
4406
4407 gfx_v8_0_cp_compute_enable(adev, false);
4408
4409 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4410 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4411
4412 fw_data = (const __le32 *)
4413 (adev->gfx.mec_fw->data +
4414 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4415 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4416
4417 /* MEC1 */
4418 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4419 for (i = 0; i < fw_size; i++)
4420 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4421 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4422
4423 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4424 if (adev->gfx.mec2_fw) {
4425 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4426
4427 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4428 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4429
4430 fw_data = (const __le32 *)
4431 (adev->gfx.mec2_fw->data +
4432 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4433 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4434
4435 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4436 for (i = 0; i < fw_size; i++)
4437 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4438 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4439 }
4440
4441 return 0;
4442 }
4443
4444 /* KIQ functions */
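/*
 * The KIQ (Kernel Interface Queue) is a privileged compute queue the
 * driver owns; instead of programming HQD registers directly, user
 * compute queues are configured by submitting SET_RESOURCES and
 * MAP_QUEUES packets to the KIQ.
 */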
4445 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4446 {
4447 uint32_t tmp;
4448 struct amdgpu_device *adev = ring->adev;
4449
4450 /* tell RLC which is KIQ queue */
4451 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4452 tmp &= 0xffffff00;
4453 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4454 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4455 tmp |= 0x80;
4456 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4457 }
4458
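/*
 * Enable the compute queues through the KIQ: build the queue mask
 * from the MEC queue bitmap, emit one SET_RESOURCES packet plus a
 * MAP_QUEUES packet per compute ring, then write a marker to a
 * scratch register from the same ring and poll it to confirm the KIQ
 * has processed everything.
 */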
4459 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4460 {
4461 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4462 uint32_t scratch, tmp = 0;
4463 uint64_t queue_mask = 0;
4464 int r, i;
4465
4466 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4467 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4468 continue;
4469
4470 /* This situation may be hit in the future if a new HW
4471 * generation exposes more than 64 queues. If so, the
4472 * definition of queue_mask needs updating */
4473 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4474 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4475 break;
4476 }
4477
4478 queue_mask |= (1ull << i);
4479 }
4480
4481 r = amdgpu_gfx_scratch_get(adev, &scratch);
4482 if (r) {
4483 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4484 return r;
4485 }
4486 WREG32(scratch, 0xCAFEDEAD);
4487
4488 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4489 if (r) {
4490 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4491 amdgpu_gfx_scratch_free(adev, scratch);
4492 return r;
4493 }
4494 /* set resources */
4495 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4496 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4497 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4498 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4499 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4500 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4501 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4502 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4503 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4504 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4505 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4506 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4507
4508 /* map queues */
4509 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4510 /* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
4511 amdgpu_ring_write(kiq_ring,
4512 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4513 amdgpu_ring_write(kiq_ring,
4514 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4515 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4516 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4517 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4518 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4519 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4520 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4521 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4522 }
4523 /* write to scratch for completion */
4524 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4525 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4526 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4527 amdgpu_ring_commit(kiq_ring);
4528
4529 for (i = 0; i < adev->usec_timeout; i++) {
4530 tmp = RREG32(scratch);
4531 if (tmp == 0xDEADBEEF)
4532 break;
4533 DRM_UDELAY(1);
4534 }
4535 if (i >= adev->usec_timeout) {
4536 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4537 scratch, tmp);
4538 r = -EINVAL;
4539 }
4540 amdgpu_gfx_scratch_free(adev, scratch);
4541
4542 return r;
4543 }
4544
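/*
 * Request a dequeue on the HQD currently selected via SRBM and wait
 * for it to go inactive, then clear the dequeue request and the queue
 * read/write pointers. Returns -ETIMEDOUT if the queue never idles.
 */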
4545 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4546 {
4547 int i, r = 0;
4548
4549 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4550 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4551 for (i = 0; i < adev->usec_timeout; i++) {
4552 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4553 break;
4554 udelay(1);
4555 }
4556 if (i == adev->usec_timeout)
4557 r = -ETIMEDOUT;
4558 }
4559 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4560 WREG32(mmCP_HQD_PQ_RPTR, 0);
4561 WREG32(mmCP_HQD_PQ_WPTR, 0);
4562
4563 return r;
4564 }
4565
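/*
 * Fill in the MQD (Memory Queue Descriptor) for this ring: EOP buffer,
 * MQD and ring base addresses, queue size, doorbell control and the
 * write-back addresses for the read/write pointers. The CP consumes
 * this structure from memory when the queue is mapped.
 */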
4566 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4567 {
4568 struct amdgpu_device *adev = ring->adev;
4569 struct vi_mqd *mqd = ring->mqd_ptr;
4570 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4571 uint32_t tmp;
4572
4573 mqd->header = 0xC0310800;
4574 mqd->compute_pipelinestat_enable = 0x00000001;
4575 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4576 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4577 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4578 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4579 mqd->compute_misc_reserved = 0x00000003;
4580 if (!(adev->flags & AMD_IS_APU)) {
4581 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4582 + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
4583 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4584 + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
4585 }
4586 eop_base_addr = ring->eop_gpu_addr >> 8;
4587 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4588 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4589
4590 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4591 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4592 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4593 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4594
4595 mqd->cp_hqd_eop_control = tmp;
4596
4597 /* enable doorbell? */
4598 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4599 CP_HQD_PQ_DOORBELL_CONTROL,
4600 DOORBELL_EN,
4601 ring->use_doorbell ? 1 : 0);
4602
4603 mqd->cp_hqd_pq_doorbell_control = tmp;
4604
4605 /* set the pointer to the MQD */
4606 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4607 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4608
4609 /* set MQD vmid to 0 */
4610 tmp = RREG32(mmCP_MQD_CONTROL);
4611 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4612 mqd->cp_mqd_control = tmp;
4613
4614 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4615 hqd_gpu_addr = ring->gpu_addr >> 8;
4616 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4617 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4618
4619 /* set up the HQD, this is similar to CP_RB0_CNTL */
4620 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4621 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4622 (order_base_2(ring->ring_size / 4) - 1));
4623 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4624 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4625 #ifdef __BIG_ENDIAN
4626 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4627 #endif
4628 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4629 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4630 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4631 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4632 mqd->cp_hqd_pq_control = tmp;
4633
4634 /* set the wb address whether it's enabled or not */
4635 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4636 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4637 mqd->cp_hqd_pq_rptr_report_addr_hi =
4638 upper_32_bits(wb_gpu_addr) & 0xffff;
4639
4640 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4641 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4642 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4643 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4644
4645 tmp = 0;
4646 /* enable the doorbell if requested */
4647 if (ring->use_doorbell) {
4648 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4649 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4650 DOORBELL_OFFSET, ring->doorbell_index);
4651
4652 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4653 DOORBELL_EN, 1);
4654 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4655 DOORBELL_SOURCE, 0);
4656 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4657 DOORBELL_HIT, 0);
4658 }
4659
4660 mqd->cp_hqd_pq_doorbell_control = tmp;
4661
4662 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4663 ring->wptr = 0;
4664 mqd->cp_hqd_pq_wptr = ring->wptr;
4665 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4666
4667 /* set the vmid for the queue */
4668 mqd->cp_hqd_vmid = 0;
4669
4670 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4671 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4672 mqd->cp_hqd_persistent_state = tmp;
4673
4674 /* set MTYPE */
4675 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4676 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4677 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4678 mqd->cp_hqd_ib_control = tmp;
4679
4680 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4681 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4682 mqd->cp_hqd_iq_timer = tmp;
4683
4684 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4685 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4686 mqd->cp_hqd_ctx_save_control = tmp;
4687
4688 /* defaults */
4689 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4690 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4691 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4692 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4693 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4694 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4695 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4696 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4697 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4698 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4699 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4700 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4701 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4702 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4703 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4704
4705 /* activate the queue */
4706 mqd->cp_hqd_active = 1;
4707
4708 return 0;
4709 }
4710
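/*
 * Write a previously initialized MQD into the HQD registers of the
 * queue currently selected through SRBM, programming CP_HQD_ACTIVE
 * last so the queue only goes live once it is fully set up.
 */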
4711 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4712 struct vi_mqd *mqd)
4713 {
4714 uint32_t mqd_reg;
4715 uint32_t *mqd_data;
4716
4717 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4718 mqd_data = &mqd->cp_mqd_base_addr_lo;
4719
4720 /* disable wptr polling */
4721 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4722
4723 /* program all HQD registers */
4724 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4725 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4726
4727 /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4728 * This is safe since EOP RPTR==WPTR for any inactive HQD
4729 * on ASICs that do not support context-save.
4730 * EOP writes/reads can start anywhere in the ring.
4731 */
4732 if (adev->asic_type != CHIP_TONGA) {
4733 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4734 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4735 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4736 }
4737
4738 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4739 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4740
4741 /* activate the HQD */
4742 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4743 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4744
4745 return 0;
4746 }
4747
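/*
 * Initialize the KIQ's MQD and commit it to the hardware, or restore
 * it from the backup copy after a GPU reset. gfx_v8_0_kcq_init_queue()
 * below does the same for the user compute queues, except their MQDs
 * are mapped later through the KIQ instead of committed directly.
 */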
4748 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4749 {
4750 struct amdgpu_device *adev = ring->adev;
4751 struct vi_mqd *mqd = ring->mqd_ptr;
4752 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4753
4754 gfx_v8_0_kiq_setting(ring);
4755
4756 if (adev->gfx.in_reset) { /* for GPU_RESET case */
4757 /* reset MQD to a clean status */
4758 if (adev->gfx.mec.mqd_backup[mqd_idx])
4759 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4760
4761 /* reset ring buffer */
4762 ring->wptr = 0;
4763 amdgpu_ring_clear_ring(ring);
4764 mutex_lock(&adev->srbm_mutex);
4765 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4766 gfx_v8_0_mqd_commit(adev, mqd);
4767 vi_srbm_select(adev, 0, 0, 0, 0);
4768 mutex_unlock(&adev->srbm_mutex);
4769 } else {
4770 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4771 ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
4772 ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
4773 mutex_lock(&adev->srbm_mutex);
4774 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4775 gfx_v8_0_mqd_init(ring);
4776 gfx_v8_0_mqd_commit(adev, mqd);
4777 vi_srbm_select(adev, 0, 0, 0, 0);
4778 mutex_unlock(&adev->srbm_mutex);
4779
4780 if (adev->gfx.mec.mqd_backup[mqd_idx])
4781 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4782 }
4783
4784 return 0;
4785 }
4786
4787 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4788 {
4789 struct amdgpu_device *adev = ring->adev;
4790 struct vi_mqd *mqd = ring->mqd_ptr;
4791 int mqd_idx = ring - &adev->gfx.compute_ring[0];
4792
4793 if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
4794 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4795 ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
4796 ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
4797 mutex_lock(&adev->srbm_mutex);
4798 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4799 gfx_v8_0_mqd_init(ring);
4800 vi_srbm_select(adev, 0, 0, 0, 0);
4801 mutex_unlock(&adev->srbm_mutex);
4802
4803 if (adev->gfx.mec.mqd_backup[mqd_idx])
4804 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4805 } else if (adev->gfx.in_reset) { /* for GPU_RESET case */
4806 /* reset MQD to a clean status */
4807 if (adev->gfx.mec.mqd_backup[mqd_idx])
4808 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4809 /* reset ring buffer */
4810 ring->wptr = 0;
4811 amdgpu_ring_clear_ring(ring);
4812 } else {
4813 amdgpu_ring_clear_ring(ring);
4814 }
4815 return 0;
4816 }
4817
4818 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4819 {
4820 if (adev->asic_type > CHIP_TONGA) {
4821 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4822 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4823 }
4824 /* enable doorbells */
4825 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4826 }
4827
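/*
 * Compute bring-up: un-halt the MEC, initialize the KIQ and every
 * compute ring's MQD, program the MEC doorbell aperture, map the
 * compute queues through the KIQ, then ring-test the KIQ and each KCQ.
 */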
4828 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4829 {
4830 struct amdgpu_ring *ring = NULL;
4831 int r = 0, i;
4832
4833 gfx_v8_0_cp_compute_enable(adev, true);
4834
4835 ring = &adev->gfx.kiq.ring;
4836
4837 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4838 if (unlikely(r != 0))
4839 goto done;
4840
4841 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4842 if (!r) {
4843 r = gfx_v8_0_kiq_init_queue(ring);
4844 amdgpu_bo_kunmap(ring->mqd_obj);
4845 ring->mqd_ptr = NULL;
4846 }
4847 amdgpu_bo_unreserve(ring->mqd_obj);
4848 if (r)
4849 goto done;
4850
4851 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4852 ring = &adev->gfx.compute_ring[i];
4853
4854 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4855 if (unlikely(r != 0))
4856 goto done;
4857 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4858 if (!r) {
4859 r = gfx_v8_0_kcq_init_queue(ring);
4860 amdgpu_bo_kunmap(ring->mqd_obj);
4861 ring->mqd_ptr = NULL;
4862 }
4863 amdgpu_bo_unreserve(ring->mqd_obj);
4864 if (r)
4865 goto done;
4866 }
4867
4868 gfx_v8_0_set_mec_doorbell_range(adev);
4869
4870 r = gfx_v8_0_kiq_kcq_enable(adev);
4871 if (r)
4872 goto done;
4873
4874 /* Test KIQ */
4875 ring = &adev->gfx.kiq.ring;
4876 ring->ready = true;
4877 r = amdgpu_ring_test_ring(ring);
4878 if (r) {
4879 ring->ready = false;
4880 goto done;
4881 }
4882
4883 /* Test KCQs */
4884 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4885 ring = &adev->gfx.compute_ring[i];
4886 ring->ready = true;
4887 r = amdgpu_ring_test_ring(ring);
4888 if (r)
4889 ring->ready = false;
4890 }
4891
4892 done:
4893 return r;
4894 }
4895
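/*
 * Top-level CP bring-up: put the CP firmware in place (written
 * directly in the legacy path, or verified as loaded by the SMU
 * otherwise), then resume the gfx ring followed by the KIQ and
 * compute queues.
 */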
4896 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4897 {
4898 int r;
4899
4900 if (!(adev->flags & AMD_IS_APU))
4901 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4902
4903 if (!adev->pp_enabled) {
4904 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4905 /* legacy firmware loading */
4906 r = gfx_v8_0_cp_gfx_load_microcode(adev);
4907 if (r)
4908 return r;
4909
4910 r = gfx_v8_0_cp_compute_load_microcode(adev);
4911 if (r)
4912 return r;
4913 } else {
4914 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4915 AMDGPU_UCODE_ID_CP_CE);
4916 if (r)
4917 return -EINVAL;
4918
4919 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4920 AMDGPU_UCODE_ID_CP_PFP);
4921 if (r)
4922 return -EINVAL;
4923
4924 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4925 AMDGPU_UCODE_ID_CP_ME);
4926 if (r)
4927 return -EINVAL;
4928
4929 if (adev->asic_type == CHIP_TOPAZ) {
4930 r = gfx_v8_0_cp_compute_load_microcode(adev);
4931 if (r)
4932 return r;
4933 } else {
4934 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4935 AMDGPU_UCODE_ID_CP_MEC1);
4936 if (r)
4937 return -EINVAL;
4938 }
4939 }
4940 }
4941
4942 r = gfx_v8_0_cp_gfx_resume(adev);
4943 if (r)
4944 return r;
4945
4946 r = gfx_v8_0_kiq_resume(adev);
4947 if (r)
4948 return r;
4949
4950 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4951
4952 return 0;
4953 }
4954
4955 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4956 {
4957 gfx_v8_0_cp_gfx_enable(adev, enable);
4958 gfx_v8_0_cp_compute_enable(adev, enable);
4959 }
4960
4961 static int gfx_v8_0_hw_init(void *handle)
4962 {
4963 int r;
4964 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4965
4966 gfx_v8_0_init_golden_registers(adev);
4967 gfx_v8_0_gpu_init(adev);
4968
4969 r = gfx_v8_0_rlc_resume(adev);
4970 if (r)
4971 return r;
4972
4973 r = gfx_v8_0_cp_resume(adev);
4974
4975 return r;
4976 }
4977
4978 static int gfx_v8_0_hw_fini(void *handle)
4979 {
4980 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4981
4982 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4983 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4984 if (amdgpu_sriov_vf(adev)) {
4985 pr_debug("For SRIOV client, shouldn't do anything.\n");
4986 return 0;
4987 }
4988 gfx_v8_0_cp_enable(adev, false);
4989 gfx_v8_0_rlc_stop(adev);
4990
4991 amdgpu_set_powergating_state(adev,
4992 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
4993
4994 return 0;
4995 }
4996
4997 static int gfx_v8_0_suspend(void *handle)
4998 {
4999 struct amdgpu_device *adev = (struct amdgpu_device *)handle;

5000 adev->gfx.in_suspend = true;
5001 return gfx_v8_0_hw_fini(adev);
5002 }
5003
5004 static int gfx_v8_0_resume(void *handle)
5005 {
5006 int r;
5007 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5008
5009 r = gfx_v8_0_hw_init(adev);
5010 adev->gfx.in_suspend = false;
5011 return r;
5012 }
5013
5014 static bool gfx_v8_0_is_idle(void *handle)
5015 {
5016 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5017
5018 return !REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE);
5022 }
5023
5024 static int gfx_v8_0_wait_for_idle(void *handle)
5025 {
5026 unsigned i;
5027 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5028
5029 for (i = 0; i < adev->usec_timeout; i++) {
5030 if (gfx_v8_0_is_idle(handle))
5031 return 0;
5032
5033 udelay(1);
5034 }
5035 return -ETIMEDOUT;
5036 }
5037
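/*
 * Inspect the GRBM/SRBM status registers for busy blocks and record
 * the soft-reset bits that would be needed in adev->gfx.*_soft_reset;
 * returns true if a gfx soft reset is required.
 */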
5038 static bool gfx_v8_0_check_soft_reset(void *handle)
5039 {
5040 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5041 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5042 u32 tmp;
5043
5044 /* GRBM_STATUS */
5045 tmp = RREG32(mmGRBM_STATUS);
5046 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5047 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5048 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5049 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5050 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5051 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5052 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5053 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5054 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5055 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5056 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5057 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5058 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5059 }
5060
5061 /* GRBM_STATUS2 */
5062 tmp = RREG32(mmGRBM_STATUS2);
5063 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5064 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5065 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5066
5067 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5068 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5069 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5070 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5071 SOFT_RESET_CPF, 1);
5072 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5073 SOFT_RESET_CPC, 1);
5074 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5075 SOFT_RESET_CPG, 1);
5076 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5077 SOFT_RESET_GRBM, 1);
5078 }
5079
5080 /* SRBM_STATUS */
5081 tmp = RREG32(mmSRBM_STATUS);
5082 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5083 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5084 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5085 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5086 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5087 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5088
5089 if (grbm_soft_reset || srbm_soft_reset) {
5090 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5091 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5092 return true;
5093 } else {
5094 adev->gfx.grbm_soft_reset = 0;
5095 adev->gfx.srbm_soft_reset = 0;
5096 return false;
5097 }
5098 }
5099
5100 static int gfx_v8_0_pre_soft_reset(void *handle)
5101 {
5102 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5103 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5104
5105 if ((!adev->gfx.grbm_soft_reset) &&
5106 (!adev->gfx.srbm_soft_reset))
5107 return 0;
5108
5109 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5110 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5111
5112 /* stop the rlc */
5113 gfx_v8_0_rlc_stop(adev);
5114
5115 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5116 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5117 /* Disable GFX parsing/prefetching */
5118 gfx_v8_0_cp_gfx_enable(adev, false);
5119
5120 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5121 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5122 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5123 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5124 int i;
5125
5126 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5127 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5128
5129 mutex_lock(&adev->srbm_mutex);
5130 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5131 gfx_v8_0_deactivate_hqd(adev, 2);
5132 vi_srbm_select(adev, 0, 0, 0, 0);
5133 mutex_unlock(&adev->srbm_mutex);
5134 }
5135 /* Disable MEC parsing/prefetching */
5136 gfx_v8_0_cp_compute_enable(adev, false);
5137 }
5138
5139 return 0;
5140 }
5141
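/*
 * Perform the actual reset: stall and clear the GMCON while the
 * GRBM/SRBM reset bits recorded by check_soft_reset are pulsed
 * (set, read back, wait, clear), then release the GMCON and give the
 * hardware time to settle.
 */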
5142 static int gfx_v8_0_soft_reset(void *handle)
5143 {
5144 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5145 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5146 u32 tmp;
5147
5148 if ((!adev->gfx.grbm_soft_reset) &&
5149 (!adev->gfx.srbm_soft_reset))
5150 return 0;
5151
5152 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5153 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5154
5155 if (grbm_soft_reset || srbm_soft_reset) {
5156 tmp = RREG32(mmGMCON_DEBUG);
5157 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5158 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5159 WREG32(mmGMCON_DEBUG, tmp);
5160 udelay(50);
5161 }
5162
5163 if (grbm_soft_reset) {
5164 tmp = RREG32(mmGRBM_SOFT_RESET);
5165 tmp |= grbm_soft_reset;
5166 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5167 WREG32(mmGRBM_SOFT_RESET, tmp);
5168 tmp = RREG32(mmGRBM_SOFT_RESET);
5169
5170 udelay(50);
5171
5172 tmp &= ~grbm_soft_reset;
5173 WREG32(mmGRBM_SOFT_RESET, tmp);
5174 tmp = RREG32(mmGRBM_SOFT_RESET);
5175 }
5176
5177 if (srbm_soft_reset) {
5178 tmp = RREG32(mmSRBM_SOFT_RESET);
5179 tmp |= srbm_soft_reset;
5180 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5181 WREG32(mmSRBM_SOFT_RESET, tmp);
5182 tmp = RREG32(mmSRBM_SOFT_RESET);
5183
5184 udelay(50);
5185
5186 tmp &= ~srbm_soft_reset;
5187 WREG32(mmSRBM_SOFT_RESET, tmp);
5188 tmp = RREG32(mmSRBM_SOFT_RESET);
5189 }
5190
5191 if (grbm_soft_reset || srbm_soft_reset) {
5192 tmp = RREG32(mmGMCON_DEBUG);
5193 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5194 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5195 WREG32(mmGMCON_DEBUG, tmp);
5196 }
5197
5198 /* Wait a little for things to settle down */
5199 udelay(50);
5200
5201 return 0;
5202 }
5203
5204 static int gfx_v8_0_post_soft_reset(void *handle)
5205 {
5206 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5207 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5208
5209 if ((!adev->gfx.grbm_soft_reset) &&
5210 (!adev->gfx.srbm_soft_reset))
5211 return 0;
5212
5213 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5214 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5215
5216 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5217 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5218 gfx_v8_0_cp_gfx_resume(adev);
5219
5220 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5221 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5222 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5223 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5224 int i;
5225
5226 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5227 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5228
5229 mutex_lock(&adev->srbm_mutex);
5230 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5231 gfx_v8_0_deactivate_hqd(adev, 2);
5232 vi_srbm_select(adev, 0, 0, 0, 0);
5233 mutex_unlock(&adev->srbm_mutex);
5234 }
5235 gfx_v8_0_kiq_resume(adev);
5236 }
5237 gfx_v8_0_rlc_start(adev);
5238
5239 return 0;
5240 }
5241
5242 /**
5243 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5244 *
5245 * @adev: amdgpu_device pointer
5246 *
5247 * Fetches a GPU clock counter snapshot.
5248 * Returns the 64 bit clock counter snapshot.
5249 */
5250 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5251 {
5252 uint64_t clock;
5253
5254 mutex_lock(&adev->gfx.gpu_clock_mutex);
5255 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5256 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5257 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5258 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5259 return clock;
5260 }
5261
5262 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5263 uint32_t vmid,
5264 uint32_t gds_base, uint32_t gds_size,
5265 uint32_t gws_base, uint32_t gws_size,
5266 uint32_t oa_base, uint32_t oa_size)
5267 {
5268 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5269 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5270
5271 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5272 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5273
5274 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5275 oa_size = oa_size >> AMDGPU_OA_SHIFT;
5276
5277 /* GDS Base */
5278 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5279 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5280 WRITE_DATA_DST_SEL(0)));
5281 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5282 amdgpu_ring_write(ring, 0);
5283 amdgpu_ring_write(ring, gds_base);
5284
5285 /* GDS Size */
5286 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5287 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5288 WRITE_DATA_DST_SEL(0)));
5289 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5290 amdgpu_ring_write(ring, 0);
5291 amdgpu_ring_write(ring, gds_size);
5292
5293 /* GWS */
5294 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5295 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5296 WRITE_DATA_DST_SEL(0)));
5297 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5298 amdgpu_ring_write(ring, 0);
5299 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5300
5301 /* OA */
5302 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5303 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5304 WRITE_DATA_DST_SEL(0)));
5305 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5306 amdgpu_ring_write(ring, 0);
5307 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5308 }
5309
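/*
 * Wave debug helpers: per-wave SQ state is reached through the
 * SQ_IND_INDEX/SQ_IND_DATA indirect register pair by selecting the
 * SIMD, wave and register index, with auto-increment for bulk reads
 * such as SGPRs.
 */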
5310 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5311 {
5312 WREG32(mmSQ_IND_INDEX,
5313 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5314 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5315 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5316 (SQ_IND_INDEX__FORCE_READ_MASK));
5317 return RREG32(mmSQ_IND_DATA);
5318 }
5319
5320 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5321 uint32_t wave, uint32_t thread,
5322 uint32_t regno, uint32_t num, uint32_t *out)
5323 {
5324 WREG32(mmSQ_IND_INDEX,
5325 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5326 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5327 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5328 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5329 (SQ_IND_INDEX__FORCE_READ_MASK) |
5330 (SQ_IND_INDEX__AUTO_INCR_MASK));
5331 while (num--)
5332 *(out++) = RREG32(mmSQ_IND_DATA);
5333 }
5334
5335 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5336 {
5337 /* type 0 wave data */
5338 dst[(*no_fields)++] = 0;
5339 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5340 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5341 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5342 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5343 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5344 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5345 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5346 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5347 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5348 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5349 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5350 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5351 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5352 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5353 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5354 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5355 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5356 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5357 }
5358
5359 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5360 uint32_t wave, uint32_t start,
5361 uint32_t size, uint32_t *dst)
5362 {
5363 wave_read_regs(
5364 adev, simd, wave, 0,
5365 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5366 }
5367
5369 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5370 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5371 .select_se_sh = &gfx_v8_0_select_se_sh,
5372 .read_wave_data = &gfx_v8_0_read_wave_data,
5373 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5374 };
5375
5376 static int gfx_v8_0_early_init(void *handle)
5377 {
5378 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5379
5380 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5381 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5382 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5383 gfx_v8_0_set_ring_funcs(adev);
5384 gfx_v8_0_set_irq_funcs(adev);
5385 gfx_v8_0_set_gds_init(adev);
5386 gfx_v8_0_set_rlc_funcs(adev);
5387
5388 return 0;
5389 }
5390
5391 static int gfx_v8_0_late_init(void *handle)
5392 {
5393 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5394 int r;
5395
5396 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5397 if (r)
5398 return r;
5399
5400 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5401 if (r)
5402 return r;
5403
5404 /* requires IBs so do in late init after IB pool is initialized */
5405 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5406 if (r)
5407 return r;
5408
5409 amdgpu_set_powergating_state(adev,
5410 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5411
5412 return 0;
5413 }
5414
5415 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5416 bool enable)
5417 {
5418 if ((adev->asic_type == CHIP_POLARIS11) ||
5419 (adev->asic_type == CHIP_POLARIS12))
5420 /* Send msg to SMU via Powerplay */
5421 amdgpu_set_powergating_state(adev,
5422 AMD_IP_BLOCK_TYPE_SMC,
5423 enable ?
5424 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5425
5426 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5427 }
5428
5429 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5430 bool enable)
5431 {
5432 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5433 }
5434
5435 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5436 bool enable)
5437 {
5438 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5439 }
5440
5441 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5442 bool enable)
5443 {
5444 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5445 }
5446
5447 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5448 bool enable)
5449 {
5450 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5451
5452 /* Read any GFX register to wake up GFX. */
5453 if (!enable)
5454 RREG32(mmDB_RENDER_CONTROL);
5455 }
5456
5457 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5458 bool enable)
5459 {
5460 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5461 cz_enable_gfx_cg_power_gating(adev, true);
5462 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5463 cz_enable_gfx_pipeline_power_gating(adev, true);
5464 } else {
5465 cz_enable_gfx_cg_power_gating(adev, false);
5466 cz_enable_gfx_pipeline_power_gating(adev, false);
5467 }
5468 }
5469
5470 static int gfx_v8_0_set_powergating_state(void *handle,
5471 enum amd_powergating_state state)
5472 {
5473 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5474 bool enable = (state == AMD_PG_STATE_GATE);
5475
5476 if (amdgpu_sriov_vf(adev))
5477 return 0;
5478
5479 switch (adev->asic_type) {
5480 case CHIP_CARRIZO:
5481 case CHIP_STONEY:
5483 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5484 cz_enable_sck_slow_down_on_power_up(adev, true);
5485 cz_enable_sck_slow_down_on_power_down(adev, true);
5486 } else {
5487 cz_enable_sck_slow_down_on_power_up(adev, false);
5488 cz_enable_sck_slow_down_on_power_down(adev, false);
5489 }
5490 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5491 cz_enable_cp_power_gating(adev, true);
5492 else
5493 cz_enable_cp_power_gating(adev, false);
5494
5495 cz_update_gfx_cg_power_gating(adev, enable);
5496
5497 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5498 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5499 else
5500 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5501
5502 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5503 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5504 else
5505 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5506 break;
5507 case CHIP_POLARIS11:
5508 case CHIP_POLARIS12:
5509 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5510 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5511 else
5512 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5513
5514 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5515 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5516 else
5517 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5518
5519 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5520 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5521 else
5522 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5523 break;
5524 default:
5525 break;
5526 }
5527
5528 return 0;
5529 }
5530
5531 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5532 {
5533 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5534 int data;
5535
5536 if (amdgpu_sriov_vf(adev))
5537 *flags = 0;
5538
5539 /* AMD_CG_SUPPORT_GFX_MGCG */
5540 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5541 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5542 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5543
5544 /* AMD_CG_SUPPORT_GFX_CGCG */
5545 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5546 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5547 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5548
5549 /* AMD_CG_SUPPORT_GFX_CGLS */
5550 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5551 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5552
5553 /* AMD_CG_SUPPORT_GFX_CGTS */
5554 data = RREG32(mmCGTS_SM_CTRL_REG);
5555 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5556 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5557
5558 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5559 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5560 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5561
5562 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5563 data = RREG32(mmRLC_MEM_SLP_CNTL);
5564 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5565 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5566
5567 /* AMD_CG_SUPPORT_GFX_CP_LS */
5568 data = RREG32(mmCP_MEM_SLP_CNTL);
5569 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5570 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5571 }
5572
5573 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5574 uint32_t reg_addr, uint32_t cmd)
5575 {
5576 uint32_t data;
5577
5578 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5579
5580 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5581 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5582
5583 data = RREG32(mmRLC_SERDES_WR_CTRL);
5584 if (adev->asic_type == CHIP_STONEY)
5585 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5586 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5587 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5588 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5589 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5590 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5591 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5592 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5593 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5594 else
5595 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5596 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5597 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5598 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5599 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5600 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5601 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5602 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5603 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5604 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5605 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5606 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5607 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5608 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5609 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5610
5611 WREG32(mmRLC_SERDES_WR_CTRL, data);
5612 }
5613
5614 #define MSG_ENTER_RLC_SAFE_MODE 1
5615 #define MSG_EXIT_RLC_SAFE_MODE 0
5616 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5617 #define RLC_GPR_REG2__REQ__SHIFT 0
5618 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5619 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5620
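/*
 * RLC safe mode handshake: a request (CMD plus an enter/exit message)
 * is written to RLC_SAFE_MODE and the CMD bit is polled until the RLC
 * firmware acknowledges it. The clock-gating updates below are
 * bracketed by these enter/exit calls.
 */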
5621 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5622 {
5623 u32 data;
5624 unsigned i;
5625
5626 data = RREG32(mmRLC_CNTL);
5627 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5628 return;
5629
5630 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5631 data |= RLC_SAFE_MODE__CMD_MASK;
5632 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5633 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5634 WREG32(mmRLC_SAFE_MODE, data);
5635
5636 for (i = 0; i < adev->usec_timeout; i++) {
5637 if ((RREG32(mmRLC_GPM_STAT) &
5638 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5639 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5640 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5641 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5642 break;
5643 udelay(1);
5644 }
5645
5646 for (i = 0; i < adev->usec_timeout; i++) {
5647 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5648 break;
5649 udelay(1);
5650 }
5651 adev->gfx.rlc.in_safe_mode = true;
5652 }
5653 }
5654
5655 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5656 {
5657 u32 data = 0;
5658 unsigned i;
5659
5660 data = RREG32(mmRLC_CNTL);
5661 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5662 return;
5663
5664 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5665 if (adev->gfx.rlc.in_safe_mode) {
5666 data |= RLC_SAFE_MODE__CMD_MASK;
5667 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5668 WREG32(mmRLC_SAFE_MODE, data);
5669 adev->gfx.rlc.in_safe_mode = false;
5670 }
5671 }
5672
5673 for (i = 0; i < adev->usec_timeout; i++) {
5674 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5675 break;
5676 udelay(1);
5677 }
5678 }
5679
5680 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5681 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5682 .exit_safe_mode = iceland_exit_rlc_safe_mode
5683 };
5684
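/*
 * Medium-grain clock gating (MGCG/MGLS/CGTS) is toggled by setting or
 * clearing the RLC_CGTT_MGCG_OVERRIDE bits and pushing the matching
 * BPM command to the CU/non-CU SERDES, waiting for the SERDES masters
 * to go idle between steps; the coarse-grain variant below follows
 * the same handshake for CGCG/CGLS.
 */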
5685 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5686 bool enable)
5687 {
5688 uint32_t temp, data;
5689
5690 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5691
5692 /* MGCG is disabled by HW by default */
5693 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5694 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5695 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5696 /* 1 - RLC memory Light sleep */
5697 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5698
5699 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5700 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5701 }
5702
5703 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5704 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5705 if (adev->flags & AMD_IS_APU)
5706 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5707 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5708 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5709 else
5710 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5711 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5712 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5713 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5714
5715 if (temp != data)
5716 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5717
5718 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5719 gfx_v8_0_wait_for_rlc_serdes(adev);
5720
5721 /* 5 - clear mgcg override */
5722 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5723
5724 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5725 /* 6 - Enable CGTS (Tree Shade) MGCG/MGLS */
5726 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5727 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5728 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5729 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5730 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5731 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5732 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5733 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5734 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5735 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5736 if (temp != data)
5737 WREG32(mmCGTS_SM_CTRL_REG, data);
5738 }
5739 udelay(50);
5740
5741 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5742 gfx_v8_0_wait_for_rlc_serdes(adev);
5743 } else {
5744 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5745 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5746 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5747 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5748 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5749 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5750 if (temp != data)
5751 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5752
5753 /* 2 - disable MGLS in RLC */
5754 data = RREG32(mmRLC_MEM_SLP_CNTL);
5755 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5756 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5757 WREG32(mmRLC_MEM_SLP_CNTL, data);
5758 }
5759
5760 /* 3 - disable MGLS in CP */
5761 data = RREG32(mmCP_MEM_SLP_CNTL);
5762 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5763 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5764 WREG32(mmCP_MEM_SLP_CNTL, data);
5765 }
5766
5767 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5768 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5769 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5770 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5771 if (temp != data)
5772 WREG32(mmCGTS_SM_CTRL_REG, data);
5773
5774 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5775 gfx_v8_0_wait_for_rlc_serdes(adev);
5776
5777 /* 6 - set mgcg override */
5778 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5779
5780 udelay(50);
5781
5782 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5783 gfx_v8_0_wait_for_rlc_serdes(adev);
5784 }
5785
5786 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5787 }
5788
5789 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5790 bool enable)
5791 {
5792 uint32_t temp, temp1, data, data1;
5793
5794 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5795
5796 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5797
5798 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5799 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5800 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5801 if (temp1 != data1)
5802 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5803
5804 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5805 gfx_v8_0_wait_for_rlc_serdes(adev);
5806
5807 /* 2 - clear cgcg override */
5808 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5809
5810 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5811 gfx_v8_0_wait_for_rlc_serdes(adev);
5812
5813 /* 3 - write cmd to set CGLS */
5814 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5815
5816 /* 4 - enable cgcg */
5817 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5818
5819 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5820 /* enable cgls */
5821 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5822
5823 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5824 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5825
5826 if (temp1 != data1)
5827 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5828 } else {
5829 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5830 }
5831
5832 if (temp != data)
5833 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5834
5835 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5836 * Cmp_busy/GFX_Idle interrupts
5837 */
5838 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5839 } else {
5840 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5841 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5842
5843 /* TEST CGCG */
5844 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5845 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5846 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5847 if (temp1 != data1)
5848 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5849
5850 /* read gfx register to wake up cgcg */
5851 RREG32(mmCB_CGTT_SCLK_CTRL);
5852 RREG32(mmCB_CGTT_SCLK_CTRL);
5853 RREG32(mmCB_CGTT_SCLK_CTRL);
5854 RREG32(mmCB_CGTT_SCLK_CTRL);
5855
5856 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5857 gfx_v8_0_wait_for_rlc_serdes(adev);
5858
5859 /* write cmd to Set CGCG Override */
5860 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5861
5862 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5863 gfx_v8_0_wait_for_rlc_serdes(adev);
5864
5865 /* write cmd to Clear CGLS */
5866 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5867
5868 /* disable cgcg, cgls should be disabled too. */
5869 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5870 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5871 if (temp != data)
5872 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5873 /* enable interrupts again for PG */
5874 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5875 }
5876
5877 gfx_v8_0_wait_for_rlc_serdes(adev);
5878
5879 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5880 }

5881 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5882 bool enable)
5883 {
5884 if (enable) {
5885 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5886 * === MGCG + MGLS + TS(CG/LS) ===
5887 */
5888 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5889 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5890 } else {
5891 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5892 * === CGCG + CGLS ===
5893 */
5894 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5895 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5896 }
5897 return 0;
5898 }
5899
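/*
 * On Tonga and Polaris, clock gating is not programmed directly;
 * PP_CG_MSG_ID-encoded requests are handed to the SMU through
 * powerplay, one message per gated block.
 */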
5900 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5901 enum amd_clockgating_state state)
5902 {
5903 uint32_t msg_id, pp_state = 0;
5904 uint32_t pp_support_state = 0;
5905 void *pp_handle = adev->powerplay.pp_handle;
5906
5907 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5908 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5909 pp_support_state = PP_STATE_SUPPORT_LS;
5910 pp_state = PP_STATE_LS;
5911 }
5912 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5913 pp_support_state |= PP_STATE_SUPPORT_CG;
5914 pp_state |= PP_STATE_CG;
5915 }
5916 if (state == AMD_CG_STATE_UNGATE)
5917 pp_state = 0;
5918
5919 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5920 PP_BLOCK_GFX_CG,
5921 pp_support_state,
5922 pp_state);
5923 amd_set_clockgating_by_smu(pp_handle, msg_id);
5924 }
5925
5926 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5927 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5928 pp_support_state = PP_STATE_SUPPORT_LS;
5929 pp_state = PP_STATE_LS;
5930 }
5931
5932 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5933 pp_support_state |= PP_STATE_SUPPORT_CG;
5934 pp_state |= PP_STATE_CG;
5935 }
5936
5937 if (state == AMD_CG_STATE_UNGATE)
5938 pp_state = 0;
5939
5940 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5941 PP_BLOCK_GFX_MG,
5942 pp_support_state,
5943 pp_state);
5944 amd_set_clockgating_by_smu(pp_handle, msg_id);
5945 }
5946
5947 return 0;
5948 }
5949
5950 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5951 enum amd_clockgating_state state)
5952 {
5954 uint32_t msg_id, pp_state = 0;
5955 uint32_t pp_support_state = 0;
5956 void *pp_handle = adev->powerplay.pp_handle;
5957
5958 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5959 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5960 pp_support_state = PP_STATE_SUPPORT_LS;
5961 pp_state = PP_STATE_LS;
5962 }
5963 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5964 pp_support_state |= PP_STATE_SUPPORT_CG;
5965 pp_state |= PP_STATE_CG;
5966 }
5967 if (state == AMD_CG_STATE_UNGATE)
5968 pp_state = 0;
5969
5970 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5971 PP_BLOCK_GFX_CG,
5972 pp_support_state,
5973 pp_state);
5974 amd_set_clockgating_by_smu(pp_handle, msg_id);
5975 }
5976
5977 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5978 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5979 pp_support_state = PP_STATE_SUPPORT_LS;
5980 pp_state = PP_STATE_LS;
5981 }
5982 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5983 pp_support_state |= PP_STATE_SUPPORT_CG;
5984 pp_state |= PP_STATE_CG;
5985 }
5986 if (state == AMD_CG_STATE_UNGATE)
5987 pp_state = 0;
5988
5989 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5990 PP_BLOCK_GFX_3D,
5991 pp_support_state,
5992 pp_state);
5993 amd_set_clockgating_by_smu(pp_handle, msg_id);
5994 }
5995
5996 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5997 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5998 pp_support_state = PP_STATE_SUPPORT_LS;
5999 pp_state = PP_STATE_LS;
6000 }
6001
6002 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6003 pp_support_state |= PP_STATE_SUPPORT_CG;
6004 pp_state |= PP_STATE_CG;
6005 }
6006
6007 if (state == AMD_CG_STATE_UNGATE)
6008 pp_state = 0;
6009
6010 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6011 PP_BLOCK_GFX_MG,
6012 pp_support_state,
6013 pp_state);
6014 amd_set_clockgating_by_smu(pp_handle, msg_id);
6015 }
6016
6017 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6018 pp_support_state = PP_STATE_SUPPORT_LS;
6019
6020 if (state == AMD_CG_STATE_UNGATE)
6021 pp_state = 0;
6022 else
6023 pp_state = PP_STATE_LS;
6024
6025 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6026 PP_BLOCK_GFX_RLC,
6027 pp_support_state,
6028 pp_state);
6029 amd_set_clockgating_by_smu(pp_handle, msg_id);
6030 }
6031
6032 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6033 pp_support_state = PP_STATE_SUPPORT_LS;
6034
6035 if (state == AMD_CG_STATE_UNGATE)
6036 pp_state = 0;
6037 else
6038 pp_state = PP_STATE_LS;
6039 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6040 PP_BLOCK_GFX_CP,
6041 pp_support_state,
6042 pp_state);
6043 amd_set_clockgating_by_smu(pp_handle, msg_id);
6044 }
6045
6046 return 0;
6047 }
6048
6049 static int gfx_v8_0_set_clockgating_state(void *handle,
6050 enum amd_clockgating_state state)
6051 {
6052 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6053
6054 if (amdgpu_sriov_vf(adev))
6055 return 0;
6056
6057 switch (adev->asic_type) {
6058 case CHIP_FIJI:
6059 case CHIP_CARRIZO:
6060 case CHIP_STONEY:
6061 gfx_v8_0_update_gfx_clock_gating(adev,
6062 state == AMD_CG_STATE_GATE);
6063 break;
6064 case CHIP_TONGA:
6065 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6066 break;
6067 case CHIP_POLARIS10:
6068 case CHIP_POLARIS11:
6069 case CHIP_POLARIS12:
6070 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6071 break;
6072 default:
6073 break;
6074 }
6075 return 0;
6076 }
6077
6078 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6079 {
6080 return ring->adev->wb.wb[ring->rptr_offs];
6081 }
6082
6083 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6084 {
6085 struct amdgpu_device *adev = ring->adev;
6086
6087 if (ring->use_doorbell)
6088 /* XXX check if swapping is necessary on BE */
6089 return ring->adev->wb.wb[ring->wptr_offs];
6090 else
6091 return RREG32(mmCP_RB0_WPTR);
6092 }
6093
6094 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6095 {
6096 struct amdgpu_device *adev = ring->adev;
6097
6098 if (ring->use_doorbell) {
6099 /* XXX check if swapping is necessary on BE */
6100 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6101 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6102 } else {
6103 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6104 (void)RREG32(mmCP_RB0_WPTR);
6105 }
6106 }
6107
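/*
 * Emit an HDP (Host Data Path) flush: a WAIT_REG_MEM packet makes the CP
 * write GPU_HDP_FLUSH_REQ and then poll GPU_HDP_FLUSH_DONE until the
 * per-client bit selected by ref_and_mask reads back, guaranteeing that
 * host-visible writes have landed.  Compute/KIQ rings pick the CP2/CP6
 * bit range for their ME and pipe; the gfx ring uses CP0 and waits on
 * the PFP engine.
 */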
6108 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6109 {
6110 u32 ref_and_mask, reg_mem_engine;
6111
6112 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6113 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6114 switch (ring->me) {
6115 case 1:
6116 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6117 break;
6118 case 2:
6119 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6120 break;
6121 default:
6122 return;
6123 }
6124 reg_mem_engine = 0;
6125 } else {
6126 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6127 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6128 }
6129
6130 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6131 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6132 WAIT_REG_MEM_FUNCTION(3) | /* == */
6133 reg_mem_engine));
6134 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6135 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6136 amdgpu_ring_write(ring, ref_and_mask);
6137 amdgpu_ring_write(ring, ref_and_mask);
6138 amdgpu_ring_write(ring, 0x20); /* poll interval */
6139 }
6140
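/* Flush the geometry pipeline: a VS_PARTIAL_FLUSH event drains in-flight
 * vertex work before the VGT_FLUSH event itself is emitted.
 */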
6141 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6142 {
6143 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6144 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6145 EVENT_INDEX(4));
6146
6147 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6148 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6149 EVENT_INDEX(0));
6150 }
6151
6152
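/* Invalidate the HDP read cache by writing a dummy value to HDP_DEBUG0;
 * on VI parts this register write has the side effect of invalidating
 * the HDP cache.
 */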
6153 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6154 {
6155 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6156 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6157 WRITE_DATA_DST_SEL(0) |
6158 WR_CONFIRM));
6159 amdgpu_ring_write(ring, mmHDP_DEBUG0);
6160 amdgpu_ring_write(ring, 0);
6161 amdgpu_ring_write(ring, 1);
6162
6163 }
6164
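/* Emit an indirect buffer on the gfx ring.  CE IBs use the
 * INDIRECT_BUFFER_CONST packet, DE IBs the plain INDIRECT_BUFFER one.
 * Under SR-IOV, preemptible IBs additionally set PRE_ENB and DE IBs are
 * preceded by DE metadata so the IB can be resumed after preemption.
 */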
6165 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6166 struct amdgpu_ib *ib,
6167 unsigned vm_id, bool ctx_switch)
6168 {
6169 u32 header, control = 0;
6170
6171 if (ib->flags & AMDGPU_IB_FLAG_CE)
6172 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6173 else
6174 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6175
6176 control |= ib->length_dw | (vm_id << 24);
6177
6178 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6179 control |= INDIRECT_BUFFER_PRE_ENB(1);
6180
6181 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6182 gfx_v8_0_ring_emit_de_meta(ring);
6183 }
6184
6185 amdgpu_ring_write(ring, header);
6186 amdgpu_ring_write(ring,
6187 #ifdef __BIG_ENDIAN
6188 (2 << 0) |
6189 #endif
6190 (ib->gpu_addr & 0xFFFFFFFC));
6191 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6192 amdgpu_ring_write(ring, control);
6193 }
6194
6195 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6196 struct amdgpu_ib *ib,
6197 unsigned vm_id, bool ctx_switch)
6198 {
6199 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6200
6201 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6202 amdgpu_ring_write(ring,
6203 #ifdef __BIG_ENDIAN
6204 (2 << 0) |
6205 #endif
6206 (ib->gpu_addr & 0xFFFFFFFC));
6207 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6208 amdgpu_ring_write(ring, control);
6209 }
6210
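/* Emit a gfx fence: an EVENT_WRITE_EOP packet that flushes the TCL1/TC
 * caches and, once all prior work has retired, writes the 32- or 64-bit
 * sequence number to "addr" and optionally raises an interrupt.
 */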
6211 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6212 u64 seq, unsigned flags)
6213 {
6214 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6215 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6216
6217 /* EVENT_WRITE_EOP - flush caches, send int */
6218 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6219 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6220 EOP_TC_ACTION_EN |
6221 EOP_TC_WB_ACTION_EN |
6222 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6223 EVENT_INDEX(5)));
6224 amdgpu_ring_write(ring, addr & 0xfffffffc);
6225 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6226 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6227 amdgpu_ring_write(ring, lower_32_bits(seq));
6228 amdgpu_ring_write(ring, upper_32_bits(seq));
6229
6230 }
6231
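/* Wait for previously emitted work to complete: poll the fence address
 * with WAIT_REG_MEM until it reaches sync_seq.  The gfx ring waits on
 * the PFP so that command fetch itself stalls.
 */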
6232 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6233 {
6234 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6235 uint32_t seq = ring->fence_drv.sync_seq;
6236 uint64_t addr = ring->fence_drv.gpu_addr;
6237
6238 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6239 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6240 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6241 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6242 amdgpu_ring_write(ring, addr & 0xfffffffc);
6243 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6244 amdgpu_ring_write(ring, seq);
6245 amdgpu_ring_write(ring, 0xffffffff);
6246 amdgpu_ring_write(ring, 4); /* poll interval */
6247 }
6248
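/* Flush a VM context: write the new page directory base for vm_id,
 * request an invalidate for that context via VM_INVALIDATE_REQUEST,
 * wait on the same register for the invalidate to complete, and
 * finally (gfx ring only) sync the PFP to the ME so stale prefetched
 * data is not used.
 */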
6249 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6250 unsigned vm_id, uint64_t pd_addr)
6251 {
6252 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6253
6254 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6255 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6256 WRITE_DATA_DST_SEL(0)) |
6257 WR_CONFIRM);
6258 if (vm_id < 8) {
6259 amdgpu_ring_write(ring,
6260 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6261 } else {
6262 amdgpu_ring_write(ring,
6263 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6264 }
6265 amdgpu_ring_write(ring, 0);
6266 amdgpu_ring_write(ring, pd_addr >> 12);
6267
6268 	/* bits 0-15 are the VM contexts 0-15 */
6269 /* invalidate the cache */
6270 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6271 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6272 WRITE_DATA_DST_SEL(0)));
6273 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6274 amdgpu_ring_write(ring, 0);
6275 amdgpu_ring_write(ring, 1 << vm_id);
6276
6277 /* wait for the invalidate to complete */
6278 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6279 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6280 WAIT_REG_MEM_FUNCTION(0) | /* always */
6281 WAIT_REG_MEM_ENGINE(0))); /* me */
6282 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6283 amdgpu_ring_write(ring, 0);
6284 amdgpu_ring_write(ring, 0); /* ref */
6285 amdgpu_ring_write(ring, 0); /* mask */
6286 amdgpu_ring_write(ring, 0x20); /* poll interval */
6287
6288 /* compute doesn't have PFP */
6289 if (usepfp) {
6290 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6291 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6292 amdgpu_ring_write(ring, 0x0);
6293 }
6294 }
6295
6296 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6297 {
6298 return ring->adev->wb.wb[ring->wptr_offs];
6299 }
6300
6301 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6302 {
6303 struct amdgpu_device *adev = ring->adev;
6304
6305 /* XXX check if swapping is necessary on BE */
6306 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6307 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6308 }
6309
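/* Compute fences use RELEASE_MEM instead of EVENT_WRITE_EOP: the cache
 * flush actions are the same, but the packet layout carries the
 * data/interrupt select in a separate dword before the address.
 */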
6310 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6311 u64 addr, u64 seq,
6312 unsigned flags)
6313 {
6314 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6315 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6316
6317 /* RELEASE_MEM - flush caches, send int */
6318 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6319 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6320 EOP_TC_ACTION_EN |
6321 EOP_TC_WB_ACTION_EN |
6322 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6323 EVENT_INDEX(5)));
6324 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6325 amdgpu_ring_write(ring, addr & 0xfffffffc);
6326 amdgpu_ring_write(ring, upper_32_bits(addr));
6327 amdgpu_ring_write(ring, lower_32_bits(seq));
6328 amdgpu_ring_write(ring, upper_32_bits(seq));
6329 }
6330
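/* KIQ fences are plain WRITE_DATA packets: the 32-bit seq is written to
 * the writeback slot and, if an interrupt is requested, a second write
 * sets bit 29 of CPC_INT_STATUS (presumably the GENERIC2 status bit,
 * delivered with IV src_id 178) to trip the CPC interrupt.
 */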
6331 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6332 u64 seq, unsigned int flags)
6333 {
6334 	/* we only allocate 32 bits for each fence seq writeback address */
6335 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6336
6337 /* write fence seq to the "addr" */
6338 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6339 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6340 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6341 amdgpu_ring_write(ring, lower_32_bits(addr));
6342 amdgpu_ring_write(ring, upper_32_bits(addr));
6343 amdgpu_ring_write(ring, lower_32_bits(seq));
6344
6345 if (flags & AMDGPU_FENCE_FLAG_INT) {
6346 /* set register to trigger INT */
6347 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6348 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6349 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6350 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6351 amdgpu_ring_write(ring, 0);
6352 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6353 }
6354 }
6355
6356 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6357 {
6358 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6359 amdgpu_ring_write(ring, 0);
6360 }
6361
6362 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6363 {
6364 uint32_t dw2 = 0;
6365
6366 if (amdgpu_sriov_vf(ring->adev))
6367 gfx_v8_0_ring_emit_ce_meta(ring);
6368
6369 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
6370 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6371 gfx_v8_0_ring_emit_vgt_flush(ring);
6372 /* set load_global_config & load_global_uconfig */
6373 dw2 |= 0x8001;
6374 /* set load_cs_sh_regs */
6375 dw2 |= 0x01000000;
6376 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6377 dw2 |= 0x10002;
6378
6379 		/* set load_ce_ram if a preamble is present */
6380 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6381 dw2 |= 0x10000000;
6382 } else {
6383 		/* still load_ce_ram if this is the first time a preamble is
6384 		 * presented, even though no context switch happens.
6385 		 */
6386 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6387 dw2 |= 0x10000000;
6388 }
6389
6390 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6391 amdgpu_ring_write(ring, dw2);
6392 amdgpu_ring_write(ring, 0);
6393 }
6394
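/* Emit a COND_EXEC packet whose dword count is not yet known: a
 * 0x55aa55aa placeholder is written and its ring offset returned, so
 * gfx_v8_0_ring_emit_patch_cond_exec() below can patch in the real
 * number of dwords to skip once the frame has been fully emitted.
 */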
6395 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6396 {
6397 unsigned ret;
6398
6399 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6400 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6401 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6402 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
6403 ret = ring->wptr & ring->buf_mask;
6404 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6405 return ret;
6406 }
6407
6408 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6409 {
6410 unsigned cur;
6411
6412 BUG_ON(offset > ring->buf_mask);
6413 BUG_ON(ring->ring[offset] != 0x55aa55aa);
6414
6415 cur = (ring->wptr & ring->buf_mask) - 1;
6416 if (likely(cur > offset))
6417 ring->ring[offset] = cur - offset;
6418 else
6419 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6420 }
6421
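/* Read a register via the ring (used by the KIQ under SR-IOV): a
 * COPY_DATA packet copies the register into the reg_val_offs slot of
 * the writeback page, where the driver can then pick up the value.
 */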
6422 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6423 {
6424 struct amdgpu_device *adev = ring->adev;
6425
6426 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6427 	amdgpu_ring_write(ring, 0 |	/* src: register */
6428 (5 << 8) | /* dst: memory */
6429 (1 << 20)); /* write confirm */
6430 amdgpu_ring_write(ring, reg);
6431 amdgpu_ring_write(ring, 0);
6432 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6433 adev->virt.reg_val_offs * 4));
6434 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6435 adev->virt.reg_val_offs * 4));
6436 }
6437
6438 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6439 uint32_t val)
6440 {
6441 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6442 amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
6443 amdgpu_ring_write(ring, reg);
6444 amdgpu_ring_write(ring, 0);
6445 amdgpu_ring_write(ring, val);
6446 }
6447
6448 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6449 enum amdgpu_interrupt_state state)
6450 {
6451 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6452 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6453 }
6454
6455 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6456 int me, int pipe,
6457 enum amdgpu_interrupt_state state)
6458 {
6459 u32 mec_int_cntl, mec_int_cntl_reg;
6460
6461 /*
6462 * amdgpu controls only the first MEC. That's why this function only
6463 * handles the setting of interrupts for this specific MEC. All other
6464 * pipes' interrupts are set by amdkfd.
6465 */
6466
6467 if (me == 1) {
6468 switch (pipe) {
6469 case 0:
6470 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6471 break;
6472 case 1:
6473 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6474 break;
6475 case 2:
6476 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6477 break;
6478 case 3:
6479 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6480 break;
6481 default:
6482 DRM_DEBUG("invalid pipe %d\n", pipe);
6483 return;
6484 }
6485 } else {
6486 DRM_DEBUG("invalid me %d\n", me);
6487 return;
6488 }
6489
6490 switch (state) {
6491 case AMDGPU_IRQ_STATE_DISABLE:
6492 mec_int_cntl = RREG32(mec_int_cntl_reg);
6493 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6494 WREG32(mec_int_cntl_reg, mec_int_cntl);
6495 break;
6496 case AMDGPU_IRQ_STATE_ENABLE:
6497 mec_int_cntl = RREG32(mec_int_cntl_reg);
6498 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6499 WREG32(mec_int_cntl_reg, mec_int_cntl);
6500 break;
6501 default:
6502 break;
6503 }
6504 }
6505
6506 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6507 struct amdgpu_irq_src *source,
6508 unsigned type,
6509 enum amdgpu_interrupt_state state)
6510 {
6511 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6512 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6513
6514 return 0;
6515 }
6516
6517 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6518 struct amdgpu_irq_src *source,
6519 unsigned type,
6520 enum amdgpu_interrupt_state state)
6521 {
6522 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6523 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6524
6525 return 0;
6526 }
6527
6528 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6529 struct amdgpu_irq_src *src,
6530 unsigned type,
6531 enum amdgpu_interrupt_state state)
6532 {
6533 switch (type) {
6534 case AMDGPU_CP_IRQ_GFX_EOP:
6535 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6536 break;
6537 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6538 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6539 break;
6540 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6541 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6542 break;
6543 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6544 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6545 break;
6546 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6547 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6548 break;
6549 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6550 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6551 break;
6552 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6553 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6554 break;
6555 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6556 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6557 break;
6558 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6559 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6560 break;
6561 default:
6562 break;
6563 }
6564 return 0;
6565 }
6566
6567 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6568 struct amdgpu_irq_src *source,
6569 struct amdgpu_iv_entry *entry)
6570 {
6571 int i;
6572 u8 me_id, pipe_id, queue_id;
6573 struct amdgpu_ring *ring;
6574
6575 DRM_DEBUG("IH: CP EOP\n");
6576 me_id = (entry->ring_id & 0x0c) >> 2;
6577 pipe_id = (entry->ring_id & 0x03) >> 0;
6578 queue_id = (entry->ring_id & 0x70) >> 4;
6579
6580 switch (me_id) {
6581 case 0:
6582 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6583 break;
6584 case 1:
6585 case 2:
6586 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6587 ring = &adev->gfx.compute_ring[i];
6588 /* Per-queue interrupt is supported for MEC starting from VI.
6589 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6590 */
6591 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6592 amdgpu_fence_process(ring);
6593 }
6594 break;
6595 }
6596 return 0;
6597 }
6598
6599 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6600 struct amdgpu_irq_src *source,
6601 struct amdgpu_iv_entry *entry)
6602 {
6603 DRM_ERROR("Illegal register access in command stream\n");
6604 schedule_work(&adev->reset_work);
6605 return 0;
6606 }
6607
6608 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6609 struct amdgpu_irq_src *source,
6610 struct amdgpu_iv_entry *entry)
6611 {
6612 DRM_ERROR("Illegal instruction in command stream\n");
6613 schedule_work(&adev->reset_work);
6614 return 0;
6615 }
6616
6617 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6618 struct amdgpu_irq_src *src,
6619 unsigned int type,
6620 enum amdgpu_interrupt_state state)
6621 {
6622 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6623
6624 switch (type) {
6625 case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6626 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
6627 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6628 if (ring->me == 1)
6629 WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
6630 ring->pipe,
6631 GENERIC2_INT_ENABLE,
6632 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6633 else
6634 WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
6635 ring->pipe,
6636 GENERIC2_INT_ENABLE,
6637 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6638 break;
6639 default:
6640 		BUG(); /* KIQ only supports GENERIC2_INT for now */
6641 break;
6642 }
6643 return 0;
6644 }
6645
6646 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6647 struct amdgpu_irq_src *source,
6648 struct amdgpu_iv_entry *entry)
6649 {
6650 u8 me_id, pipe_id, queue_id;
6651 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6652
6653 me_id = (entry->ring_id & 0x0c) >> 2;
6654 pipe_id = (entry->ring_id & 0x03) >> 0;
6655 queue_id = (entry->ring_id & 0x70) >> 4;
6656 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6657 me_id, pipe_id, queue_id);
6658
6659 amdgpu_fence_process(ring);
6660 return 0;
6661 }
6662
6663 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6664 .name = "gfx_v8_0",
6665 .early_init = gfx_v8_0_early_init,
6666 .late_init = gfx_v8_0_late_init,
6667 .sw_init = gfx_v8_0_sw_init,
6668 .sw_fini = gfx_v8_0_sw_fini,
6669 .hw_init = gfx_v8_0_hw_init,
6670 .hw_fini = gfx_v8_0_hw_fini,
6671 .suspend = gfx_v8_0_suspend,
6672 .resume = gfx_v8_0_resume,
6673 .is_idle = gfx_v8_0_is_idle,
6674 .wait_for_idle = gfx_v8_0_wait_for_idle,
6675 .check_soft_reset = gfx_v8_0_check_soft_reset,
6676 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6677 .soft_reset = gfx_v8_0_soft_reset,
6678 .post_soft_reset = gfx_v8_0_post_soft_reset,
6679 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6680 .set_powergating_state = gfx_v8_0_set_powergating_state,
6681 .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6682 };
6683
6684 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6685 .type = AMDGPU_RING_TYPE_GFX,
6686 .align_mask = 0xff,
6687 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6688 .support_64bit_ptrs = false,
6689 .get_rptr = gfx_v8_0_ring_get_rptr,
6690 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6691 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6692 	.emit_frame_size = /* maximum 215 dwords if 16 IBs are counted in */
6693 5 + /* COND_EXEC */
6694 7 + /* PIPELINE_SYNC */
6695 19 + /* VM_FLUSH */
6696 8 + /* FENCE for VM_FLUSH */
6697 20 + /* GDS switch */
6698 		4 + /* double SWITCH_BUFFER,
6699 		       the first COND_EXEC jumps to the place just
6700 		       prior to this double SWITCH_BUFFER */
6701 5 + /* COND_EXEC */
6702 7 + /* HDP_flush */
6703 4 + /* VGT_flush */
6704 14 + /* CE_META */
6705 31 + /* DE_META */
6706 3 + /* CNTX_CTRL */
6707 5 + /* HDP_INVL */
6708 8 + 8 + /* FENCE x2 */
6709 2, /* SWITCH_BUFFER */
6710 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6711 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6712 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6713 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6714 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6715 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6716 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6717 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6718 .test_ring = gfx_v8_0_ring_test_ring,
6719 .test_ib = gfx_v8_0_ring_test_ib,
6720 .insert_nop = amdgpu_ring_insert_nop,
6721 .pad_ib = amdgpu_ring_generic_pad_ib,
6722 .emit_switch_buffer = gfx_v8_ring_emit_sb,
6723 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6724 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6725 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6726 };
6727
6728 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6729 .type = AMDGPU_RING_TYPE_COMPUTE,
6730 .align_mask = 0xff,
6731 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6732 .support_64bit_ptrs = false,
6733 .get_rptr = gfx_v8_0_ring_get_rptr,
6734 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6735 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6736 .emit_frame_size =
6737 20 + /* gfx_v8_0_ring_emit_gds_switch */
6738 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6739 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6740 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6741 17 + /* gfx_v8_0_ring_emit_vm_flush */
6742 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6743 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6744 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6745 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6746 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6747 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6748 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6749 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6750 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6751 .test_ring = gfx_v8_0_ring_test_ring,
6752 .test_ib = gfx_v8_0_ring_test_ib,
6753 .insert_nop = amdgpu_ring_insert_nop,
6754 .pad_ib = amdgpu_ring_generic_pad_ib,
6755 };
6756
6757 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6758 .type = AMDGPU_RING_TYPE_KIQ,
6759 .align_mask = 0xff,
6760 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6761 .support_64bit_ptrs = false,
6762 .get_rptr = gfx_v8_0_ring_get_rptr,
6763 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6764 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6765 .emit_frame_size =
6766 20 + /* gfx_v8_0_ring_emit_gds_switch */
6767 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6768 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6769 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6770 17 + /* gfx_v8_0_ring_emit_vm_flush */
6771 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6772 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6773 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6774 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6775 .test_ring = gfx_v8_0_ring_test_ring,
6776 .test_ib = gfx_v8_0_ring_test_ib,
6777 .insert_nop = amdgpu_ring_insert_nop,
6778 .pad_ib = amdgpu_ring_generic_pad_ib,
6779 .emit_rreg = gfx_v8_0_ring_emit_rreg,
6780 .emit_wreg = gfx_v8_0_ring_emit_wreg,
6781 };
6782
6783 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6784 {
6785 int i;
6786
6787 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6788
6789 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6790 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6791
6792 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6793 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6794 }
6795
6796 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6797 .set = gfx_v8_0_set_eop_interrupt_state,
6798 .process = gfx_v8_0_eop_irq,
6799 };
6800
6801 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6802 .set = gfx_v8_0_set_priv_reg_fault_state,
6803 .process = gfx_v8_0_priv_reg_irq,
6804 };
6805
6806 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6807 .set = gfx_v8_0_set_priv_inst_fault_state,
6808 .process = gfx_v8_0_priv_inst_irq,
6809 };
6810
6811 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
6812 .set = gfx_v8_0_kiq_set_interrupt_state,
6813 .process = gfx_v8_0_kiq_irq,
6814 };
6815
6816 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6817 {
6818 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6819 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6820
6821 adev->gfx.priv_reg_irq.num_types = 1;
6822 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6823
6824 adev->gfx.priv_inst_irq.num_types = 1;
6825 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6826
6827 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
6828 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
6829 }
6830
6831 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6832 {
6833 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6834 }
6835
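/* Partition the on-chip GDS memory, GWS and OA resources between gfx
 * and compute, based on the total GDS size reported by GDS_VMID0_SIZE.
 */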
6836 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6837 {
6838 	/* init asic gds info */
6839 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6840 adev->gds.gws.total_size = 64;
6841 adev->gds.oa.total_size = 16;
6842
6843 if (adev->gds.mem.total_size == 64 * 1024) {
6844 adev->gds.mem.gfx_partition_size = 4096;
6845 adev->gds.mem.cs_partition_size = 4096;
6846
6847 adev->gds.gws.gfx_partition_size = 4;
6848 adev->gds.gws.cs_partition_size = 4;
6849
6850 adev->gds.oa.gfx_partition_size = 4;
6851 adev->gds.oa.cs_partition_size = 1;
6852 } else {
6853 adev->gds.mem.gfx_partition_size = 1024;
6854 adev->gds.mem.cs_partition_size = 1024;
6855
6856 adev->gds.gws.gfx_partition_size = 16;
6857 adev->gds.gws.cs_partition_size = 16;
6858
6859 adev->gds.oa.gfx_partition_size = 4;
6860 adev->gds.oa.cs_partition_size = 4;
6861 }
6862 }
6863
6864 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6865 u32 bitmap)
6866 {
6867 u32 data;
6868
6869 if (!bitmap)
6870 return;
6871
6872 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6873 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6874
6875 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6876 }
6877
6878 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6879 {
6880 u32 data, mask;
6881
6882 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6883 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6884
6885 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6886
6887 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6888 }
6889
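/* Walk every shader engine / shader array, record the active-CU bitmap
 * per SH, and build the "always on" CU mask from the first ao_cu_num
 * active CUs of each SH (APUs keep 2 CUs always on, dGPUs the whole
 * SH).  Only the first 2x2 SE/SH grid is packed into the 32-bit
 * ao_cu_mask.
 */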
6890 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6891 {
6892 int i, j, k, counter, active_cu_number = 0;
6893 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6894 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6895 unsigned disable_masks[4 * 2];
6896 u32 ao_cu_num;
6897
6898 memset(cu_info, 0, sizeof(*cu_info));
6899
6900 if (adev->flags & AMD_IS_APU)
6901 ao_cu_num = 2;
6902 else
6903 ao_cu_num = adev->gfx.config.max_cu_per_sh;
6904
6905 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6906
6907 mutex_lock(&adev->grbm_idx_mutex);
6908 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6909 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6910 mask = 1;
6911 ao_bitmap = 0;
6912 counter = 0;
6913 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6914 if (i < 4 && j < 2)
6915 gfx_v8_0_set_user_cu_inactive_bitmap(
6916 adev, disable_masks[i * 2 + j]);
6917 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6918 cu_info->bitmap[i][j] = bitmap;
6919
6920 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6921 				if (bitmap & mask) {
6922 					if (counter < ao_cu_num)
6923 						ao_bitmap |= mask;
6924 					counter++;
6925 }
6926 mask <<= 1;
6927 }
6928 active_cu_number += counter;
6929 if (i < 2 && j < 2)
6930 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6931 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
6932 }
6933 }
6934 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6935 mutex_unlock(&adev->grbm_idx_mutex);
6936
6937 cu_info->number = active_cu_number;
6938 cu_info->ao_cu_mask = ao_cu_mask;
6939 }
6940
6941 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
6942 {
6943 .type = AMD_IP_BLOCK_TYPE_GFX,
6944 .major = 8,
6945 .minor = 0,
6946 .rev = 0,
6947 .funcs = &gfx_v8_0_ip_funcs,
6948 };
6949
6950 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
6951 {
6952 .type = AMD_IP_BLOCK_TYPE_GFX,
6953 .major = 8,
6954 .minor = 1,
6955 .rev = 0,
6956 .funcs = &gfx_v8_0_ip_funcs,
6957 };
6958
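/* Write the CE metadata into the context save area (CSA) used for
 * SR-IOV mid-command-buffer preemption.  The CSA occupies the last two
 * 4K pages of the reserved VA range; a zeroed payload is streamed with
 * WRITE_DATA through the CE engine.
 */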
6959 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
6960 {
6961 uint64_t ce_payload_addr;
6962 int cnt_ce;
6963 static union {
6964 struct vi_ce_ib_state regular;
6965 struct vi_ce_ib_state_chained_ib chained;
6966 } ce_payload = {};
6967
6968 if (ring->adev->virt.chained_ib_support) {
6969 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
6970 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
6971 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
6972 } else {
6973 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
6974 offsetof(struct vi_gfx_meta_data, ce_payload);
6975 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
6976 }
6977
6978 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
6979 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
6980 WRITE_DATA_DST_SEL(8) |
6981 WR_CONFIRM) |
6982 WRITE_DATA_CACHE_POLICY(0));
6983 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
6984 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
6985 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
6986 }
6987
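/* DE counterpart of the above: additionally records the GDS backup
 * address (one page above the CSA base) in the payload before writing
 * it out.
 */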
6988 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
6989 {
6990 uint64_t de_payload_addr, gds_addr, csa_addr;
6991 int cnt_de;
6992 static union {
6993 struct vi_de_ib_state regular;
6994 struct vi_de_ib_state_chained_ib chained;
6995 } de_payload = {};
6996
6997 csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
6998 gds_addr = csa_addr + 4096;
6999 if (ring->adev->virt.chained_ib_support) {
7000 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7001 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7002 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7003 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7004 } else {
7005 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7006 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7007 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7008 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7009 }
7010
7011 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7012 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7013 WRITE_DATA_DST_SEL(8) |
7014 WR_CONFIRM) |
7015 WRITE_DATA_CACHE_POLICY(0));
7016 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7017 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7018 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7019 }