]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drm/amdgpu: unify MQD programming sequence for kfd and amdgpu v2
[mirror_ubuntu-hirsute-kernel.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
CommitLineData
aaa36a97
AD
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23#include <linux/firmware.h>
24#include "drmP.h"
25#include "amdgpu.h"
26#include "amdgpu_gfx.h"
27#include "vi.h"
aeab2032 28#include "vi_structs.h"
aaa36a97
AD
29#include "vid.h"
30#include "amdgpu_ucode.h"
68182d90 31#include "amdgpu_atombios.h"
eeade25a 32#include "atombios_i2c.h"
aaa36a97
AD
33#include "clearstate_vi.h"
34
35#include "gmc/gmc_8_2_d.h"
36#include "gmc/gmc_8_2_sh_mask.h"
37
38#include "oss/oss_3_0_d.h"
39#include "oss/oss_3_0_sh_mask.h"
40
41#include "bif/bif_5_0_d.h"
42#include "bif/bif_5_0_sh_mask.h"
43
44#include "gca/gfx_8_0_d.h"
45#include "gca/gfx_8_0_enum.h"
46#include "gca/gfx_8_0_sh_mask.h"
47#include "gca/gfx_8_0_enum.h"
48
aaa36a97
AD
49#include "dce/dce_10_0_d.h"
50#include "dce/dce_10_0_sh_mask.h"
51
d9d533c1
KW
52#include "smu/smu_7_1_3_d.h"
53
aaa36a97
AD
54#define GFX8_NUM_GFX_RINGS 1
55#define GFX8_NUM_COMPUTE_RINGS 8
268cb4c7 56#define GFX8_MEC_HPD_SIZE 2048
aaa36a97
AD
57
58#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
59#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
2cc0c0b5 60#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
aaa36a97
AD
61#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
62
63#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
64#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
65#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
66#define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
67#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
68#define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
69#define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
70#define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
71#define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
72
6e378858
EH
73#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
74#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
75#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
76#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
77#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
78#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L
79
80/* BPM SERDES CMD */
81#define SET_BPM_SERDES_CMD 1
82#define CLE_BPM_SERDES_CMD 0
83
84/* BPM Register Address*/
85enum {
86 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */
87 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */
88 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */
89 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */
90 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */
91 BPM_REG_FGCG_MAX
92};
93
2b6cd977
EH
94#define RLC_FormatDirectRegListLength 14
95
c65444fe
JZ
96MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
97MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
98MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
99MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
100MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
101MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
102
e3c7656c
SL
103MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
104MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
105MODULE_FIRMWARE("amdgpu/stoney_me.bin");
106MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
107MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
108
c65444fe
JZ
109MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
110MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
111MODULE_FIRMWARE("amdgpu/tonga_me.bin");
112MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
113MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
114MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
115
116MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
117MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
118MODULE_FIRMWARE("amdgpu/topaz_me.bin");
119MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
c65444fe 120MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
aaa36a97 121
af15a2d5
DZ
122MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
123MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
124MODULE_FIRMWARE("amdgpu/fiji_me.bin");
125MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
126MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
127MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
128
2cc0c0b5
FC
129MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
130MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
131MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
132MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
133MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
134MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
68182d90 135
2cc0c0b5
FC
136MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
137MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
138MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
139MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
140MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
141MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
68182d90 142
c4642a47
JZ
143MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
144MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
145MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
146MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
147MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
148MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
149
aaa36a97
AD
/*
 * Per-VMID GDS register offsets, indexed by VMID (0..15).
 * Each entry lists the BASE, SIZE, GWS and OA registers for one VMID.
 * NOTE(review): field order assumed to match struct amdgpu_gds_reg_offset —
 * confirm against its declaration in the amdgpu headers.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
169
/*
 * Tonga "golden" register settings, applied at init via
 * amdgpu_program_register_sequence() (see gfx_v8_0_init_golden_registers).
 * Layout: flat u32 array of consecutive {register, mask, value} triplets —
 * assumed from that usage; confirm against amdgpu_program_register_sequence().
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Tonga common init values (raster config, addressing, SPI CU reservations). */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/*
 * Tonga medium-grain / coarse-grain clock-gating init sequence.
 * Programs the CGTT_*_CLK_CTRL block, then per-CU CGTS registers (CU0-CU7).
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU gating setup; CU0/CU4 use the TA_SQC variant register */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
280
/*
 * Polaris11 (also used for Polaris12 — see gfx_v8_0_init_golden_registers)
 * golden register settings: {register, mask, value} triplets for
 * amdgpu_program_register_sequence().
 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Polaris11 common init values (GB_ADDR_CONFIG 0x22011002, SPI CU reservations). */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
311
/*
 * Polaris10 golden register settings: {register, mask, value} triplets for
 * amdgpu_program_register_sequence().
 */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Polaris10 common init values (raster config, addressing, SPI CU reservations). */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
344
af15a2d5
DZ
/* Fiji common init values ({register, mask, value} triplets). */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

/* Fiji golden register settings. */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/*
 * Fiji MGCG/CGCG clock-gating init sequence; unlike Tonga's table it has no
 * per-CU CGTS entries.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
412
aaa36a97
AD
/*
 * Iceland/Topaz golden register settings ({register, mask, value} triplets,
 * applied for CHIP_TOPAZ in gfx_v8_0_init_golden_registers).
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

/* Iceland common init values. */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/*
 * Iceland MGCG/CGCG clock-gating init sequence. Per-CU CGTS entries cover
 * CU0-CU5 only; CU0/CU4 use the TA_SQC variant with a distinct value.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
512
/* Carrizo (cz) golden register settings ({register, mask, value} triplets). */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

/* Carrizo common init values. */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/*
 * Carrizo MGCG/CGCG clock-gating init sequence, including per-CU CGTS
 * entries for CU0-CU7 (CU0/CU4 use the TA_SQC variant register).
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
619
e3c7656c
SL
/* Stoney golden register settings ({register, mask, value} triplets). */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

/* Stoney common init values. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

/* Stoney MGCG/CGCG init — much shorter than the dGPU tables. */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
654
aaa36a97
AD
655static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
656static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
657static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
dbff57bc 658static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
2b6cd977 659static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
7dae69a2 660static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
95243543
ML
661static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
662static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
0875a242
AD
663static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
664static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);
aaa36a97
AD
665
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 *
 * Applies the hardware-team-recommended register sequences (clockgating
 * init, tuning values, common settings) for each supported VI ASIC.
 * Unrecognized ASICs are left untouched.  The order of the sequences per
 * ASIC matches the tables defined earlier in this file.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific workaround keyed on PCI revision and
		 * subsystem vendor/device IDs; the two I2C transactions
		 * presumably tweak an on-board component — exact purpose
		 * is not visible here (TODO confirm against board specs).
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
754
755static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
756{
aaa36a97
AD
757 adev->gfx.scratch.num_reg = 7;
758 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
50261151 759 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
aaa36a97
AD
760}
761
/*
 * gfx_v8_0_ring_test_ring - basic CP ring sanity test
 *
 * Seeds a scratch register with a sentinel, submits a SET_UCONFIG_REG
 * packet through the ring that writes 0xDEADBEEF to it, then polls the
 * register for up to adev->usec_timeout microseconds.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* sentinel so a stale 0xDEADBEEF can't fake a pass */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* 3-dword packet: header, register offset, value to write */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
805
/*
 * gfx_v8_0_ring_test_ib - sanity-test indirect buffer execution
 *
 * Like gfx_v8_0_ring_test_ring() but the scratch write is issued from an
 * IB: allocate a small IB, schedule it, wait on the returned fence with
 * the caller-supplied timeout, then check that 0xDEADBEEF landed in the
 * scratch register.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence did not signal in time,
 * or another negative error code on failure.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* sentinel so we can tell whether the IB actually executed */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* same 3-dword SET_UCONFIG_REG write as the ring test */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	/* dma_fence_wait_timeout: 0 = timeout, <0 = error, >0 = signaled */
	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
861
13331ac3
ML
862
863static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
864 release_firmware(adev->gfx.pfp_fw);
865 adev->gfx.pfp_fw = NULL;
866 release_firmware(adev->gfx.me_fw);
867 adev->gfx.me_fw = NULL;
868 release_firmware(adev->gfx.ce_fw);
869 adev->gfx.ce_fw = NULL;
870 release_firmware(adev->gfx.rlc_fw);
871 adev->gfx.rlc_fw = NULL;
872 release_firmware(adev->gfx.mec_fw);
873 adev->gfx.mec_fw = NULL;
874 if ((adev->asic_type != CHIP_STONEY) &&
875 (adev->asic_type != CHIP_TOPAZ))
876 release_firmware(adev->gfx.mec2_fw);
877 adev->gfx.mec2_fw = NULL;
878
879 kfree(adev->gfx.rlc.register_list_format);
880}
881
aaa36a97
AD
882static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
883{
884 const char *chip_name;
885 char fw_name[30];
886 int err;
887 struct amdgpu_firmware_info *info = NULL;
888 const struct common_firmware_header *header = NULL;
595fd013 889 const struct gfx_firmware_header_v1_0 *cp_hdr;
2b6cd977
EH
890 const struct rlc_firmware_header_v2_0 *rlc_hdr;
891 unsigned int *tmp = NULL, i;
aaa36a97
AD
892
893 DRM_DEBUG("\n");
894
895 switch (adev->asic_type) {
896 case CHIP_TOPAZ:
897 chip_name = "topaz";
898 break;
899 case CHIP_TONGA:
900 chip_name = "tonga";
901 break;
902 case CHIP_CARRIZO:
903 chip_name = "carrizo";
904 break;
af15a2d5
DZ
905 case CHIP_FIJI:
906 chip_name = "fiji";
907 break;
2cc0c0b5
FC
908 case CHIP_POLARIS11:
909 chip_name = "polaris11";
68182d90 910 break;
2cc0c0b5
FC
911 case CHIP_POLARIS10:
912 chip_name = "polaris10";
68182d90 913 break;
c4642a47
JZ
914 case CHIP_POLARIS12:
915 chip_name = "polaris12";
916 break;
e3c7656c
SL
917 case CHIP_STONEY:
918 chip_name = "stoney";
919 break;
aaa36a97
AD
920 default:
921 BUG();
922 }
923
c65444fe 924 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
aaa36a97
AD
925 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
926 if (err)
927 goto out;
928 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
929 if (err)
930 goto out;
595fd013
JZ
931 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
932 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
933 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 934
c65444fe 935 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
aaa36a97
AD
936 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
937 if (err)
938 goto out;
939 err = amdgpu_ucode_validate(adev->gfx.me_fw);
940 if (err)
941 goto out;
595fd013
JZ
942 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
943 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
ae65a26d 944
595fd013 945 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 946
c65444fe 947 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
aaa36a97
AD
948 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
949 if (err)
950 goto out;
951 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
952 if (err)
953 goto out;
595fd013
JZ
954 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
955 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
956 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 957
63a7c748
TH
958 /*
959 * Support for MCBP/Virtualization in combination with chained IBs is
960 * formal released on feature version #46
961 */
962 if (adev->gfx.ce_feature_version >= 46 &&
963 adev->gfx.pfp_feature_version >= 46) {
964 adev->virt.chained_ib_support = true;
965 DRM_INFO("Chained IB support enabled!\n");
966 } else
967 adev->virt.chained_ib_support = false;
968
c65444fe 969 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
aaa36a97
AD
970 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
971 if (err)
972 goto out;
973 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
2b6cd977
EH
974 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
975 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
976 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
977
978 adev->gfx.rlc.save_and_restore_offset =
979 le32_to_cpu(rlc_hdr->save_and_restore_offset);
980 adev->gfx.rlc.clear_state_descriptor_offset =
981 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
982 adev->gfx.rlc.avail_scratch_ram_locations =
983 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
984 adev->gfx.rlc.reg_restore_list_size =
985 le32_to_cpu(rlc_hdr->reg_restore_list_size);
986 adev->gfx.rlc.reg_list_format_start =
987 le32_to_cpu(rlc_hdr->reg_list_format_start);
988 adev->gfx.rlc.reg_list_format_separate_start =
989 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
990 adev->gfx.rlc.starting_offsets_start =
991 le32_to_cpu(rlc_hdr->starting_offsets_start);
992 adev->gfx.rlc.reg_list_format_size_bytes =
993 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
994 adev->gfx.rlc.reg_list_size_bytes =
995 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
996
997 adev->gfx.rlc.register_list_format =
998 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
999 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1000
1001 if (!adev->gfx.rlc.register_list_format) {
1002 err = -ENOMEM;
1003 goto out;
1004 }
1005
ae17c999 1006 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
2b6cd977
EH
1007 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1008 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1009 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1010
1011 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1012
ae17c999 1013 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
2b6cd977
EH
1014 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1015 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1016 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
aaa36a97 1017
c65444fe 1018 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
aaa36a97
AD
1019 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1020 if (err)
1021 goto out;
1022 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1023 if (err)
1024 goto out;
595fd013
JZ
1025 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1026 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1027 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 1028
97dde76a
AD
1029 if ((adev->asic_type != CHIP_STONEY) &&
1030 (adev->asic_type != CHIP_TOPAZ)) {
e3c7656c
SL
1031 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1032 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1033 if (!err) {
1034 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1035 if (err)
1036 goto out;
1037 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1038 adev->gfx.mec2_fw->data;
1039 adev->gfx.mec2_fw_version =
1040 le32_to_cpu(cp_hdr->header.ucode_version);
1041 adev->gfx.mec2_feature_version =
1042 le32_to_cpu(cp_hdr->ucode_feature_version);
1043 } else {
1044 err = 0;
1045 adev->gfx.mec2_fw = NULL;
1046 }
aaa36a97
AD
1047 }
1048
e635ee07 1049 if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
aaa36a97
AD
1050 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1051 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1052 info->fw = adev->gfx.pfp_fw;
1053 header = (const struct common_firmware_header *)info->fw->data;
1054 adev->firmware.fw_size +=
1055 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1056
1057 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1058 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1059 info->fw = adev->gfx.me_fw;
1060 header = (const struct common_firmware_header *)info->fw->data;
1061 adev->firmware.fw_size +=
1062 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1063
1064 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1065 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1066 info->fw = adev->gfx.ce_fw;
1067 header = (const struct common_firmware_header *)info->fw->data;
1068 adev->firmware.fw_size +=
1069 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1070
1071 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1072 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1073 info->fw = adev->gfx.rlc_fw;
1074 header = (const struct common_firmware_header *)info->fw->data;
1075 adev->firmware.fw_size +=
1076 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1077
1078 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1079 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1080 info->fw = adev->gfx.mec_fw;
1081 header = (const struct common_firmware_header *)info->fw->data;
1082 adev->firmware.fw_size +=
1083 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1084
4c2b2453
ML
1085 /* we need account JT in */
1086 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1087 adev->firmware.fw_size +=
1088 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1089
bed5712e
ML
1090 if (amdgpu_sriov_vf(adev)) {
1091 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1092 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1093 info->fw = adev->gfx.mec_fw;
1094 adev->firmware.fw_size +=
1095 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1096 }
1097
aaa36a97
AD
1098 if (adev->gfx.mec2_fw) {
1099 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1100 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1101 info->fw = adev->gfx.mec2_fw;
1102 header = (const struct common_firmware_header *)info->fw->data;
1103 adev->firmware.fw_size +=
1104 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1105 }
1106
1107 }
1108
1109out:
1110 if (err) {
1111 dev_err(adev->dev,
1112 "gfx8: Failed to load firmware \"%s\"\n",
1113 fw_name);
1114 release_firmware(adev->gfx.pfp_fw);
1115 adev->gfx.pfp_fw = NULL;
1116 release_firmware(adev->gfx.me_fw);
1117 adev->gfx.me_fw = NULL;
1118 release_firmware(adev->gfx.ce_fw);
1119 adev->gfx.ce_fw = NULL;
1120 release_firmware(adev->gfx.rlc_fw);
1121 adev->gfx.rlc_fw = NULL;
1122 release_firmware(adev->gfx.mec_fw);
1123 adev->gfx.mec_fw = NULL;
1124 release_firmware(adev->gfx.mec2_fw);
1125 adev->gfx.mec2_fw = NULL;
1126 }
1127 return err;
1128}
1129
2b6cd977
EH
/*
 * gfx_v8_0_get_csb_buffer - build the RLC clear-state indirect buffer
 *
 * Emits the PM4 packet stream the RLC replays to establish the clear
 * state: preamble begin, context control, every SECT_CONTEXT extent from
 * adev->gfx.rlc.cs_data, the raster config pair, preamble end, and a
 * final CLEAR_STATE packet.  The layout must match the size computed by
 * gfx_v8_0_get_csb_size().  Silently returns if cs_data or buffer is
 * missing.  All dwords are stored little-endian.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* only SECT_CONTEXT sections are expected; anything else aborts
	 * the buffer build (leaving it truncated) */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* raster config for SE0/SH0 (harvest-adjusted values) */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1176
fb16007b
AD
1177static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1178{
1179 const __le32 *fw_data;
1180 volatile u32 *dst_ptr;
1181 int me, i, max_me = 4;
1182 u32 bo_offset = 0;
1183 u32 table_offset, table_size;
1184
1185 if (adev->asic_type == CHIP_CARRIZO)
1186 max_me = 5;
1187
1188 /* write the cp table buffer */
1189 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1190 for (me = 0; me < max_me; me++) {
1191 if (me == 0) {
1192 const struct gfx_firmware_header_v1_0 *hdr =
1193 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1194 fw_data = (const __le32 *)
1195 (adev->gfx.ce_fw->data +
1196 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1197 table_offset = le32_to_cpu(hdr->jt_offset);
1198 table_size = le32_to_cpu(hdr->jt_size);
1199 } else if (me == 1) {
1200 const struct gfx_firmware_header_v1_0 *hdr =
1201 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1202 fw_data = (const __le32 *)
1203 (adev->gfx.pfp_fw->data +
1204 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1205 table_offset = le32_to_cpu(hdr->jt_offset);
1206 table_size = le32_to_cpu(hdr->jt_size);
1207 } else if (me == 2) {
1208 const struct gfx_firmware_header_v1_0 *hdr =
1209 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1210 fw_data = (const __le32 *)
1211 (adev->gfx.me_fw->data +
1212 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1213 table_offset = le32_to_cpu(hdr->jt_offset);
1214 table_size = le32_to_cpu(hdr->jt_size);
1215 } else if (me == 3) {
1216 const struct gfx_firmware_header_v1_0 *hdr =
1217 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1218 fw_data = (const __le32 *)
1219 (adev->gfx.mec_fw->data +
1220 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1221 table_offset = le32_to_cpu(hdr->jt_offset);
1222 table_size = le32_to_cpu(hdr->jt_size);
1223 } else if (me == 4) {
1224 const struct gfx_firmware_header_v1_0 *hdr =
1225 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1226 fw_data = (const __le32 *)
1227 (adev->gfx.mec2_fw->data +
1228 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1229 table_offset = le32_to_cpu(hdr->jt_offset);
1230 table_size = le32_to_cpu(hdr->jt_size);
1231 }
1232
1233 for (i = 0; i < table_size; i ++) {
1234 dst_ptr[bo_offset + i] =
1235 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1236 }
1237
1238 bo_offset += table_size;
1239 }
1240}
1241
2b6cd977
EH
1242static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1243{
1244 int r;
1245
1246 /* clear state block */
1247 if (adev->gfx.rlc.clear_state_obj) {
c81a1a74 1248 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
2b6cd977 1249 if (unlikely(r != 0))
62d2ce4b 1250 dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
2b6cd977
EH
1251 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1252 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
2b6cd977
EH
1253 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1254 adev->gfx.rlc.clear_state_obj = NULL;
1255 }
fb16007b
AD
1256
1257 /* jump table block */
1258 if (adev->gfx.rlc.cp_table_obj) {
c81a1a74 1259 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
fb16007b
AD
1260 if (unlikely(r != 0))
1261 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1262 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1263 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
fb16007b
AD
1264 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1265 adev->gfx.rlc.cp_table_obj = NULL;
1266 }
2b6cd977
EH
1267}
1268
/*
 * gfx_v8_0_rlc_init - allocate and populate the RLC buffer objects
 *
 * Creates, pins and fills the clear-state buffer (from vi_cs_data via
 * gfx_v8_0_get_csb_buffer()) and, on CARRIZO/STONEY, the CP jump-table
 * buffer (filled by cz_init_cp_jump_table()).  Both buffers live in
 * CPU-accessible VRAM and stay pinned; gfx_v8_0_rlc_fini() undoes this.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* pin so the RLC can use a fixed GPU address */
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}
1366
aaa36a97
AD
1367static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1368{
1369 int r;
1370
1371 if (adev->gfx.mec.hpd_eop_obj) {
c81a1a74 1372 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
aaa36a97
AD
1373 if (unlikely(r != 0))
1374 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1375 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1376 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
aaa36a97
AD
1377 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1378 adev->gfx.mec.hpd_eop_obj = NULL;
1379 }
1380}
1381
4e638ae9
XY
/*
 * gfx_v8_0_kiq_init_ring - set up the kernel interface queue (KIQ) ring
 *
 * Grabs a writeback slot for KIQ register reads, places the KIQ on
 * MEC2 pipe 0 when MEC2 firmware is available (MEC1 pipe 1 otherwise),
 * and initializes the doorbell-driven ring.
 *
 * NOTE(review): kiq->ring_mutex is initialized here rather than in a
 * one-time device init path — fine as long as this is called exactly
 * once per device; verify against the caller.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  struct amdgpu_irq_src *irq)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	int r = 0;

	mutex_init(&kiq->ring_mutex);

	/* writeback slot used to read registers back through the KIQ */
	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
	if (r)
		return r;

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
	if (adev->gfx.mec2_fw) {
		ring->me = 2;
		ring->pipe = 0;
	} else {
		ring->me = 1;
		ring->pipe = 1;
	}

	ring->queue = 0;
	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024,
			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}
2d0806ca 1417
4e638ae9
XY
1418static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
1419 struct amdgpu_irq_src *irq)
1420{
bffa2280 1421 amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
4e638ae9 1422 amdgpu_ring_fini(ring);
4e638ae9
XY
1423}
1424
aaa36a97
AD
/*
 * gfx_v8_0_mec_init - allocate the MEC HPD EOP buffer
 *
 * Creates (if needed), pins, maps and zeroes a GTT buffer holding one
 * GFX8_MEC_HPD_SIZE region per compute queue.  Any failure after
 * creation tears everything down via gfx_v8_0_mec_fini().
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	/* pin so the CP sees a stable GPU address for the EOP buffers */
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
1476
4e638ae9
XY
1477static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
1478{
1479 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1480
1481 amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
4e638ae9
XY
1482}
1483
/*
 * gfx_v8_0_kiq_init - allocate and zero the KIQ EOP buffer
 *
 * Creates a pinned, kernel-mapped GTT buffer of GFX8_MEC_HPD_SIZE for
 * the kernel interface queue, zeroes it, then drops the CPU mapping
 * (the GPU address stays valid in kiq->eop_gpu_addr).
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, GFX8_MEC_HPD_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, GFX8_MEC_HPD_SIZE);

	/* reserve failure is only warned about; kunmap/unreserve still run */
	r = amdgpu_bo_reserve(kiq->eop_obj, true);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}
1508
ccba7691
AD
/* Compute shader used by gfx_v8_0_do_edc_gpr_workarounds() (partially
 * visible below) — presumably initializes VGPRs across the CUs as part
 * of the Carrizo EDC workaround; raw GFX8 ISA dwords, do not edit. */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1545
/* Compute shader used by gfx_v8_0_do_edc_gpr_workarounds() — presumably
 * initializes SGPRs as part of the Carrizo EDC workaround; raw GFX8 ISA
 * dwords, do not edit. */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1570
/* Register/value pairs programmed before dispatching the VGPR init
 * shader (all SEs enabled, 256*4 threads in X). */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1590
/* Register/value pairs for the first SGPR init dispatch (SE mask 0x0f). */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1610
/* Register/value pairs for the second SGPR init dispatch (SE mask 0xf0);
 * identical to sgpr1_init_regs except for the SE selection. */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1630
1631static const u32 sec_ded_counter_registers[] =
1632{
1633 mmCPC_EDC_ATC_CNT,
1634 mmCPC_EDC_SCRATCH_CNT,
1635 mmCPC_EDC_UCODE_CNT,
1636 mmCPF_EDC_ATC_CNT,
1637 mmCPF_EDC_ROQ_CNT,
1638 mmCPF_EDC_TAG_CNT,
1639 mmCPG_EDC_ATC_CNT,
1640 mmCPG_EDC_DMA_CNT,
1641 mmCPG_EDC_TAG_CNT,
1642 mmDC_EDC_CSINVOC_CNT,
1643 mmDC_EDC_RESTORE_CNT,
1644 mmDC_EDC_STATE_CNT,
1645 mmGDS_EDC_CNT,
1646 mmGDS_EDC_GRBM_CNT,
1647 mmGDS_EDC_OA_DED,
1648 mmSPI_EDC_CNT,
1649 mmSQC_ATC_EDC_GATCL1_CNT,
1650 mmSQC_EDC_CNT,
1651 mmSQ_EDC_DED_CNT,
1652 mmSQ_EDC_INFO,
1653 mmSQ_EDC_SEC_CNT,
1654 mmTCC_EDC_CNT,
1655 mmTCP_ATC_EDC_GATCL1_CNT,
1656 mmTCP_EDC_CNT,
1657 mmTD_EDC_CNT
1658};
1659
1660static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1661{
1662 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1663 struct amdgpu_ib ib;
f54d1867 1664 struct dma_fence *f = NULL;
ccba7691
AD
1665 int r, i;
1666 u32 tmp;
1667 unsigned total_size, vgpr_offset, sgpr_offset;
1668 u64 gpu_addr;
1669
1670 /* only supported on CZ */
1671 if (adev->asic_type != CHIP_CARRIZO)
1672 return 0;
1673
1674 /* bail if the compute ring is not ready */
1675 if (!ring->ready)
1676 return 0;
1677
1678 tmp = RREG32(mmGB_EDC_MODE);
1679 WREG32(mmGB_EDC_MODE, 0);
1680
1681 total_size =
1682 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1683 total_size +=
1684 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1685 total_size +=
1686 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1687 total_size = ALIGN(total_size, 256);
1688 vgpr_offset = total_size;
1689 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1690 sgpr_offset = total_size;
1691 total_size += sizeof(sgpr_init_compute_shader);
1692
1693 /* allocate an indirect buffer to put the commands in */
1694 memset(&ib, 0, sizeof(ib));
b07c60c0 1695 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
ccba7691
AD
1696 if (r) {
1697 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1698 return r;
1699 }
1700
1701 /* load the compute shaders */
1702 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1703 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1704
1705 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1706 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1707
1708 /* init the ib length to 0 */
1709 ib.length_dw = 0;
1710
1711 /* VGPR */
1712 /* write the register state for the compute dispatch */
1713 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1714 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1715 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1716 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1717 }
1718 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1719 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1720 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1721 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1722 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1723 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1724
1725 /* write dispatch packet */
1726 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1727 ib.ptr[ib.length_dw++] = 8; /* x */
1728 ib.ptr[ib.length_dw++] = 1; /* y */
1729 ib.ptr[ib.length_dw++] = 1; /* z */
1730 ib.ptr[ib.length_dw++] =
1731 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1732
1733 /* write CS partial flush packet */
1734 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1735 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1736
1737 /* SGPR1 */
1738 /* write the register state for the compute dispatch */
1739 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1740 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1741 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1742 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1743 }
1744 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1745 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1746 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1747 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1748 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1749 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1750
1751 /* write dispatch packet */
1752 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1753 ib.ptr[ib.length_dw++] = 8; /* x */
1754 ib.ptr[ib.length_dw++] = 1; /* y */
1755 ib.ptr[ib.length_dw++] = 1; /* z */
1756 ib.ptr[ib.length_dw++] =
1757 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1758
1759 /* write CS partial flush packet */
1760 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1761 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1762
1763 /* SGPR2 */
1764 /* write the register state for the compute dispatch */
1765 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1766 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1767 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1768 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1769 }
1770 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1771 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1772 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1773 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1774 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1775 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1776
1777 /* write dispatch packet */
1778 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1779 ib.ptr[ib.length_dw++] = 8; /* x */
1780 ib.ptr[ib.length_dw++] = 1; /* y */
1781 ib.ptr[ib.length_dw++] = 1; /* z */
1782 ib.ptr[ib.length_dw++] =
1783 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1784
1785 /* write CS partial flush packet */
1786 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1787 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1788
1789 /* shedule the ib on the ring */
50ddc75e 1790 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
ccba7691
AD
1791 if (r) {
1792 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1793 goto fail;
1794 }
1795
1796 /* wait for the GPU to finish processing the IB */
f54d1867 1797 r = dma_fence_wait(f, false);
ccba7691
AD
1798 if (r) {
1799 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1800 goto fail;
1801 }
1802
1803 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1804 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1805 WREG32(mmGB_EDC_MODE, tmp);
1806
1807 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1808 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1809 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1810
1811
1812 /* read back registers to clear the counters */
1813 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1814 RREG32(sec_ded_counter_registers[i]);
1815
1816fail:
cc55c45d 1817 amdgpu_ib_free(adev, &ib, NULL);
f54d1867 1818 dma_fence_put(f);
ccba7691
AD
1819
1820 return r;
1821}
1822
68182d90 1823static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
0bde3a95
AD
1824{
1825 u32 gb_addr_config;
1826 u32 mc_shared_chmap, mc_arb_ramcfg;
1827 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1828 u32 tmp;
68182d90 1829 int ret;
0bde3a95
AD
1830
1831 switch (adev->asic_type) {
1832 case CHIP_TOPAZ:
1833 adev->gfx.config.max_shader_engines = 1;
1834 adev->gfx.config.max_tile_pipes = 2;
1835 adev->gfx.config.max_cu_per_sh = 6;
1836 adev->gfx.config.max_sh_per_se = 1;
1837 adev->gfx.config.max_backends_per_se = 2;
1838 adev->gfx.config.max_texture_channel_caches = 2;
1839 adev->gfx.config.max_gprs = 256;
1840 adev->gfx.config.max_gs_threads = 32;
1841 adev->gfx.config.max_hw_contexts = 8;
1842
1843 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1844 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1845 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1846 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1847 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1848 break;
1849 case CHIP_FIJI:
1850 adev->gfx.config.max_shader_engines = 4;
1851 adev->gfx.config.max_tile_pipes = 16;
1852 adev->gfx.config.max_cu_per_sh = 16;
1853 adev->gfx.config.max_sh_per_se = 1;
1854 adev->gfx.config.max_backends_per_se = 4;
5f2e816b 1855 adev->gfx.config.max_texture_channel_caches = 16;
0bde3a95
AD
1856 adev->gfx.config.max_gprs = 256;
1857 adev->gfx.config.max_gs_threads = 32;
1858 adev->gfx.config.max_hw_contexts = 8;
1859
68182d90
FC
1860 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1861 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1862 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1863 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1864 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1865 break;
2cc0c0b5 1866 case CHIP_POLARIS11:
c4642a47 1867 case CHIP_POLARIS12:
68182d90
FC
1868 ret = amdgpu_atombios_get_gfx_info(adev);
1869 if (ret)
1870 return ret;
1871 adev->gfx.config.max_gprs = 256;
1872 adev->gfx.config.max_gs_threads = 32;
1873 adev->gfx.config.max_hw_contexts = 8;
1874
1875 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1876 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1877 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1878 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2cc0c0b5 1879 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
68182d90 1880 break;
2cc0c0b5 1881 case CHIP_POLARIS10:
68182d90
FC
1882 ret = amdgpu_atombios_get_gfx_info(adev);
1883 if (ret)
1884 return ret;
1885 adev->gfx.config.max_gprs = 256;
1886 adev->gfx.config.max_gs_threads = 32;
1887 adev->gfx.config.max_hw_contexts = 8;
1888
0bde3a95
AD
1889 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1890 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1891 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1892 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1893 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1894 break;
1895 case CHIP_TONGA:
1896 adev->gfx.config.max_shader_engines = 4;
1897 adev->gfx.config.max_tile_pipes = 8;
1898 adev->gfx.config.max_cu_per_sh = 8;
1899 adev->gfx.config.max_sh_per_se = 1;
1900 adev->gfx.config.max_backends_per_se = 2;
1901 adev->gfx.config.max_texture_channel_caches = 8;
1902 adev->gfx.config.max_gprs = 256;
1903 adev->gfx.config.max_gs_threads = 32;
1904 adev->gfx.config.max_hw_contexts = 8;
1905
1906 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1907 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1908 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1909 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1910 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1911 break;
1912 case CHIP_CARRIZO:
1913 adev->gfx.config.max_shader_engines = 1;
1914 adev->gfx.config.max_tile_pipes = 2;
1915 adev->gfx.config.max_sh_per_se = 1;
1916 adev->gfx.config.max_backends_per_se = 2;
1917
1918 switch (adev->pdev->revision) {
1919 case 0xc4:
1920 case 0x84:
1921 case 0xc8:
1922 case 0xcc:
b8b339ea
AD
1923 case 0xe1:
1924 case 0xe3:
0bde3a95
AD
1925 /* B10 */
1926 adev->gfx.config.max_cu_per_sh = 8;
1927 break;
1928 case 0xc5:
1929 case 0x81:
1930 case 0x85:
1931 case 0xc9:
1932 case 0xcd:
b8b339ea
AD
1933 case 0xe2:
1934 case 0xe4:
0bde3a95
AD
1935 /* B8 */
1936 adev->gfx.config.max_cu_per_sh = 6;
1937 break;
1938 case 0xc6:
1939 case 0xca:
1940 case 0xce:
b8b339ea 1941 case 0x88:
80112bff 1942 case 0xe6:
0bde3a95
AD
1943 /* B6 */
1944 adev->gfx.config.max_cu_per_sh = 6;
1945 break;
1946 case 0xc7:
1947 case 0x87:
1948 case 0xcb:
b8b339ea
AD
1949 case 0xe5:
1950 case 0x89:
0bde3a95
AD
1951 default:
1952 /* B4 */
1953 adev->gfx.config.max_cu_per_sh = 4;
1954 break;
1955 }
1956
1957 adev->gfx.config.max_texture_channel_caches = 2;
1958 adev->gfx.config.max_gprs = 256;
1959 adev->gfx.config.max_gs_threads = 32;
1960 adev->gfx.config.max_hw_contexts = 8;
1961
e3c7656c
SL
1962 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1963 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1964 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1965 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1966 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1967 break;
1968 case CHIP_STONEY:
1969 adev->gfx.config.max_shader_engines = 1;
1970 adev->gfx.config.max_tile_pipes = 2;
1971 adev->gfx.config.max_sh_per_se = 1;
1972 adev->gfx.config.max_backends_per_se = 1;
1973
1974 switch (adev->pdev->revision) {
80112bff
AD
1975 case 0x80:
1976 case 0x81:
e3c7656c
SL
1977 case 0xc0:
1978 case 0xc1:
1979 case 0xc2:
1980 case 0xc4:
1981 case 0xc8:
1982 case 0xc9:
80112bff
AD
1983 case 0xd6:
1984 case 0xda:
1985 case 0xe9:
1986 case 0xea:
e3c7656c
SL
1987 adev->gfx.config.max_cu_per_sh = 3;
1988 break;
80112bff 1989 case 0x83:
e3c7656c
SL
1990 case 0xd0:
1991 case 0xd1:
1992 case 0xd2:
80112bff
AD
1993 case 0xd4:
1994 case 0xdb:
1995 case 0xe1:
1996 case 0xe2:
e3c7656c
SL
1997 default:
1998 adev->gfx.config.max_cu_per_sh = 2;
1999 break;
2000 }
2001
2002 adev->gfx.config.max_texture_channel_caches = 2;
2003 adev->gfx.config.max_gprs = 256;
2004 adev->gfx.config.max_gs_threads = 16;
2005 adev->gfx.config.max_hw_contexts = 8;
2006
0bde3a95
AD
2007 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2008 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2009 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2010 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2011 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
2012 break;
2013 default:
2014 adev->gfx.config.max_shader_engines = 2;
2015 adev->gfx.config.max_tile_pipes = 4;
2016 adev->gfx.config.max_cu_per_sh = 2;
2017 adev->gfx.config.max_sh_per_se = 1;
2018 adev->gfx.config.max_backends_per_se = 2;
2019 adev->gfx.config.max_texture_channel_caches = 4;
2020 adev->gfx.config.max_gprs = 256;
2021 adev->gfx.config.max_gs_threads = 32;
2022 adev->gfx.config.max_hw_contexts = 8;
2023
2024 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2025 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2026 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2027 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2028 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
2029 break;
2030 }
2031
2032 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2033 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2034 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2035
2036 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2037 adev->gfx.config.mem_max_burst_length_bytes = 256;
2038 if (adev->flags & AMD_IS_APU) {
2039 /* Get memory bank mapping mode. */
2040 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2041 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2042 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2043
2044 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2045 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2046 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2047
2048 /* Validate settings in case only one DIMM installed. */
2049 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2050 dimm00_addr_map = 0;
2051 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2052 dimm01_addr_map = 0;
2053 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2054 dimm10_addr_map = 0;
2055 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2056 dimm11_addr_map = 0;
2057
2058 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
2059 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
2060 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2061 adev->gfx.config.mem_row_size_in_kb = 2;
2062 else
2063 adev->gfx.config.mem_row_size_in_kb = 1;
2064 } else {
2065 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2066 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2067 if (adev->gfx.config.mem_row_size_in_kb > 4)
2068 adev->gfx.config.mem_row_size_in_kb = 4;
2069 }
2070
2071 adev->gfx.config.shader_engine_tile_size = 32;
2072 adev->gfx.config.num_gpus = 1;
2073 adev->gfx.config.multi_gpu_tile_size = 64;
2074
2075 /* fix up row size */
2076 switch (adev->gfx.config.mem_row_size_in_kb) {
2077 case 1:
2078 default:
2079 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2080 break;
2081 case 2:
2082 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2083 break;
2084 case 4:
2085 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2086 break;
2087 }
2088 adev->gfx.config.gb_addr_config = gb_addr_config;
68182d90
FC
2089
2090 return 0;
0bde3a95
AD
2091}
2092
5fc3aeeb 2093static int gfx_v8_0_sw_init(void *handle)
aaa36a97
AD
2094{
2095 int i, r;
2096 struct amdgpu_ring *ring;
4e638ae9 2097 struct amdgpu_kiq *kiq;
5fc3aeeb 2098 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97 2099
4e638ae9 2100 /* KIQ event */
d766e6a3 2101 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
4e638ae9
XY
2102 if (r)
2103 return r;
2104
aaa36a97 2105 /* EOP Event */
d766e6a3 2106 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
aaa36a97
AD
2107 if (r)
2108 return r;
2109
2110 /* Privileged reg */
d766e6a3
AD
2111 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
2112 &adev->gfx.priv_reg_irq);
aaa36a97
AD
2113 if (r)
2114 return r;
2115
2116 /* Privileged inst */
d766e6a3
AD
2117 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
2118 &adev->gfx.priv_inst_irq);
aaa36a97
AD
2119 if (r)
2120 return r;
2121
2122 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2123
2124 gfx_v8_0_scratch_init(adev);
2125
2126 r = gfx_v8_0_init_microcode(adev);
2127 if (r) {
2128 DRM_ERROR("Failed to load gfx firmware!\n");
2129 return r;
2130 }
2131
2b6cd977
EH
2132 r = gfx_v8_0_rlc_init(adev);
2133 if (r) {
2134 DRM_ERROR("Failed to init rlc BOs!\n");
2135 return r;
2136 }
2137
aaa36a97
AD
2138 r = gfx_v8_0_mec_init(adev);
2139 if (r) {
2140 DRM_ERROR("Failed to init MEC BOs!\n");
2141 return r;
2142 }
2143
aaa36a97
AD
2144 /* set up the gfx ring */
2145 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2146 ring = &adev->gfx.gfx_ring[i];
2147 ring->ring_obj = NULL;
2148 sprintf(ring->name, "gfx");
2149 /* no gfx doorbells on iceland */
2150 if (adev->asic_type != CHIP_TOPAZ) {
2151 ring->use_doorbell = true;
2152 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2153 }
2154
79887142
CK
2155 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2156 AMDGPU_CP_IRQ_GFX_EOP);
aaa36a97
AD
2157 if (r)
2158 return r;
2159 }
2160
2161 /* set up the compute queues */
2162 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2163 unsigned irq_type;
2164
2165 /* max 32 queues per MEC */
2166 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2167 DRM_ERROR("Too many (%d) compute rings!\n", i);
2168 break;
2169 }
2170 ring = &adev->gfx.compute_ring[i];
2171 ring->ring_obj = NULL;
2172 ring->use_doorbell = true;
2173 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2174 ring->me = 1; /* first MEC */
2175 ring->pipe = i / 8;
2176 ring->queue = i % 8;
268cb4c7 2177 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * GFX8_MEC_HPD_SIZE);
771c8ec1 2178 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
aaa36a97
AD
2179 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2180 /* type-2 packets are deprecated on MEC, use type-3 instead */
79887142
CK
2181 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2182 irq_type);
aaa36a97
AD
2183 if (r)
2184 return r;
2185 }
2186
b4e40676
DP
2187 r = gfx_v8_0_kiq_init(adev);
2188 if (r) {
2189 DRM_ERROR("Failed to init KIQ BOs!\n");
2190 return r;
2191 }
596c67d0 2192
b4e40676
DP
2193 kiq = &adev->gfx.kiq;
2194 r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2195 if (r)
2196 return r;
596c67d0 2197
b4e40676
DP
2198 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2199 r = gfx_v8_0_compute_mqd_sw_init(adev);
2200 if (r)
2201 return r;
596c67d0 2202
aaa36a97 2203 /* reserve GDS, GWS and OA resource for gfx */
78bbbd9c
CK
2204 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2205 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2206 &adev->gds.gds_gfx_bo, NULL, NULL);
aaa36a97
AD
2207 if (r)
2208 return r;
2209
78bbbd9c
CK
2210 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2211 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2212 &adev->gds.gws_gfx_bo, NULL, NULL);
aaa36a97
AD
2213 if (r)
2214 return r;
2215
78bbbd9c
CK
2216 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2217 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2218 &adev->gds.oa_gfx_bo, NULL, NULL);
aaa36a97
AD
2219 if (r)
2220 return r;
2221
a101a899
KW
2222 adev->gfx.ce_ram_size = 0x8000;
2223
68182d90
FC
2224 r = gfx_v8_0_gpu_early_init(adev);
2225 if (r)
2226 return r;
0bde3a95 2227
aaa36a97
AD
2228 return 0;
2229}
2230
5fc3aeeb 2231static int gfx_v8_0_sw_fini(void *handle)
aaa36a97
AD
2232{
2233 int i;
5fc3aeeb 2234 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97 2235
8640faed
JZ
2236 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2237 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2238 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
aaa36a97
AD
2239
2240 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2241 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2242 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2243 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2244
b4e40676
DP
2245 gfx_v8_0_compute_mqd_sw_fini(adev);
2246 gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2247 gfx_v8_0_kiq_fini(adev);
596c67d0 2248
aaa36a97 2249 gfx_v8_0_mec_fini(adev);
2b6cd977 2250 gfx_v8_0_rlc_fini(adev);
13331ac3 2251 gfx_v8_0_free_microcode(adev);
2b6cd977 2252
aaa36a97
AD
2253 return 0;
2254}
2255
2256static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2257{
90bea0ab 2258 uint32_t *modearray, *mod2array;
eb64526f
TSD
2259 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2260 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
90bea0ab 2261 u32 reg_offset;
aaa36a97 2262
90bea0ab
TSD
2263 modearray = adev->gfx.config.tile_mode_array;
2264 mod2array = adev->gfx.config.macrotile_mode_array;
2265
2266 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2267 modearray[reg_offset] = 0;
2268
2269 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2270 mod2array[reg_offset] = 0;
aaa36a97
AD
2271
2272 switch (adev->asic_type) {
2273 case CHIP_TOPAZ:
90bea0ab
TSD
2274 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2275 PIPE_CONFIG(ADDR_SURF_P2) |
2276 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2277 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2278 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2279 PIPE_CONFIG(ADDR_SURF_P2) |
2280 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2281 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2282 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2283 PIPE_CONFIG(ADDR_SURF_P2) |
2284 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2285 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2286 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2287 PIPE_CONFIG(ADDR_SURF_P2) |
2288 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2289 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2290 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291 PIPE_CONFIG(ADDR_SURF_P2) |
2292 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2293 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2294 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2295 PIPE_CONFIG(ADDR_SURF_P2) |
2296 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2297 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2298 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2299 PIPE_CONFIG(ADDR_SURF_P2) |
2300 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2301 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2302 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2303 PIPE_CONFIG(ADDR_SURF_P2));
2304 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2305 PIPE_CONFIG(ADDR_SURF_P2) |
2306 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2307 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2308 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2309 PIPE_CONFIG(ADDR_SURF_P2) |
2310 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2311 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2312 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2313 PIPE_CONFIG(ADDR_SURF_P2) |
2314 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2315 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2316 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2317 PIPE_CONFIG(ADDR_SURF_P2) |
2318 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2319 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321 PIPE_CONFIG(ADDR_SURF_P2) |
2322 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2323 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2324 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2325 PIPE_CONFIG(ADDR_SURF_P2) |
2326 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2327 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2328 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2329 PIPE_CONFIG(ADDR_SURF_P2) |
2330 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2332 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2333 PIPE_CONFIG(ADDR_SURF_P2) |
2334 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2335 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2336 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2337 PIPE_CONFIG(ADDR_SURF_P2) |
2338 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2339 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2340 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2341 PIPE_CONFIG(ADDR_SURF_P2) |
2342 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2344 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2345 PIPE_CONFIG(ADDR_SURF_P2) |
2346 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2347 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2348 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2349 PIPE_CONFIG(ADDR_SURF_P2) |
2350 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2351 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2352 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2353 PIPE_CONFIG(ADDR_SURF_P2) |
2354 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2355 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2356 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2357 PIPE_CONFIG(ADDR_SURF_P2) |
2358 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2359 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2360 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2361 PIPE_CONFIG(ADDR_SURF_P2) |
2362 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2364 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2365 PIPE_CONFIG(ADDR_SURF_P2) |
2366 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2367 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2368 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2369 PIPE_CONFIG(ADDR_SURF_P2) |
2370 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2371 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2372 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2373 PIPE_CONFIG(ADDR_SURF_P2) |
2374 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2375 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2376
2377 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2378 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2379 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2380 NUM_BANKS(ADDR_SURF_8_BANK));
2381 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2382 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2383 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2384 NUM_BANKS(ADDR_SURF_8_BANK));
2385 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2386 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2387 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2388 NUM_BANKS(ADDR_SURF_8_BANK));
2389 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2390 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2391 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2392 NUM_BANKS(ADDR_SURF_8_BANK));
2393 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2394 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2395 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2396 NUM_BANKS(ADDR_SURF_8_BANK));
2397 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2399 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400 NUM_BANKS(ADDR_SURF_8_BANK));
2401 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2402 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2403 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2404 NUM_BANKS(ADDR_SURF_8_BANK));
2405 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2406 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2407 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2408 NUM_BANKS(ADDR_SURF_16_BANK));
2409 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2410 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2411 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2412 NUM_BANKS(ADDR_SURF_16_BANK));
2413 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2414 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2415 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2416 NUM_BANKS(ADDR_SURF_16_BANK));
2417 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2418 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2419 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2420 NUM_BANKS(ADDR_SURF_16_BANK));
2421 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2423 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2424 NUM_BANKS(ADDR_SURF_16_BANK));
2425 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2427 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2428 NUM_BANKS(ADDR_SURF_16_BANK));
2429 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2431 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2432 NUM_BANKS(ADDR_SURF_8_BANK));
2433
2434 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2435 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2436 reg_offset != 23)
2437 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2438
2439 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2440 if (reg_offset != 7)
2441 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2442
8cdacf44 2443 break;
af15a2d5 2444 case CHIP_FIJI:
90bea0ab
TSD
2445 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2446 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2448 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2449 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2451 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2452 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2453 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2455 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2456 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2457 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2458 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2459 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2460 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2461 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2462 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2463 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2464 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2465 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2466 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2467 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2468 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2469 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2470 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2471 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2472 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2473 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2474 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2475 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2476 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2477 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2478 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2479 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2480 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2481 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2483 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2485 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2487 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2488 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2489 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2491 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2492 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2493 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2494 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2495 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2496 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2497 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2498 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2499 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2501 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2502 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2503 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2504 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2505 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2506 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2507 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2508 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2509 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2510 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2511 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2512 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2513 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2514 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2515 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2516 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2517 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2518 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2519 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2520 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2521 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2522 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2523 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2524 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2525 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2526 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2527 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2528 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2529 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2530 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2531 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2532 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2533 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2534 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2535 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2536 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2537 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2538 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2539 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2540 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2541 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2542 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2543 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2544 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2545 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2546 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2547 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2548 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2549 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2550 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2551 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2552 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2553 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2554 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2555 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2556 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2557 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2558 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2559 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2560 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2561 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2562 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2563 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2564 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2565 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2566 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2567
2568 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2570 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2571 NUM_BANKS(ADDR_SURF_8_BANK));
2572 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2573 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2574 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2575 NUM_BANKS(ADDR_SURF_8_BANK));
2576 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2577 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2578 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2579 NUM_BANKS(ADDR_SURF_8_BANK));
2580 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2582 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2583 NUM_BANKS(ADDR_SURF_8_BANK));
2584 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2585 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2586 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2587 NUM_BANKS(ADDR_SURF_8_BANK));
2588 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2590 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2591 NUM_BANKS(ADDR_SURF_8_BANK));
2592 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2594 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2595 NUM_BANKS(ADDR_SURF_8_BANK));
2596 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2598 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599 NUM_BANKS(ADDR_SURF_8_BANK));
2600 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2602 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603 NUM_BANKS(ADDR_SURF_8_BANK));
2604 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2606 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2607 NUM_BANKS(ADDR_SURF_8_BANK));
2608 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611 NUM_BANKS(ADDR_SURF_8_BANK));
2612 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2614 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2615 NUM_BANKS(ADDR_SURF_8_BANK));
2616 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2618 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2619 NUM_BANKS(ADDR_SURF_8_BANK));
2620 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2622 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2623 NUM_BANKS(ADDR_SURF_4_BANK));
2624
2625 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2626 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2627
2628 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2629 if (reg_offset != 7)
2630 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2631
5f2e816b 2632 break;
aaa36a97 2633 case CHIP_TONGA:
90bea0ab
TSD
2634 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2635 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2636 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2637 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2638 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2639 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2640 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2641 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2642 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2643 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2644 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2645 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2646 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2647 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2648 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2649 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2650 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2651 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2652 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2653 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2654 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2655 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2657 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2658 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2659 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2660 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2661 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2662 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2663 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2665 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2666 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2667 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2668 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2669 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2670 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2671 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2672 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2674 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2675 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2676 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2677 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2678 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2679 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2680 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2681 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2682 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2683 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2684 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2685 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2686 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2687 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2688 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2690 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2691 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2693 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2694 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2695 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2696 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2697 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2698 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2699 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2700 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2701 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2703 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2704 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2705 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2706 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2707 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2708 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2709 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2710 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2711 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2712 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2713 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2714 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2715 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2716 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2717 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2718 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2719 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2720 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2721 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2722 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2723 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2724 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2725 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2727 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2728 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2729 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2730 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2731 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2732 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2733 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2734 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2735 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2736 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2737 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2738 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2739 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2740 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2741 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2742 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2743 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2744 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2745 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2746 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2747 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2748 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2749 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2750 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2751 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2752 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2753 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2754 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2755 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2756
2757 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2758 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2759 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2760 NUM_BANKS(ADDR_SURF_16_BANK));
2761 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2763 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2764 NUM_BANKS(ADDR_SURF_16_BANK));
2765 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2766 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2767 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2768 NUM_BANKS(ADDR_SURF_16_BANK));
2769 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2770 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2771 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2772 NUM_BANKS(ADDR_SURF_16_BANK));
2773 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2774 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2775 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2776 NUM_BANKS(ADDR_SURF_16_BANK));
2777 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2778 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2779 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2780 NUM_BANKS(ADDR_SURF_16_BANK));
2781 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2782 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2783 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2784 NUM_BANKS(ADDR_SURF_16_BANK));
2785 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2786 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2787 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2788 NUM_BANKS(ADDR_SURF_16_BANK));
2789 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2791 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2792 NUM_BANKS(ADDR_SURF_16_BANK));
2793 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2794 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2795 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2796 NUM_BANKS(ADDR_SURF_16_BANK));
2797 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2798 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2799 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2800 NUM_BANKS(ADDR_SURF_16_BANK));
2801 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2802 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2803 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2804 NUM_BANKS(ADDR_SURF_8_BANK));
2805 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2806 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2807 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2808 NUM_BANKS(ADDR_SURF_4_BANK));
2809 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2810 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2811 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2812 NUM_BANKS(ADDR_SURF_4_BANK));
2813
2814 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2815 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2816
2817 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2818 if (reg_offset != 7)
2819 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2820
68182d90 2821 break;
2cc0c0b5 2822 case CHIP_POLARIS11:
c4642a47 2823 case CHIP_POLARIS12:
68182d90
FC
2824 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2825 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2826 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2827 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2828 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2829 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2830 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2831 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2832 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2833 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2834 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2835 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2836 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2837 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2838 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2839 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2840 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2841 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2843 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2844 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2845 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2846 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2847 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2848 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2849 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2850 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2851 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2852 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2853 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2854 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2855 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2856 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2857 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2858 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2859 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2860 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2861 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2862 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2863 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2864 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2865 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2866 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2867 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2868 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2869 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2870 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2871 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2872 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2873 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2874 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2875 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2876 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2877 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2878 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2879 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2881 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2882 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2883 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2884 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2885 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2886 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2887 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2888 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2889 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2890 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2891 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2892 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2893 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2894 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2895 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2896 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2897 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2898 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2899 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2900 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2901 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2902 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2903 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2904 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2905 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2906 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2907 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2908 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2909 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2910 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2911 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2912 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2913 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2914 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2915 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2916 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2917 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2918 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2919 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2920 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2921 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2922 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2923 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2924 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2925 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2926 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2927 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2928 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2929 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2930 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2931 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2932 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2933 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2934 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2935 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2936 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2937 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2940 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2941 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2942 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2943 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2944 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2945 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2946
2947 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2948 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2949 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2950 NUM_BANKS(ADDR_SURF_16_BANK));
2951
2952 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2953 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2954 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2955 NUM_BANKS(ADDR_SURF_16_BANK));
2956
2957 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2958 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2959 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2960 NUM_BANKS(ADDR_SURF_16_BANK));
2961
2962 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2963 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2964 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2965 NUM_BANKS(ADDR_SURF_16_BANK));
2966
2967 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2969 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2970 NUM_BANKS(ADDR_SURF_16_BANK));
2971
2972 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2975 NUM_BANKS(ADDR_SURF_16_BANK));
2976
2977 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2978 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2979 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2980 NUM_BANKS(ADDR_SURF_16_BANK));
2981
2982 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2983 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2984 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2985 NUM_BANKS(ADDR_SURF_16_BANK));
2986
2987 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2988 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2989 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990 NUM_BANKS(ADDR_SURF_16_BANK));
2991
2992 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2993 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995 NUM_BANKS(ADDR_SURF_16_BANK));
2996
2997 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2998 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2999 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3000 NUM_BANKS(ADDR_SURF_16_BANK));
3001
3002 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3003 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3004 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3005 NUM_BANKS(ADDR_SURF_16_BANK));
3006
3007 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3008 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3009 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3010 NUM_BANKS(ADDR_SURF_8_BANK));
3011
3012 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3015 NUM_BANKS(ADDR_SURF_4_BANK));
3016
3017 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3018 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3019
3020 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3021 if (reg_offset != 7)
3022 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3023
3024 break;
2cc0c0b5 3025 case CHIP_POLARIS10:
68182d90
FC
3026 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3027 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3028 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3029 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3030 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3031 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3032 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3033 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3034 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3035 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3036 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3037 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3038 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3039 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3040 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3041 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3042 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3043 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3044 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3045 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3046 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3047 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3048 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3049 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3050 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3051 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3052 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3053 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3054 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3055 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3056 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3057 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3058 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3059 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3060 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3061 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3062 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3063 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3064 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3065 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3066 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3067 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3068 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3069 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3070 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3071 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3072 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3073 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3074 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3075 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3076 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3077 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3078 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3079 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3080 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3081 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3082 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3083 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3084 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3085 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3086 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3087 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3088 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3089 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3090 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3091 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3092 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3093 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3094 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3095 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3096 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3097 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3098 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3099 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3100 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3101 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3102 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3103 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3104 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3105 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3106 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3107 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3108 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3109 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3110 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3111 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3112 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3113 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3114 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3115 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3116 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3117 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3118 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3119 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3120 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3121 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3122 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3123 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3124 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3125 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3126 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3127 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3128 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3129 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3130 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3131 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3132 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3133 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3134 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3135 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3136 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3137 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3138 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3139 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3140 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3141 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3142 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3143 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3144 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3145 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3146 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3147 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3148
3149 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3150 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3151 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3152 NUM_BANKS(ADDR_SURF_16_BANK));
3153
3154 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3155 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3156 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3157 NUM_BANKS(ADDR_SURF_16_BANK));
3158
3159 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3161 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3162 NUM_BANKS(ADDR_SURF_16_BANK));
3163
3164 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3167 NUM_BANKS(ADDR_SURF_16_BANK));
3168
3169 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3170 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3171 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3172 NUM_BANKS(ADDR_SURF_16_BANK));
3173
3174 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3175 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3176 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3177 NUM_BANKS(ADDR_SURF_16_BANK));
3178
3179 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3182 NUM_BANKS(ADDR_SURF_16_BANK));
3183
3184 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3187 NUM_BANKS(ADDR_SURF_16_BANK));
3188
3189 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3190 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3191 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3192 NUM_BANKS(ADDR_SURF_16_BANK));
3193
3194 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3197 NUM_BANKS(ADDR_SURF_16_BANK));
3198
3199 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3200 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3201 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3202 NUM_BANKS(ADDR_SURF_16_BANK));
3203
3204 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3205 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3206 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3207 NUM_BANKS(ADDR_SURF_8_BANK));
3208
3209 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3210 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3211 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3212 NUM_BANKS(ADDR_SURF_4_BANK));
3213
3214 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3215 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3216 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3217 NUM_BANKS(ADDR_SURF_4_BANK));
3218
3219 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3220 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3221
3222 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3223 if (reg_offset != 7)
3224 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3225
aaa36a97 3226 break;
e3c7656c 3227 case CHIP_STONEY:
90bea0ab
TSD
3228 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3229 PIPE_CONFIG(ADDR_SURF_P2) |
3230 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3231 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3232 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3233 PIPE_CONFIG(ADDR_SURF_P2) |
3234 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3235 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3236 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3237 PIPE_CONFIG(ADDR_SURF_P2) |
3238 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3239 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3240 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3241 PIPE_CONFIG(ADDR_SURF_P2) |
3242 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3243 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3244 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3245 PIPE_CONFIG(ADDR_SURF_P2) |
3246 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3247 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3248 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3249 PIPE_CONFIG(ADDR_SURF_P2) |
3250 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3251 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3252 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3253 PIPE_CONFIG(ADDR_SURF_P2) |
3254 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3255 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3256 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3257 PIPE_CONFIG(ADDR_SURF_P2));
3258 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3259 PIPE_CONFIG(ADDR_SURF_P2) |
3260 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3262 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3263 PIPE_CONFIG(ADDR_SURF_P2) |
3264 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3266 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3267 PIPE_CONFIG(ADDR_SURF_P2) |
3268 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3270 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3271 PIPE_CONFIG(ADDR_SURF_P2) |
3272 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3274 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3275 PIPE_CONFIG(ADDR_SURF_P2) |
3276 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3278 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3279 PIPE_CONFIG(ADDR_SURF_P2) |
3280 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3282 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3283 PIPE_CONFIG(ADDR_SURF_P2) |
3284 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3286 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3287 PIPE_CONFIG(ADDR_SURF_P2) |
3288 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3290 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3291 PIPE_CONFIG(ADDR_SURF_P2) |
3292 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3294 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3295 PIPE_CONFIG(ADDR_SURF_P2) |
3296 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3298 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3299 PIPE_CONFIG(ADDR_SURF_P2) |
3300 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3302 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3303 PIPE_CONFIG(ADDR_SURF_P2) |
3304 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3306 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3307 PIPE_CONFIG(ADDR_SURF_P2) |
3308 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3310 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3311 PIPE_CONFIG(ADDR_SURF_P2) |
3312 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3314 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3315 PIPE_CONFIG(ADDR_SURF_P2) |
3316 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3318 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3319 PIPE_CONFIG(ADDR_SURF_P2) |
3320 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3322 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3323 PIPE_CONFIG(ADDR_SURF_P2) |
3324 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3326 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3327 PIPE_CONFIG(ADDR_SURF_P2) |
3328 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3330
3331 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3332 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3333 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3334 NUM_BANKS(ADDR_SURF_8_BANK));
3335 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3336 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3337 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3338 NUM_BANKS(ADDR_SURF_8_BANK));
3339 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3341 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3342 NUM_BANKS(ADDR_SURF_8_BANK));
3343 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3346 NUM_BANKS(ADDR_SURF_8_BANK));
3347 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3350 NUM_BANKS(ADDR_SURF_8_BANK));
3351 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3354 NUM_BANKS(ADDR_SURF_8_BANK));
3355 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3358 NUM_BANKS(ADDR_SURF_8_BANK));
3359 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3362 NUM_BANKS(ADDR_SURF_16_BANK));
3363 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3366 NUM_BANKS(ADDR_SURF_16_BANK));
3367 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3370 NUM_BANKS(ADDR_SURF_16_BANK));
3371 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3374 NUM_BANKS(ADDR_SURF_16_BANK));
3375 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3378 NUM_BANKS(ADDR_SURF_16_BANK));
3379 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3382 NUM_BANKS(ADDR_SURF_16_BANK));
3383 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3386 NUM_BANKS(ADDR_SURF_8_BANK));
3387
3388 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3389 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3390 reg_offset != 23)
3391 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3392
3393 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3394 if (reg_offset != 7)
3395 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3396
e3c7656c 3397 break;
aaa36a97 3398 default:
90bea0ab
TSD
3399 dev_warn(adev->dev,
3400 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3401 adev->asic_type);
3402
3403 case CHIP_CARRIZO:
3404 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3405 PIPE_CONFIG(ADDR_SURF_P2) |
3406 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3407 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3408 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3409 PIPE_CONFIG(ADDR_SURF_P2) |
3410 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3411 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3412 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3413 PIPE_CONFIG(ADDR_SURF_P2) |
3414 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3415 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3416 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3417 PIPE_CONFIG(ADDR_SURF_P2) |
3418 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3419 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3420 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3421 PIPE_CONFIG(ADDR_SURF_P2) |
3422 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3423 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3424 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3425 PIPE_CONFIG(ADDR_SURF_P2) |
3426 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3427 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3428 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3429 PIPE_CONFIG(ADDR_SURF_P2) |
3430 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3431 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3432 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3433 PIPE_CONFIG(ADDR_SURF_P2));
3434 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3435 PIPE_CONFIG(ADDR_SURF_P2) |
3436 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3438 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3439 PIPE_CONFIG(ADDR_SURF_P2) |
3440 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3442 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3443 PIPE_CONFIG(ADDR_SURF_P2) |
3444 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3446 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3447 PIPE_CONFIG(ADDR_SURF_P2) |
3448 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3449 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3450 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3451 PIPE_CONFIG(ADDR_SURF_P2) |
3452 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3453 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3454 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3455 PIPE_CONFIG(ADDR_SURF_P2) |
3456 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3457 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3458 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3459 PIPE_CONFIG(ADDR_SURF_P2) |
3460 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3461 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3462 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3463 PIPE_CONFIG(ADDR_SURF_P2) |
3464 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3465 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3466 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3467 PIPE_CONFIG(ADDR_SURF_P2) |
3468 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3470 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3471 PIPE_CONFIG(ADDR_SURF_P2) |
3472 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3473 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3474 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3475 PIPE_CONFIG(ADDR_SURF_P2) |
3476 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3477 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3478 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3479 PIPE_CONFIG(ADDR_SURF_P2) |
3480 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3481 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3482 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3483 PIPE_CONFIG(ADDR_SURF_P2) |
3484 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3485 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3486 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3487 PIPE_CONFIG(ADDR_SURF_P2) |
3488 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3489 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3490 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3491 PIPE_CONFIG(ADDR_SURF_P2) |
3492 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3493 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3494 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3495 PIPE_CONFIG(ADDR_SURF_P2) |
3496 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3497 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3498 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3499 PIPE_CONFIG(ADDR_SURF_P2) |
3500 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3501 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3502 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3503 PIPE_CONFIG(ADDR_SURF_P2) |
3504 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3505 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3506
3507 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3508 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3509 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3510 NUM_BANKS(ADDR_SURF_8_BANK));
3511 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3514 NUM_BANKS(ADDR_SURF_8_BANK));
3515 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3516 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3517 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3518 NUM_BANKS(ADDR_SURF_8_BANK));
3519 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3520 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3521 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3522 NUM_BANKS(ADDR_SURF_8_BANK));
3523 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3526 NUM_BANKS(ADDR_SURF_8_BANK));
3527 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3528 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3529 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3530 NUM_BANKS(ADDR_SURF_8_BANK));
3531 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3532 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3533 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3534 NUM_BANKS(ADDR_SURF_8_BANK));
3535 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3536 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3537 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3538 NUM_BANKS(ADDR_SURF_16_BANK));
3539 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3540 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3541 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3542 NUM_BANKS(ADDR_SURF_16_BANK));
3543 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3544 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3545 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3546 NUM_BANKS(ADDR_SURF_16_BANK));
3547 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3548 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3549 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3550 NUM_BANKS(ADDR_SURF_16_BANK));
3551 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3552 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3553 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3554 NUM_BANKS(ADDR_SURF_16_BANK));
3555 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3556 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3557 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3558 NUM_BANKS(ADDR_SURF_16_BANK));
3559 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3560 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3561 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3562 NUM_BANKS(ADDR_SURF_8_BANK));
3563
3564 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3565 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3566 reg_offset != 23)
3567 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3568
3569 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3570 if (reg_offset != 7)
3571 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3572
3573 break;
aaa36a97
AD
3574 }
3575}
3576
05fb7291 3577static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
9559ef5b 3578 u32 se_num, u32 sh_num, u32 instance)
aaa36a97 3579{
9559ef5b
TSD
3580 u32 data;
3581
3582 if (instance == 0xffffffff)
3583 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3584 else
3585 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
aaa36a97 3586
5003f278 3587 if (se_num == 0xffffffff)
aaa36a97 3588 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
5003f278 3589 else
aaa36a97 3590 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
5003f278
TSD
3591
3592 if (sh_num == 0xffffffff)
3593 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3594 else
aaa36a97 3595 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
5003f278 3596
aaa36a97
AD
3597 WREG32(mmGRBM_GFX_INDEX, data);
3598}
3599
8f8e00c1
AD
3600static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3601{
3602 return (u32)((1ULL << bit_width) - 1);
3603}
3604
3605static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
aaa36a97
AD
3606{
3607 u32 data, mask;
3608
5003f278
TSD
3609 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3610 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
aaa36a97 3611
5003f278 3612 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
aaa36a97 3613
8f8e00c1
AD
3614 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3615 adev->gfx.config.max_sh_per_se);
aaa36a97 3616
8f8e00c1 3617 return (~data) & mask;
aaa36a97
AD
3618}
3619
167ac573
HR
3620static void
3621gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3622{
3623 switch (adev->asic_type) {
3624 case CHIP_FIJI:
3625 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3626 RB_XSEL2(1) | PKR_MAP(2) |
3627 PKR_XSEL(1) | PKR_YSEL(1) |
3628 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3629 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3630 SE_PAIR_YSEL(2);
3631 break;
3632 case CHIP_TONGA:
3633 case CHIP_POLARIS10:
3634 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3635 SE_XSEL(1) | SE_YSEL(1);
3636 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3637 SE_PAIR_YSEL(2);
3638 break;
3639 case CHIP_TOPAZ:
3640 case CHIP_CARRIZO:
3641 *rconf |= RB_MAP_PKR0(2);
3642 *rconf1 |= 0x0;
3643 break;
3644 case CHIP_POLARIS11:
c4642a47 3645 case CHIP_POLARIS12:
167ac573
HR
3646 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3647 SE_XSEL(1) | SE_YSEL(1);
3648 *rconf1 |= 0x0;
3649 break;
3650 case CHIP_STONEY:
3651 *rconf |= 0x0;
3652 *rconf1 |= 0x0;
3653 break;
3654 default:
3655 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3656 break;
3657 }
3658}
3659
3660static void
3661gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3662 u32 raster_config, u32 raster_config_1,
3663 unsigned rb_mask, unsigned num_rb)
3664{
3665 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3666 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3667 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3668 unsigned rb_per_se = num_rb / num_se;
3669 unsigned se_mask[4];
3670 unsigned se;
3671
3672 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3673 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3674 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3675 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3676
3677 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3678 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3679 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3680
3681 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3682 (!se_mask[2] && !se_mask[3]))) {
3683 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3684
3685 if (!se_mask[0] && !se_mask[1]) {
3686 raster_config_1 |=
3687 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3688 } else {
3689 raster_config_1 |=
3690 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3691 }
3692 }
3693
3694 for (se = 0; se < num_se; se++) {
3695 unsigned raster_config_se = raster_config;
3696 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3697 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3698 int idx = (se / 2) * 2;
3699
3700 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3701 raster_config_se &= ~SE_MAP_MASK;
3702
3703 if (!se_mask[idx]) {
3704 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3705 } else {
3706 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3707 }
3708 }
3709
3710 pkr0_mask &= rb_mask;
3711 pkr1_mask &= rb_mask;
3712 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3713 raster_config_se &= ~PKR_MAP_MASK;
3714
3715 if (!pkr0_mask) {
3716 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3717 } else {
3718 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3719 }
3720 }
3721
3722 if (rb_per_se >= 2) {
3723 unsigned rb0_mask = 1 << (se * rb_per_se);
3724 unsigned rb1_mask = rb0_mask << 1;
3725
3726 rb0_mask &= rb_mask;
3727 rb1_mask &= rb_mask;
3728 if (!rb0_mask || !rb1_mask) {
3729 raster_config_se &= ~RB_MAP_PKR0_MASK;
3730
3731 if (!rb0_mask) {
3732 raster_config_se |=
3733 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3734 } else {
3735 raster_config_se |=
3736 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3737 }
3738 }
3739
3740 if (rb_per_se > 2) {
3741 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3742 rb1_mask = rb0_mask << 1;
3743 rb0_mask &= rb_mask;
3744 rb1_mask &= rb_mask;
3745 if (!rb0_mask || !rb1_mask) {
3746 raster_config_se &= ~RB_MAP_PKR1_MASK;
3747
3748 if (!rb0_mask) {
3749 raster_config_se |=
3750 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3751 } else {
3752 raster_config_se |=
3753 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3754 }
3755 }
3756 }
3757 }
3758
3759 /* GRBM_GFX_INDEX has a different offset on VI */
3760 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3761 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3762 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3763 }
3764
3765 /* GRBM_GFX_INDEX has a different offset on VI */
3766 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3767}
3768
8f8e00c1 3769static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
aaa36a97
AD
3770{
3771 int i, j;
aac1e3ca 3772 u32 data;
167ac573 3773 u32 raster_config = 0, raster_config_1 = 0;
8f8e00c1 3774 u32 active_rbs = 0;
6157bd7a
FC
3775 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3776 adev->gfx.config.max_sh_per_se;
167ac573 3777 unsigned num_rb_pipes;
aaa36a97
AD
3778
3779 mutex_lock(&adev->grbm_idx_mutex);
8f8e00c1
AD
3780 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3781 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
9559ef5b 3782 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
8f8e00c1
AD
3783 data = gfx_v8_0_get_rb_active_bitmap(adev);
3784 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
6157bd7a 3785 rb_bitmap_width_per_sh);
aaa36a97
AD
3786 }
3787 }
9559ef5b 3788 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
aaa36a97 3789
8f8e00c1 3790 adev->gfx.config.backend_enable_mask = active_rbs;
aac1e3ca 3791 adev->gfx.config.num_rbs = hweight32(active_rbs);
167ac573
HR
3792
3793 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3794 adev->gfx.config.max_shader_engines, 16);
3795
3796 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3797
3798 if (!adev->gfx.config.backend_enable_mask ||
3799 adev->gfx.config.num_rbs >= num_rb_pipes) {
3800 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3801 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3802 } else {
3803 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3804 adev->gfx.config.backend_enable_mask,
3805 num_rb_pipes);
3806 }
3807
392f0c77
AD
3808 /* cache the values for userspace */
3809 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3810 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3811 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3812 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3813 RREG32(mmCC_RB_BACKEND_DISABLE);
3814 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3815 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3816 adev->gfx.config.rb_config[i][j].raster_config =
3817 RREG32(mmPA_SC_RASTER_CONFIG);
3818 adev->gfx.config.rb_config[i][j].raster_config_1 =
3819 RREG32(mmPA_SC_RASTER_CONFIG_1);
3820 }
3821 }
3822 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
167ac573 3823 mutex_unlock(&adev->grbm_idx_mutex);
aaa36a97
AD
3824}
3825
cd06bf68 3826/**
35c7a952 3827 * gfx_v8_0_init_compute_vmid - gart enable
cd06bf68
BG
3828 *
3829 * @rdev: amdgpu_device pointer
3830 *
3831 * Initialize compute vmid sh_mem registers
3832 *
3833 */
3834#define DEFAULT_SH_MEM_BASES (0x6000)
3835#define FIRST_COMPUTE_VMID (8)
3836#define LAST_COMPUTE_VMID (16)
35c7a952 3837static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
cd06bf68
BG
3838{
3839 int i;
3840 uint32_t sh_mem_config;
3841 uint32_t sh_mem_bases;
3842
3843 /*
3844 * Configure apertures:
3845 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3846 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3847 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3848 */
3849 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3850
3851 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3852 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3853 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3854 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3855 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3856 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3857
3858 mutex_lock(&adev->srbm_mutex);
3859 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3860 vi_srbm_select(adev, 0, 0, 0, i);
3861 /* CP and shaders */
3862 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3863 WREG32(mmSH_MEM_APE1_BASE, 1);
3864 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3865 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3866 }
3867 vi_srbm_select(adev, 0, 0, 0, 0);
3868 mutex_unlock(&adev->srbm_mutex);
3869}
3870
df6e2c4a
JZ
3871static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3872{
3873 switch (adev->asic_type) {
3874 default:
3875 adev->gfx.config.double_offchip_lds_buf = 1;
3876 break;
3877 case CHIP_CARRIZO:
3878 case CHIP_STONEY:
3879 adev->gfx.config.double_offchip_lds_buf = 0;
3880 break;
3881 }
3882}
3883
aaa36a97
AD
/* One-time GFX block bring-up: address config registers, tiling tables,
 * render-backend/CU discovery, per-VMID SH_MEM apertures, compute VMIDs,
 * and the scan-converter FIFO sizes.  Register order matters here.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	/* bound GRBM register reads so a hung client can't stall forever */
	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	/* srbm_mutex serializes vi_srbm_select(), which banks the SH_MEM
	 * registers per VMID */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: uncached default MTYPE, bases at 0 */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* other VMIDs: non-coherent cached default MTYPE */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			/* SH_MEM_BASES takes the top 16 bits of the 64-bit
			 * shared aperture address */
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* give all four threadgroup pipes equal (2) arbitration priority */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3964
/* Poll until the RLC serdes masters report idle: first the per-CU masters
 * for every SE/SH (selected via GRBM index), then the non-CU masters
 * (SE/GC/TC0/TC1).  Each poll is bounded by adev->usec_timeout; a timeout
 * falls through silently rather than returning an error.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			/* target one SE/SH pair, broadcast across CUs */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast mode before dropping the index mutex */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3994
3995static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3996 bool enable)
3997{
3998 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3999
0d07db7e
TSD
4000 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
4001 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
4002 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
4003 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
4004
aaa36a97
AD
4005 WREG32(mmCP_INT_CNTL_RING0, tmp);
4006}
4007
2b6cd977
EH
/* Point the RLC at the clear-state indirect buffer (CSIB): GPU address
 * split across HI/LO registers (LO masked to dword alignment) plus length.
 */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
4018
/* Walk the RLC indirect register list format, recording where each entry
 * starts (ind_start_offsets) and compacting the register indices found at
 * entry+2 into a table of unique indices.  Each list slot holding an index
 * is rewritten in place with the position of that index in unique_indices.
 * Entries are terminated by the 0xFFFFFFFF sentinel.
 *
 * NOTE(review): ind_start_offsets[*offset_count] is written before the
 * BUG_ON bounds check fires, so the check trips one entry early when the
 * array is exactly full — confirm against the RLC firmware list sizes.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			/* remember where this entry begins */
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			/* sentinel: next iteration starts a new entry */
			new_entry = true;
			continue;
		}

		/* skip two header words to the register index */
		ind_offset += 2;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* first time we see this index — append it */
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw index with its slot number */
		register_list_format[ind_offset] = indices;
	}
}
4068
/* Program the RLC save/restore machine: upload the direct register
 * restore list to ARAM, the (index-compacted) indirect format list and
 * per-entry start offsets to GPM scratch, and the unique register indices
 * to the SRM index control registers.
 *
 * Returns 0 on success, -ENOMEM if the working copy of the format list
 * cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: gfx_v8_0_parse_ind_reg_list rewrites the list */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
		adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size is stored in halves of dwords */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		if (unique_indices[i] != 0) {
			/* low 18 bits -> ADDR reg, bits 20+ -> DATA reg */
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4132
/* Turn on the RLC save/restore machine (must already be programmed by
 * gfx_v8_0_init_save_restore_list()).
 */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4137
/* Program the static power-gating timing parameters: write-pointer poll
 * idle count, the four RLC power-up/down/propagate/mem-sleep delays,
 * serdes command delay and the GFX-idle threshold for GRBM register save.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4154
2c547165
AD
/* Enable/disable SMU clock slow-down while powering up (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4160
/* Enable/disable SMU clock slow-down while powering down (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4166
/* Enable/disable CP power gating; note the register field is a *disable*
 * bit, hence the inverted value.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4171
2b6cd977
EH
4172static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4173{
c4d17b81
RZ
4174 if ((adev->asic_type == CHIP_CARRIZO) ||
4175 (adev->asic_type == CHIP_STONEY)) {
2b6cd977
EH
4176 gfx_v8_0_init_csb(adev);
4177 gfx_v8_0_init_save_restore_list(adev);
4178 gfx_v8_0_enable_save_restore_machine(adev);
c4d17b81
RZ
4179 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4180 gfx_v8_0_init_power_gating(adev);
4181 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
c4642a47
JZ
4182 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4183 (adev->asic_type == CHIP_POLARIS12)) {
c4d17b81
RZ
4184 gfx_v8_0_init_csb(adev);
4185 gfx_v8_0_init_save_restore_list(adev);
4186 gfx_v8_0_enable_save_restore_machine(adev);
4187 gfx_v8_0_init_power_gating(adev);
2b6cd977 4188 }
c4d17b81 4189
2b6cd977
EH
4190}
4191
/* Halt the RLC F32 core, mask the GUI-idle interrupts it drives, and
 * wait for the serdes masters to drain.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4199
/* Pulse the GRBM soft reset line for the RLC, with 50us settle time on
 * both assert and de-assert.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4208
/* Start the RLC F32 core.  On dGPUs the GUI-idle interrupt is re-enabled
 * here; APUs (e.g. Carrizo) defer that until after the CP is initialized.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4219
/* Upload the RLC_G microcode word-by-word through the GPM ucode
 * address/data register pair, then write the fw version to the address
 * register as the final handshake value.
 *
 * Returns 0 on success, -EINVAL if the RLC firmware was never loaded.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
4243
/* Full RLC bring-up sequence: stop, disable clock/power gating, reset,
 * re-init PG, (re)load the microcode if the powerplay/SMU path is not
 * handling firmware, then start the core.
 *
 * Returns 0 on success or a negative error from firmware loading.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		/* Polaris also gates the 3D pipe — clear its CGCG/CGLS bits */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the fw; just confirm it finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4288
4289static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4290{
4291 int i;
4292 u32 tmp = RREG32(mmCP_ME_CNTL);
4293
4294 if (enable) {
4295 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4296 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4297 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4298 } else {
4299 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4300 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4301 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4302 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4303 adev->gfx.gfx_ring[i].ready = false;
4304 }
4305 WREG32(mmCP_ME_CNTL, tmp);
4306 udelay(50);
4307}
4308
4309static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4310{
4311 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4312 const struct gfx_firmware_header_v1_0 *ce_hdr;
4313 const struct gfx_firmware_header_v1_0 *me_hdr;
4314 const __le32 *fw_data;
4315 unsigned i, fw_size;
4316
4317 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4318 return -EINVAL;
4319
4320 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4321 adev->gfx.pfp_fw->data;
4322 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4323 adev->gfx.ce_fw->data;
4324 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4325 adev->gfx.me_fw->data;
4326
4327 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4328 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4329 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
aaa36a97
AD
4330
4331 gfx_v8_0_cp_gfx_enable(adev, false);
4332
4333 /* PFP */
4334 fw_data = (const __le32 *)
4335 (adev->gfx.pfp_fw->data +
4336 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4337 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4338 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4339 for (i = 0; i < fw_size; i++)
4340 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4341 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4342
4343 /* CE */
4344 fw_data = (const __le32 *)
4345 (adev->gfx.ce_fw->data +
4346 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4347 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4348 WREG32(mmCP_CE_UCODE_ADDR, 0);
4349 for (i = 0; i < fw_size; i++)
4350 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4351 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4352
4353 /* ME */
4354 fw_data = (const __le32 *)
4355 (adev->gfx.me_fw->data +
4356 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4357 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4358 WREG32(mmCP_ME_RAM_WADDR, 0);
4359 for (i = 0; i < fw_size; i++)
4360 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4361 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4362
4363 return 0;
4364}
4365
4366static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4367{
4368 u32 count = 0;
4369 const struct cs_section_def *sect = NULL;
4370 const struct cs_extent_def *ext = NULL;
4371
4372 /* begin clear state */
4373 count += 2;
4374 /* context control state */
4375 count += 3;
4376
4377 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4378 for (ext = sect->section; ext->extent != NULL; ++ext) {
4379 if (sect->id == SECT_CONTEXT)
4380 count += 2 + ext->reg_count;
4381 else
4382 return 0;
4383 }
4384 }
4385 /* pa_sc_raster_config/pa_sc_raster_config1 */
4386 count += 4;
4387 /* end clear state */
4388 count += 2;
4389 /* clear state */
4390 count += 2;
4391
4392 return count;
4393}
4394
/* Initialize the gfx CP and emit the clear-state preamble on ring 0:
 * PREAMBLE begin, CONTEXT_CONTROL, all SECT_CONTEXT register extents,
 * the per-ASIC PA_SC_RASTER_CONFIG pair, PREAMBLE end, CLEAR_STATE and
 * the CE partition bases.  The ring allocation size must stay in sync
 * with gfx_v8_0_get_csb_size().
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* csb size + 4 dwords for the CE partition SET_BASE */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* replay every SECT_CONTEXT extent from the golden cs data */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC raster configuration pair */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4f339b29
RZ
/* Configure the gfx ring doorbell: enable/disable it in
 * CP_RB_DOORBELL_CONTROL and, on dGPUs, program the doorbell range
 * aperture.  Topaz has no gfx doorbells at all; APUs skip the range
 * registers.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	if (adev->flags & AMD_IS_APU)
		return;

	/* dGPU only: restrict the gfx doorbell aperture */
	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
aaa36a97
AD
4520
/* Bring up gfx ring 0: program the ring buffer size/pointers, rptr
 * writeback and wptr poll addresses, ring base, doorbell, then emit the
 * clear-state preamble and ring-test the result.
 *
 * Returns 0 on success or the ring-test error (ring->ready is cleared
 * on failure).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* let the pointer writes land before re-enabling the RB */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is stored in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4578
4579static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4580{
4581 int i;
4582
4583 if (enable) {
4584 WREG32(mmCP_MEC_CNTL, 0);
4585 } else {
4586 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4587 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4588 adev->gfx.compute_ring[i].ready = false;
fcf17a43 4589 adev->gfx.kiq.ring.ready = false;
aaa36a97
AD
4590 }
4591 udelay(50);
4592}
4593
aaa36a97
AD
/* Upload MEC1 microcode (and MEC2 when a separate image is present)
 * through the per-engine ucode address/data registers.  The compute CP
 * is halted first; the caller re-enables it.
 *
 * Returns 0 on success, -EINVAL if the MEC firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4639
4e638ae9
XY
4640/* KIQ functions */
/* Tell the RLC which me/pipe/queue is the KIQ.  The queue id is written
 * first, then re-written with bit 7 set — presumably a latch/activate
 * bit; TODO confirm against the RLC_CP_SCHEDULERS register spec.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4654
/* Map all compute queues through the KIQ: emit one SET_RESOURCES packet
 * followed by a MAP_QUEUES packet per compute ring, then busy-wait on a
 * scratch register write for completion.
 *
 * Returns 0 on success, -EINVAL if the KIQ never signals completion, or
 * an error from scratch/ring allocation.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	int r, i;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	/* 8 dwords per MAP_QUEUES + 8 for SET_RESOURCES + 3 for the
	 * scratch write */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, 0x000000FF);	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
4724
9d11ca9c
AD
/* Unmap (reset) all compute queues through the KIQ with a single
 * UNMAP_QUEUES packet, then busy-wait on a scratch register write for
 * completion.
 *
 * Returns 0 on success, -EINVAL on completion timeout, or an error from
 * scratch/ring allocation.
 */
static int gfx_v8_0_kiq_kcq_disable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	int r, i;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	/* 6 dwords for UNMAP_QUEUES + 3 for the scratch write */
	r = amdgpu_ring_alloc(kiq_ring, 6 + 3);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* unmap queues */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_ACTION(1)| /* RESET_QUEUES */
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(2)); /* select all queues */
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
4774
34130fb1
AR
/* Deactivate the currently-selected HQD (the caller must have done the
 * srbm select): request a dequeue with the given request type and wait
 * up to adev->usec_timeout for ACTIVE to clear, then zero the dequeue
 * request and the PQ read/write pointers.
 *
 * Returns 0 on success or -ETIMEDOUT if the queue never went inactive.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4795
/* Populate the vi_mqd (memory queue descriptor) image for @ring.
 * The MQD is the in-memory mirror of the HQD registers from which the
 * CP loads the compute queue state.  Several fields start from the
 * current register contents (read back through the SRBM-selected
 * queue), so the caller must have selected the ring's me/pipe/queue
 * under srbm_mutex.  Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	/* init the mqd struct */
	memset(mqd, 0, sizeof(struct vi_mqd));

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* all-ones masks: no CUs statically masked off for this queue */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	/* EOP buffer base, in 256-byte units (address >> 8) */
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: capture the current register values for the rest */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4938
97bf47b2
AR
/* Write the MQD image to the HQD registers of the currently selected
 * queue, programming the hardware queue from the descriptor.  The
 * caller must have selected the target me/pipe/queue via
 * vi_srbm_select() under srbm_mutex.
 * NOTE(review): non-static — presumably shared with amdkfd as part of
 * the unified MQD programming sequence; confirm against the header.
 * Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* set pq read/write pointers */
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
	WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);

	/* set the pointer to the MQD */
	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
				mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
				mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested */
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
	WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
	WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);

	/* set the HQD priority */
	WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority);
	WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum);

	/* set cwsr save area */
	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, mqd->cp_hqd_ctx_save_base_addr_lo);
	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, mqd->cp_hqd_ctx_save_base_addr_hi);
	WREG32(mmCP_HQD_CTX_SAVE_CONTROL, mqd->cp_hqd_ctx_save_control);
	WREG32(mmCP_HQD_CNTL_STACK_OFFSET, mqd->cp_hqd_cntl_stack_offset);
	WREG32(mmCP_HQD_CNTL_STACK_SIZE, mqd->cp_hqd_cntl_stack_size);
	WREG32(mmCP_HQD_WG_STATE_OFFSET, mqd->cp_hqd_wg_state_offset);
	WREG32(mmCP_HQD_CTX_SAVE_SIZE, mqd->cp_hqd_ctx_save_size);

	WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control);
	WREG32(mmCP_HQD_EOP_EVENTS, mqd->cp_hqd_eop_done_events);
	WREG32(mmCP_HQD_ERROR, mqd->cp_hqd_error);
	WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	WREG32(mmCP_HQD_EOP_DONES, mqd->cp_hqd_eop_dones);

	/* set the vmid for the queue */
	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	return 0;
}
5021
/* Initialize (or, on GPU reset, restore) the KIQ's MQD and commit it
 * to the hardware.  On the reset path the MQD image is restored from
 * the backup taken on first init; otherwise a fresh MQD is built and
 * backed up.  The KIQ uses the backup slot past the compute rings
 * (AMDGPU_MAX_COMPUTE_RINGS).
 * Returns 0 on success or the error from HQD deactivation.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	int r = 0;
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		/* make sure the queue is quiesced before reprogramming it */
		r = gfx_v8_0_deactivate_hqd(adev, 1);
		if (r) {
			dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
			goto out_unlock;
		}
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		r = gfx_v8_0_deactivate_hqd(adev, 1);
		if (r) {
			dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
			goto out_unlock;
		}
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a pristine copy so a GPU reset can restore it */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	}

	return r;

out_unlock:
	/* undo the SRBM selection before releasing the lock */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	return r;
}
5073
/* Initialize a kernel compute queue (KCQ) MQD.  On first init the MQD
 * is built and backed up; on GPU reset it is restored from the backup
 * and the ring buffer is cleared; on resume-from-suspend only the ring
 * buffer is cleared (the MQD image is still valid).  Unlike the KIQ
 * path, the MQD is not committed to registers here.
 * Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* backup slot = index of this ring within compute_ring[] */
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		/* resume path: just clear the ring buffer */
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
5101
4f339b29
RZ
5102static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
5103{
5104 if (adev->asic_type > CHIP_TONGA) {
5105 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
5106 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
5107 }
6a124e67
AD
5108 /* enable doorbells */
5109 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4f339b29
RZ
5110}
5111
/* Bring up the KIQ and all kernel compute queues: enable the compute
 * CP, initialize each ring's MQD (mapping its MQD BO while doing so),
 * program the doorbell range, map the KCQs through the KIQ, and ring-
 * test everything.  Returns 0 on success or the first error hit.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	/* map the MQD BO only for the duration of the init */
	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	/* map the compute queues via KIQ packets */
	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs; a failing KCQ is marked not-ready but is not fatal */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
5179
aaa36a97
AD
/* Resume the command processors: load (or verify SMU-loading of) the
 * CP microcode, then bring up the gfx ring and the KIQ/compute rings.
 * When powerplay is enabled the firmware handling is skipped entirely.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the fw; just verify each blob finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			/* Topaz falls back to legacy loading for the MEC */
			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
								AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
5238
/* Enable or disable both the gfx and the compute command processors. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5244
/* IP-block hw_init: program golden registers, initialize the GPU,
 * then bring up the RLC and the command processors.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* RLC must be running before the CP is resumed */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}
5261
/* IP-block hw_fini: release the privileged-op interrupts, unmap the
 * compute queues, stop the CP and RLC, and ungate GFX power gating.
 * Under SRIOV the host owns the hardware, so nothing is torn down.
 * Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_kiq_kcq_disable(adev);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
5281
/* IP-block suspend: flag the suspend (kcq_init_queue uses this to
 * skip rebuilding MQDs on resume) and tear down the hardware.
 */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	adev->gfx.in_suspend = true;
	return gfx_v8_0_hw_fini(adev);
}
5288
/* IP-block resume: re-init the hardware, then clear the suspend flag
 * (after hw_init so the init path can see it).
 */
static int gfx_v8_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gfx_v8_0_hw_init(adev);
	adev->gfx.in_suspend = false;
	return r;
}
5298
5fc3aeeb 5299static bool gfx_v8_0_is_idle(void *handle)
aaa36a97 5300{
5fc3aeeb 5301 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5302
aaa36a97
AD
5303 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5304 return false;
5305 else
5306 return true;
5307}
5308
5fc3aeeb 5309static int gfx_v8_0_wait_for_idle(void *handle)
aaa36a97
AD
5310{
5311 unsigned i;
5fc3aeeb 5312 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
5313
5314 for (i = 0; i < adev->usec_timeout; i++) {
5003f278 5315 if (gfx_v8_0_is_idle(handle))
aaa36a97 5316 return 0;
5003f278 5317
aaa36a97
AD
5318 udelay(1);
5319 }
5320 return -ETIMEDOUT;
5321}
5322
/* Inspect GRBM/SRBM status registers and build the soft-reset masks
 * for the hung engines, caching them in adev->gfx.{grbm,srbm}_soft_reset
 * for the pre/soft/post reset hooks.  Returns true if any reset is
 * needed, false otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		/* any busy gfx pipeline stage -> reset CP, GFX and GRBM */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		/* any busy CP micro-engine -> reset all CP blocks + GRBM */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
aaa36a97 5384
1057f20c
CZ
/* Quiesce the engines flagged by check_soft_reset before the reset is
 * applied: stop the RLC, halt the gfx CP if it is to be reset, and for
 * a CP/compute reset deactivate every compute HQD and halt the MEC.
 * Always returns 0.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			/* select each queue and request dequeue (req=2) */
			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
7776a693 5426
3d7c6384
CZ
/* Apply the GRBM/SRBM soft resets computed by check_soft_reset.  The
 * GFX domain is stalled via GMCON_DEBUG around the reset pulse; each
 * reset register is set, read back (to post the write), held ~50us
 * and then cleared.  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		/* stall and clear the GFX pipe while resetting */
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		/* release the GFX stall/clear */
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5488
e4ae0fc3
CZ
/* Bring the engines back after a soft reset: resume the gfx CP if it
 * was reset, and for a CP/compute reset deactivate any stale HQD state
 * and re-run the KIQ resume path; finally restart the RLC.
 * Always returns 0.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5526
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	/* the mutex serializes the capture/read sequence against other
	 * readers, since the LSB/MSB reads must pair with one capture */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
5546
5547static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5548 uint32_t vmid,
5549 uint32_t gds_base, uint32_t gds_size,
5550 uint32_t gws_base, uint32_t gws_size,
5551 uint32_t oa_base, uint32_t oa_size)
5552{
5553 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5554 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5555
5556 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5557 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5558
5559 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5560 oa_size = oa_size >> AMDGPU_OA_SHIFT;
5561
5562 /* GDS Base */
5563 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5564 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5565 WRITE_DATA_DST_SEL(0)));
5566 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5567 amdgpu_ring_write(ring, 0);
5568 amdgpu_ring_write(ring, gds_base);
5569
5570 /* GDS Size */
5571 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5572 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5573 WRITE_DATA_DST_SEL(0)));
5574 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5575 amdgpu_ring_write(ring, 0);
5576 amdgpu_ring_write(ring, gds_size);
5577
5578 /* GWS */
5579 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5580 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5581 WRITE_DATA_DST_SEL(0)));
5582 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5583 amdgpu_ring_write(ring, 0);
5584 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5585
5586 /* OA */
5587 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5588 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5589 WRITE_DATA_DST_SEL(0)));
5590 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5591 amdgpu_ring_write(ring, 0);
5592 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5593}
5594
472259f0
TSD
5595static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5596{
bc24fbe9
TSD
5597 WREG32(mmSQ_IND_INDEX,
5598 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5599 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5600 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5601 (SQ_IND_INDEX__FORCE_READ_MASK));
472259f0
TSD
5602 return RREG32(mmSQ_IND_DATA);
5603}
5604
c5a60ce8
TSD
5605static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5606 uint32_t wave, uint32_t thread,
5607 uint32_t regno, uint32_t num, uint32_t *out)
5608{
5609 WREG32(mmSQ_IND_INDEX,
5610 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5611 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5612 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5613 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5614 (SQ_IND_INDEX__FORCE_READ_MASK) |
5615 (SQ_IND_INDEX__AUTO_INCR_MASK));
5616 while (num--)
5617 *(out++) = RREG32(mmSQ_IND_DATA);
5618}
5619
472259f0
TSD
5620static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5621{
5622 /* type 0 wave data */
5623 dst[(*no_fields)++] = 0;
5624 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5625 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5626 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5627 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5628 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5629 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5630 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5631 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5632 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5633 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5634 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5635 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
74f3ce31
TSD
5636 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5637 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5638 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5639 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5640 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5641 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
472259f0
TSD
5642}
5643
c5a60ce8
TSD
/* Read @size consecutive SGPRs of the given simd/wave, starting at
 * @start, into @dst via the auto-incrementing SQ indirect interface.
 */
static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
5652
472259f0 5653
b95e31fd
AD
/* GFX-IP callbacks exposed to the rest of the driver via adev->gfx.funcs */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5660
/* IP-block early_init: set ring counts and install the gfx, ring, irq,
 * GDS and RLC function tables before any hardware is touched.
 * Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5675
ccba7691
AD
/* IP-block late_init: enable the privileged register/instruction
 * interrupts, run the EDC GPR workarounds (needs the IB pool, hence
 * late init), and gate GFX power gating.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5699
c2546f55
AD
/* Toggle static per-CU medium-grain power gating. Polaris11/12 additionally
 * need the SMU notified through powerplay before the RLC bit is flipped. */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12))
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5713
c2546f55
AD
/* Toggle dynamic per-CU medium-grain power gating in RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5719
/* Toggle quick medium-grain power gating (Polaris11-family only). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5725
2c547165
AD
/* Toggle coarse-grain GFX power gating (Carrizo/Stoney path). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5731
/* Toggle GFX pipeline power gating; on disable a dummy register read is
 * issued to wake the GFX block back up. */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5741
5742static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5743 bool enable)
5744{
5745 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5746 cz_enable_gfx_cg_power_gating(adev, true);
5747 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5748 cz_enable_gfx_pipeline_power_gating(adev, true);
5749 } else {
5750 cz_enable_gfx_cg_power_gating(adev, false);
5751 cz_enable_gfx_pipeline_power_gating(adev, false);
5752 }
5753}
5754
/* IP-block set_powergating_state: program the per-ASIC GFX power-gating
 * features according to @state and the adev->pg_flags capability mask.
 * No-op under SR-IOV (the host owns power management). Always returns 0. */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down on power transitions, gated on RLC/SMU handshake */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		/* static then dynamic medium-grain PG, each only if supported */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		/* quick medium-grain PG is Polaris11-family specific */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		/* other VI parts have no GFX PG handling here */
		break;
	}

	return 0;
}
5815
ebd843d6
HR
/* Report which GFX clock-gating features are currently active by reading
 * the live override/enable registers and OR-ing AMD_CG_SUPPORT_* bits into
 * @flags. Under SR-IOV @flags is cleared first (registers may still be
 * sampled; values are host-controlled in that case). */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG: active when the CPF override is clear */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS (same register as CGCG) */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS: active when the override is clear */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS implies MGLS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS implies MGLS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5857
/* Broadcast a SERDES write command (@cmd) to BPM register @reg_addr on all
 * SEs/SHs/CUs via RLC_SERDES_WR_CTRL. Stoney lacks the BPM_DATA/REG_ADDR
 * fields, hence the shorter clear mask on that path. */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to every SE/SH before touching the CU master masks */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5898
dbff57bc
AD
/* RLC safe-mode handshake message codes and RLC_GPR_REG2 field layout.
 * NOTE(review): the RLC_GPR_REG2 masks appear to be defined locally rather
 * than coming from the generated register headers — confirm against the
 * VI register definitions. */
#define MSG_ENTER_RLC_SAFE_MODE 1
#define MSG_EXIT_RLC_SAFE_MODE 0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
dbff57bc 5905
dbff57bc
AD
/* Put the RLC into safe mode before touching CG registers. Sends the
 * enter-safe-mode message via RLC_SAFE_MODE, then polls (bounded by
 * adev->usec_timeout) for GFX clock/power status and for the CMD bit to
 * clear. No-op when the RLC F32 core is not running or neither CGCG nor
 * MGCG is supported. */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* CMD=1, MESSAGE=1 (enter safe mode) */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait until both GFX clock and power report "on" */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge the command */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5939
/* Leave RLC safe mode (counterpart of iceland_enter_rlc_safe_mode).
 * Writes CMD with MESSAGE=0 only if we previously entered safe mode, then
 * polls for the CMD bit to clear. The final poll runs unconditionally. */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* CMD=1, MESSAGE=0 (exit safe mode) */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5964
dbff57bc
AD
/* RLC safe-mode entry/exit callbacks installed into adev->gfx.rlc.funcs. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5969
dbff57bc
AD
/* Enable/disable medium-grain clock gating (MGCG), memory light sleep
 * (MGLS for RLC/CP) and CGTS tree-shade gating. The numbered steps are an
 * ordered hardware sequence (overrides, SERDES waits, BPM commands) and
 * must execute inside RLC safe mode. */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: clear overrides (APUs keep GRBM) */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6073
dbff57bc
AD
/* Enable/disable coarse-grain clock gating (CGCG) and CG light sleep
 * (CGLS). Runs inside RLC safe mode; the override clears, SERDES waits and
 * BPM commands form an ordered hardware sequence. GUI-idle interrupts are
 * re-enabled at the end of both paths (the disable path needs them for PG). */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
dbff57bc
AD
6166static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6167 bool enable)
6e378858
EH
6168{
6169 if (enable) {
6170 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6171 * === MGCG + MGLS + TS(CG/LS) ===
6172 */
dbff57bc
AD
6173 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6174 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6e378858
EH
6175 } else {
6176 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6177 * === CGCG + CGLS ===
6178 */
dbff57bc
AD
6179 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6180 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6e378858
EH
6181 }
6182 return 0;
6183}
6184
a8ca3413
RZ
/* Tonga clock-gating control: translate adev->cg_flags plus the requested
 * @state into PP_CG_MSG_ID messages for the GFX_CG and GFX_MG blocks and
 * hand them to the SMU via powerplay. Always returns 0. */
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
						  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
	void *pp_handle = adev->powerplay.pp_handle;

	/* coarse grain: CGCG + CGLS */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			       PP_BLOCK_GFX_CG,
			       pp_support_state,
			       pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	/* medium grain: MGCG + MGLS */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			       PP_BLOCK_GFX_MG,
			       pp_support_state,
			       pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	return 0;
}
6234
/* Polaris clock-gating control: like the Tonga variant but additionally
 * programs the 3D (CGCG/CGLS), RLC light-sleep and CP light-sleep blocks.
 * Each supported block gets one PP_CG_MSG_ID message via powerplay;
 * AMD_CG_STATE_UNGATE forces the requested state to 0. Always returns 0. */
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
						    enum amd_clockgating_state state)
{

	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
	void *pp_handle = adev->powerplay.pp_handle;

	/* coarse grain: CGCG + CGLS */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			       PP_BLOCK_GFX_CG,
			       pp_support_state,
			       pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	/* 3D block: CGCG + CGLS */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			       PP_BLOCK_GFX_3D,
			       pp_support_state,
			       pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	/* medium grain: MGCG + MGLS */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			       PP_BLOCK_GFX_MG,
			       pp_support_state,
			       pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	/* RLC memory light sleep */
	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			       PP_BLOCK_GFX_RLC,
			       pp_support_state,
			       pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	/* CP memory light sleep */
	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			       PP_BLOCK_GFX_CP,
			       pp_support_state,
			       pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	return 0;
}
6333
/* IP-block set_clockgating_state: dispatch to the per-ASIC clock-gating
 * routine (direct register programming for APUs/Fiji, SMU messages for
 * Tonga/Polaris). No-op under SR-IOV. Always returns 0. */
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}
6362
536fbf94 6363static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
aaa36a97 6364{
5003f278 6365 return ring->adev->wb.wb[ring->rptr_offs];
aaa36a97
AD
6366}
6367
536fbf94 6368static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
aaa36a97
AD
6369{
6370 struct amdgpu_device *adev = ring->adev;
aaa36a97
AD
6371
6372 if (ring->use_doorbell)
6373 /* XXX check if swapping is necessary on BE */
5003f278 6374 return ring->adev->wb.wb[ring->wptr_offs];
aaa36a97 6375 else
5003f278 6376 return RREG32(mmCP_RB0_WPTR);
aaa36a97
AD
6377}
6378
/* Publish the gfx ring's write pointer: mirror it into the writeback slot
 * and ring the doorbell, or fall back to CP_RB0_WPTR (the read-back forces
 * the register write to post). */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
6392
/* Emit a WAIT_REG_MEM packet that requests an HDP flush and waits until
 * GPU_HDP_FLUSH_DONE matches the per-pipe done bit. Compute/KIQ rings use
 * their ME/pipe-specific bit; gfx rings use CP0 and wait on the PFP. */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* only MEC1/MEC2 exist; anything else is unexpected */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6425
45682886
ML
/* Emit EVENT_WRITE packets for a VS partial flush followed by a VGT flush. */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6436
6437
d35db561
CZ
/* Invalidate the HDP read cache by writing 1 to HDP_DEBUG0 via a confirmed
 * WRITE_DATA packet. */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
6449
/* Emit an indirect buffer on a gfx ring. CE IBs use the _CONST variant of
 * the packet. Under SR-IOV, preemptible DE IBs get the PRE_ENB bit plus a
 * de-meta dump so the IB can be resumed after preemption. */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6479
/* Emit an indirect buffer on a compute ring; the VALID bit is required on
 * MEC queues and the VMID is carried in bits 24+ of the control word. */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6495
/* Emit a gfx fence: EVENT_WRITE_EOP flushes TCL1/TC caches, writes @seq
 * (32 or 64 bit per @flags) to @addr and optionally raises an interrupt. */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6516
/* Emit a WAIT_REG_MEM on the ring's own fence address so the pipeline
 * stalls until the last synced fence value lands; gfx rings wait on the
 * PFP, compute rings on the ME. */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6533
/* Emit a VM flush: program the page-directory base for @vm_id (contexts
 * 0-7 and 8-15 live in different register banks), kick VM_INVALIDATE_REQUEST
 * for that VMID, wait for it to clear, and on gfx rings resync the PFP. */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6580
536fbf94 6581static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
aaa36a97
AD
6582{
6583 return ring->adev->wb.wb[ring->wptr_offs];
6584}
6585
/*
 * Publish the compute ring's write pointer: mirror it into the writeback
 * buffer, then ring the doorbell so the CP picks up the new wptr.
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
6594
/*
 * Emit a fence on a compute ring via a RELEASE_MEM packet.
 *
 * @addr:  GPU address the fence sequence value is written to (dword aligned)
 * @seq:   fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit data write,
 *         AMDGPU_FENCE_FLAG_INT additionally requests an interrupt
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL: 2 = send 64-bit data, 1 = 32-bit; INT_SEL: 2 = int after data write */
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6615
/*
 * Emit a fence on the KIQ ring.
 *
 * The sequence value is written with a plain WRITE_DATA packet (only a
 * 32-bit writeback slot is allocated for KIQ fences, hence the BUG_ON),
 * optionally followed by a write to CPC_INT_STATUS to raise an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6640
/* Emit a SWITCH_BUFFER packet (used for CE/DE buffer switching). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6646
/*
 * Emit a CONTEXT_CONTROL packet selecting which state groups the CP
 * should load, based on whether this submission performs a context
 * switch and/or carries a preamble IB.  Under SR-IOV the CE metadata
 * is emitted first.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6679
/*
 * Emit a COND_EXEC packet whose dword-count field is patched in later
 * by gfx_v8_0_ring_emit_patch_cond_exec().
 *
 * Returns the ring offset of the placeholder count dword (0x55aa55aa).
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6692
/*
 * Patch the COND_EXEC placeholder at @offset with the number of dwords
 * between it and the current write pointer, accounting for ring-buffer
 * wrap-around when wptr has already wrapped past @offset.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped around: count dwords to ring end plus from start */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6706
/*
 * Emit a COPY_DATA packet that copies register @reg into the writeback
 * buffer at adev->virt.reg_val_offs, so the host can read it back.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6722
/* Emit a WRITE_DATA packet that writes @val into register @reg. */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
6732
aaa36a97
AD
6733static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6734 enum amdgpu_interrupt_state state)
6735{
61cb8cef
TSD
6736 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6737 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6738}
6739
6740static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6741 int me, int pipe,
6742 enum amdgpu_interrupt_state state)
6743{
aaa36a97
AD
6744 /*
6745 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6746 * handles the setting of interrupts for this specific pipe. All other
6747 * pipes' interrupts are set by amdkfd.
6748 */
6749
6750 if (me == 1) {
6751 switch (pipe) {
6752 case 0:
aaa36a97
AD
6753 break;
6754 default:
6755 DRM_DEBUG("invalid pipe %d\n", pipe);
6756 return;
6757 }
6758 } else {
6759 DRM_DEBUG("invalid me %d\n", me);
6760 return;
6761 }
6762
61cb8cef
TSD
6763 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6764 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6765}
6766
6767static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6768 struct amdgpu_irq_src *source,
6769 unsigned type,
6770 enum amdgpu_interrupt_state state)
6771{
61cb8cef
TSD
6772 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6773 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6774
6775 return 0;
6776}
6777
/* Enable or disable the privileged-instruction fault interrupt. */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6788
/*
 * Dispatch an EOP interrupt state change to the right ring: the gfx
 * ring or one of the MEC1/MEC2 compute pipes.  Unknown types are
 * silently ignored.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6827
/*
 * EOP interrupt handler: decode me/pipe/queue from the IV ring_id and
 * run fence processing on the matching ring (gfx for me 0, otherwise
 * the matching compute ring).
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id layout: [6:4] queue, [3:2] me, [1:0] pipe */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
			  */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6859
/* Privileged-register fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6868
/* Privileged-instruction fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6877
/*
 * Enable or disable the GENERIC2 interrupt used by the KIQ ring, both
 * globally in CPC_INT_CNTL and for the specific ME/pipe the KIQ ring
 * is mapped to.  Only AMDGPU_CP_KIQ_IRQ_DRIVER0 is supported.
 */
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}
6906
/*
 * KIQ GENERIC2 interrupt handler: decode me/pipe/queue for debugging
 * and process fences on the KIQ ring.
 */
static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	/* ring_id layout: [6:4] queue, [3:2] me, [1:0] pipe */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		   me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}
6923
/* IP-level callbacks for the GFX 8.x block (init/fini, power, reset). */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6944
/* Ring callbacks for the graphics ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /*	CE_META */
		31 + /*	DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
6988
/* Ring callbacks for the compute (KCQ) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
7017
/* Ring callbacks for the kernel interface queue (KIQ) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7043
aaa36a97
AD
7044static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7045{
7046 int i;
7047
4e638ae9
XY
7048 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7049
aaa36a97
AD
7050 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7051 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7052
7053 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7054 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7055}
7056
/* Interrupt source callbacks: EOP, priv-reg fault, priv-instr fault. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
7076
/* Register the GFX interrupt sources (EOP, priv faults, KIQ). */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}
7091
/* Install the RLC helper callbacks (shared iceland table). */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7096
/*
 * Initialize the GDS partition sizes.  Total GDS memory size is read
 * from the hardware; partition sizes differ for the small (64KB)
 * configuration versus larger ones.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
7124
9de06de8
NH
7125static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7126 u32 bitmap)
7127{
7128 u32 data;
7129
7130 if (!bitmap)
7131 return;
7132
7133 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7134 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7135
7136 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7137}
7138
/*
 * Return a bitmap of the active CUs in the currently selected shader
 * array: the union of the hardware and user inactive-CU bitmaps,
 * inverted and masked to max_cu_per_sh bits.
 */
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
7150
/*
 * Populate adev->gfx.cu_info: walk every shader engine / shader array,
 * record its active-CU bitmap, count active CUs and build the
 * always-on CU mask (first ao_cu_num active CUs per array).
 *
 * Takes grbm_idx_mutex because it steers GRBM indexing via
 * gfx_v8_0_select_se_sh() while reading per-array state.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs keep fewer CUs always-on than discrete parts */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* disable_masks only covers a 4x2 SE/SH grid */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* pack the per-array AO bitmap: 16 bits per SE, 8 per SH */
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore broadcast indexing */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
a1255107
AD
7199
/* IP block descriptors for GFX 8.0 and 8.1; both share the same funcs. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
acad2b2a 7217
/*
 * Emit the CE metadata into the CSA via a WRITE_DATA packet.  The
 * payload layout (regular vs chained-IB) depends on whether the
 * hypervisor supports chained IBs.
 */
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	/* static: zero-initialized payload template shared by all emits */
	static union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
						  offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
						  offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
7246
/*
 * Emit the DE metadata into the CSA via a WRITE_DATA packet, recording
 * the GDS backup address (one page past the CSA base) in the payload.
 * Payload layout depends on chained-IB support.
 */
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	/* static: zero-initialized payload template shared by all emits */
	static union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}
5ff98043
ML
7279
/* create MQD for each compute queue */
/*
 * Allocate an MQD buffer object (GTT-resident, CPU-mapped) for the KIQ
 * ring and every KCQ, plus a kmalloc'd CPU-side backup copy of each MQD.
 * The KIQ backup lives in mqd_backup[AMDGPU_MAX_COMPUTE_RINGS], one past
 * the KCQ slots.  A failed backup allocation is only warned about, not
 * treated as fatal.  Returns 0 on success or the BO-creation error.
 */
static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r, i;

	/* create MQD for KIQ */
	ring = &adev->gfx.kiq.ring;
	if (!ring->mqd_obj) {
		r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
					    &ring->mqd_gpu_addr, &ring->mqd_ptr);
		if (r) {
			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
			return r;
		}

		/* prepare MQD backup */
		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
			dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
	}

	/* create MQD for each KCQ */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		if (!ring->mqd_obj) {
			r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
			if (r) {
				dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
				return r;
			}

			/* prepare MQD backup */
			adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
			if (!adev->gfx.mec.mqd_backup[i])
				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
		}
	}

	return 0;
}
7324
0875a242 7325static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
5ff98043
ML
7326{
7327 struct amdgpu_ring *ring = NULL;
7328 int i;
7329
7330 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7331 ring = &adev->gfx.compute_ring[i];
24de7515 7332 kfree(adev->gfx.mec.mqd_backup[i]);
59a82d7d
XY
7333 amdgpu_bo_free_kernel(&ring->mqd_obj,
7334 &ring->mqd_gpu_addr,
7335 &ring->mqd_ptr);
5ff98043
ML
7336 }
7337
7338 ring = &adev->gfx.kiq.ring;
24de7515 7339 kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
59a82d7d
XY
7340 amdgpu_bo_free_kernel(&ring->mqd_obj,
7341 &ring->mqd_gpu_addr,
7342 &ring->mqd_ptr);
7343}