/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

#define ARRAY_MODE(x)		((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)		((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)		((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)		((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)		((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)		((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)		((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

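/*
 * Note: the "golden" register tables below are flat lists of
 * {register offset, AND mask, OR value} triples, consumed by
 * amdgpu_device_program_register_sequence() from
 * gfx_v8_0_init_golden_registers() further down; a mask of 0xffffffff
 * overwrites the register outright, anything else is read-modify-write.
 */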
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

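/*
 * Human-readable decode of the SQ_EDC_INFO SOURCE field, used when
 * logging SQ ECC/EDC interrupts later in this file.
 */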
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};

aaa36a97
AD
720static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
721static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
722static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
dbff57bc 723static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
2b6cd977 724static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
7dae69a2 725static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
95243543
ML
726static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
727static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
aaa36a97
AD
728
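/*
 * Program the per-ASIC "golden" register settings: clock-gating init
 * tables, the a10/a11 workaround values and the common GRBM/PA/SPI
 * setup for the detected chip.
 */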
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

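/* Expose the eight SCRATCH_REG* registers for the ring/IB tests below. */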
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

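/*
 * Basic ring liveness test: write 0xCAFEDEAD into a scratch register,
 * submit a SET_UCONFIG_REG packet that stores 0xDEADBEEF there, then
 * poll until the CP has executed it or the timeout expires.
 */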
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

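/*
 * Indirect-buffer test: build a small IB that WRITE_DATAs 0xDEADBEEF
 * into a writeback slot, schedule it with a fence, and check that the
 * value landed once the fence signals.
 */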
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("ib test on ring %d failed\n", ring->idx);
		r = -EINVAL;
	}

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

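/* Drop every firmware blob requested by gfx_v8_0_init_microcode(). */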
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

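/*
 * Fetch and validate the PFP/ME/CE/RLC/MEC(2) firmware images for the
 * detected ASIC. Polaris parts first try the newer "_2" blobs and fall
 * back to the original names; the RLC header additionally carries the
 * register save/restore lists, which are copied out here.
 */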
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_VEGAM:
		chip_name = "vegam";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released with feature version #46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the JT (jump table) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}

	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

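/*
 * Build the clear-state indirect buffer from the per-ASIC cs_data
 * tables: PREAMBLE begin/end markers, the SECT_CONTEXT register
 * extents and the current raster configuration.
 */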
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

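/*
 * Copy the CP jump tables out of the CE/PFP/ME/MEC (and, on Carrizo,
 * MEC2) firmware images into the RLC cp_table buffer.
 */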
fb16007b
AD
1304static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1305{
1306 const __le32 *fw_data;
1307 volatile u32 *dst_ptr;
1308 int me, i, max_me = 4;
1309 u32 bo_offset = 0;
1310 u32 table_offset, table_size;
1311
1312 if (adev->asic_type == CHIP_CARRIZO)
1313 max_me = 5;
1314
1315 /* write the cp table buffer */
1316 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1317 for (me = 0; me < max_me; me++) {
1318 if (me == 0) {
1319 const struct gfx_firmware_header_v1_0 *hdr =
1320 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1321 fw_data = (const __le32 *)
1322 (adev->gfx.ce_fw->data +
1323 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1324 table_offset = le32_to_cpu(hdr->jt_offset);
1325 table_size = le32_to_cpu(hdr->jt_size);
1326 } else if (me == 1) {
1327 const struct gfx_firmware_header_v1_0 *hdr =
1328 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1329 fw_data = (const __le32 *)
1330 (adev->gfx.pfp_fw->data +
1331 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1332 table_offset = le32_to_cpu(hdr->jt_offset);
1333 table_size = le32_to_cpu(hdr->jt_size);
1334 } else if (me == 2) {
1335 const struct gfx_firmware_header_v1_0 *hdr =
1336 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1337 fw_data = (const __le32 *)
1338 (adev->gfx.me_fw->data +
1339 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1340 table_offset = le32_to_cpu(hdr->jt_offset);
1341 table_size = le32_to_cpu(hdr->jt_size);
1342 } else if (me == 3) {
1343 const struct gfx_firmware_header_v1_0 *hdr =
1344 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1345 fw_data = (const __le32 *)
1346 (adev->gfx.mec_fw->data +
1347 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1348 table_offset = le32_to_cpu(hdr->jt_offset);
1349 table_size = le32_to_cpu(hdr->jt_size);
1350 } else if (me == 4) {
1351 const struct gfx_firmware_header_v1_0 *hdr =
1352 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1353 fw_data = (const __le32 *)
1354 (adev->gfx.mec2_fw->data +
1355 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1356 table_offset = le32_to_cpu(hdr->jt_offset);
1357 table_size = le32_to_cpu(hdr->jt_size);
1358 }
1359
1360 for (i = 0; i < table_size; i++) {
1361 dst_ptr[bo_offset + i] =
1362 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1363 }
1364
1365 bo_offset += table_size;
1366 }
1367}
1368
1369static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1370{
1371 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1372 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
1373}
1374
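/*
 * Allocate and fill the RLC buffers: the clear state BO in VRAM (filled
 * by gfx_v8_0_get_csb_buffer()) and, on Carrizo/Stoney, the CP
 * jump-table BO sized for the jump tables plus 64KB for GDS.
 */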
1375static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1376{
1377 volatile u32 *dst_ptr;
1378 u32 dws;
1379 const struct cs_section_def *cs_data;
1380 int r;
1381
1382 adev->gfx.rlc.cs_data = vi_cs_data;
1383
1384 cs_data = adev->gfx.rlc.cs_data;
1385
1386 if (cs_data) {
1387 /* clear state block */
1388 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1389
1390 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1391 AMDGPU_GEM_DOMAIN_VRAM,
1392 &adev->gfx.rlc.clear_state_obj,
1393 &adev->gfx.rlc.clear_state_gpu_addr,
1394 (void **)&adev->gfx.rlc.cs_ptr);
1395 if (r) {
1396 dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
1397 gfx_v8_0_rlc_fini(adev);
1398 return r;
1399 }
1400
1401 /* set up the cs buffer */
1402 dst_ptr = adev->gfx.rlc.cs_ptr;
1403 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1404 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1405 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1406 }
1407
1408 if ((adev->asic_type == CHIP_CARRIZO) ||
1409 (adev->asic_type == CHIP_STONEY)) {
1410 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1411 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1412 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1413 &adev->gfx.rlc.cp_table_obj,
1414 &adev->gfx.rlc.cp_table_gpu_addr,
1415 (void **)&adev->gfx.rlc.cp_table_ptr);
1416 if (r) {
1417 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1418 return r;
1419 }
1420
1421 cz_init_cp_jump_table(adev);
1422
1423 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1424 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1425 }
1426
1427 return 0;
1428}
1429
1430static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1431{
1432 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1433}
1434
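/*
 * Allocate one GFX8_MEC_HPD_SIZE EOP buffer per acquired compute queue
 * in GTT and zero it; each compute ring later points its EOP address
 * into this BO at ring_id * GFX8_MEC_HPD_SIZE.
 */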
1435static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1436{
1437 int r;
1438 u32 *hpd;
1439 size_t mec_hpd_size;
1440
1441 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1442
1443 /* take ownership of the relevant compute queues */
1444 amdgpu_gfx_compute_queue_acquire(adev);
1445
1446 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1447
1448 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1449 AMDGPU_GEM_DOMAIN_GTT,
1450 &adev->gfx.mec.hpd_eop_obj,
1451 &adev->gfx.mec.hpd_eop_gpu_addr,
1452 (void **)&hpd);
1453 if (r) {
1454 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1455 return r;
1456 }
1457
1458 memset(hpd, 0, mec_hpd_size);
1459
1460 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1461 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1462
1463 return 0;
1464}
1465
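/*
 * Hand-assembled GCN shaders for the EDC workaround below: a stream of
 * v_mov_b32 writes that touch the VGPR file and s_mov_b32 writes that
 * touch the SGPR file, each ending in s_barrier (0xbf8a0000) and
 * s_endpgm (0xbf810000).
 */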
1466static const u32 vgpr_init_compute_shader[] =
1467{
1468 0x7e000209, 0x7e020208,
1469 0x7e040207, 0x7e060206,
1470 0x7e080205, 0x7e0a0204,
1471 0x7e0c0203, 0x7e0e0202,
1472 0x7e100201, 0x7e120200,
1473 0x7e140209, 0x7e160208,
1474 0x7e180207, 0x7e1a0206,
1475 0x7e1c0205, 0x7e1e0204,
1476 0x7e200203, 0x7e220202,
1477 0x7e240201, 0x7e260200,
1478 0x7e280209, 0x7e2a0208,
1479 0x7e2c0207, 0x7e2e0206,
1480 0x7e300205, 0x7e320204,
1481 0x7e340203, 0x7e360202,
1482 0x7e380201, 0x7e3a0200,
1483 0x7e3c0209, 0x7e3e0208,
1484 0x7e400207, 0x7e420206,
1485 0x7e440205, 0x7e460204,
1486 0x7e480203, 0x7e4a0202,
1487 0x7e4c0201, 0x7e4e0200,
1488 0x7e500209, 0x7e520208,
1489 0x7e540207, 0x7e560206,
1490 0x7e580205, 0x7e5a0204,
1491 0x7e5c0203, 0x7e5e0202,
1492 0x7e600201, 0x7e620200,
1493 0x7e640209, 0x7e660208,
1494 0x7e680207, 0x7e6a0206,
1495 0x7e6c0205, 0x7e6e0204,
1496 0x7e700203, 0x7e720202,
1497 0x7e740201, 0x7e760200,
1498 0x7e780209, 0x7e7a0208,
1499 0x7e7c0207, 0x7e7e0206,
1500 0xbf8a0000, 0xbf810000,
1501};
1502
1503static const u32 sgpr_init_compute_shader[] =
1504{
1505 0xbe8a0100, 0xbe8c0102,
1506 0xbe8e0104, 0xbe900106,
1507 0xbe920108, 0xbe940100,
1508 0xbe960102, 0xbe980104,
1509 0xbe9a0106, 0xbe9c0108,
1510 0xbe9e0100, 0xbea00102,
1511 0xbea20104, 0xbea40106,
1512 0xbea60108, 0xbea80100,
1513 0xbeaa0102, 0xbeac0104,
1514 0xbeae0106, 0xbeb00108,
1515 0xbeb20100, 0xbeb40102,
1516 0xbeb60104, 0xbeb80106,
1517 0xbeba0108, 0xbebc0100,
1518 0xbebe0102, 0xbec00104,
1519 0xbec20106, 0xbec40108,
1520 0xbec60100, 0xbec80102,
1521 0xbee60004, 0xbee70005,
1522 0xbeea0006, 0xbeeb0007,
1523 0xbee80008, 0xbee90009,
1524 0xbefc0000, 0xbf8a0000,
1525 0xbf810000, 0x00000000,
1526};
1527
1528static const u32 vgpr_init_regs[] =
1529{
1530 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1531 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1532 mmCOMPUTE_NUM_THREAD_X, 256*4,
1533 mmCOMPUTE_NUM_THREAD_Y, 1,
1534 mmCOMPUTE_NUM_THREAD_Z, 1,
1535 mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1536 mmCOMPUTE_PGM_RSRC2, 20,
1537 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1538 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1539 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1540 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1541 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1542 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1543 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1544 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1545 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1546 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1547};
1548
1549static const u32 sgpr1_init_regs[] =
1550{
1551 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1552 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1553 mmCOMPUTE_NUM_THREAD_X, 256*5,
1554 mmCOMPUTE_NUM_THREAD_Y, 1,
1555 mmCOMPUTE_NUM_THREAD_Z, 1,
1556 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1557 mmCOMPUTE_PGM_RSRC2, 20,
1558 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1559 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1560 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1561 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1562 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1563 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1564 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1565 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1566 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1567 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1568};
1569
1570static const u32 sgpr2_init_regs[] =
1571{
1572 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1573 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1574 mmCOMPUTE_NUM_THREAD_X, 256*5,
1575 mmCOMPUTE_NUM_THREAD_Y, 1,
1576 mmCOMPUTE_NUM_THREAD_Z, 1,
1577 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1578 mmCOMPUTE_PGM_RSRC2, 20,
1579 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1580 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1581 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1582 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1583 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1584 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1585 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1586 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1587 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1588 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1589};
1590
1591static const u32 sec_ded_counter_registers[] =
1592{
1593 mmCPC_EDC_ATC_CNT,
1594 mmCPC_EDC_SCRATCH_CNT,
1595 mmCPC_EDC_UCODE_CNT,
1596 mmCPF_EDC_ATC_CNT,
1597 mmCPF_EDC_ROQ_CNT,
1598 mmCPF_EDC_TAG_CNT,
1599 mmCPG_EDC_ATC_CNT,
1600 mmCPG_EDC_DMA_CNT,
1601 mmCPG_EDC_TAG_CNT,
1602 mmDC_EDC_CSINVOC_CNT,
1603 mmDC_EDC_RESTORE_CNT,
1604 mmDC_EDC_STATE_CNT,
1605 mmGDS_EDC_CNT,
1606 mmGDS_EDC_GRBM_CNT,
1607 mmGDS_EDC_OA_DED,
1608 mmSPI_EDC_CNT,
1609 mmSQC_ATC_EDC_GATCL1_CNT,
1610 mmSQC_EDC_CNT,
1611 mmSQ_EDC_DED_CNT,
1612 mmSQ_EDC_INFO,
1613 mmSQ_EDC_SEC_CNT,
1614 mmTCC_EDC_CNT,
1615 mmTCP_ATC_EDC_GATCL1_CNT,
1616 mmTCP_EDC_CNT,
1617 mmTD_EDC_CNT
1618};
1619
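/*
 * Carrizo-only ECC init: run the VGPR/SGPR init shaders above once via
 * a direct-dispatch IB so the GPR banks all get written, then enable
 * DED_MODE and PROP_FED in GB_EDC_MODE and read back the SEC/DED
 * counter registers to clear them.
 */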
1620static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1621{
1622 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1623 struct amdgpu_ib ib;
1624 struct dma_fence *f = NULL;
1625 int r, i;
1626 u32 tmp;
1627 unsigned total_size, vgpr_offset, sgpr_offset;
1628 u64 gpu_addr;
1629
1630 /* only supported on CZ */
1631 if (adev->asic_type != CHIP_CARRIZO)
1632 return 0;
1633
1634 /* bail if the compute ring is not ready */
1635 if (!ring->ready)
1636 return 0;
1637
1638 tmp = RREG32(mmGB_EDC_MODE);
1639 WREG32(mmGB_EDC_MODE, 0);
1640
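	/*
	 * Per init-regs list: 3 dwords for each SET_SH_REG reg/value pair,
	 * plus 4 dwords for the COMPUTE_PGM_LO/HI packet, 5 for
	 * DISPATCH_DIRECT and 2 for the CS partial flush event; the final
	 * *4 converts dwords to bytes.
	 */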
1641 total_size =
1642 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1643 total_size +=
1644 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1645 total_size +=
1646 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1647 total_size = ALIGN(total_size, 256);
1648 vgpr_offset = total_size;
1649 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1650 sgpr_offset = total_size;
1651 total_size += sizeof(sgpr_init_compute_shader);
1652
1653 /* allocate an indirect buffer to put the commands in */
1654 memset(&ib, 0, sizeof(ib));
1655 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1656 if (r) {
1657 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1658 return r;
1659 }
1660
1661 /* load the compute shaders */
1662 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1663 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1664
1665 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1666 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1667
1668 /* init the ib length to 0 */
1669 ib.length_dw = 0;
1670
1671 /* VGPR */
1672 /* write the register state for the compute dispatch */
1673 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1674 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1675 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1676 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1677 }
1678 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1679 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1680 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1681 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1682 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1683 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1684
1685 /* write dispatch packet */
1686 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1687 ib.ptr[ib.length_dw++] = 8; /* x */
1688 ib.ptr[ib.length_dw++] = 1; /* y */
1689 ib.ptr[ib.length_dw++] = 1; /* z */
1690 ib.ptr[ib.length_dw++] =
1691 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1692
1693 /* write CS partial flush packet */
1694 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1695 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1696
1697 /* SGPR1 */
1698 /* write the register state for the compute dispatch */
1699 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1700 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1701 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1702 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1703 }
1704 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1705 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1706 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1707 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1708 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1709 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1710
1711 /* write dispatch packet */
1712 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1713 ib.ptr[ib.length_dw++] = 8; /* x */
1714 ib.ptr[ib.length_dw++] = 1; /* y */
1715 ib.ptr[ib.length_dw++] = 1; /* z */
1716 ib.ptr[ib.length_dw++] =
1717 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1718
1719 /* write CS partial flush packet */
1720 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1721 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1722
1723 /* SGPR2 */
1724 /* write the register state for the compute dispatch */
1725 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1726 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1727 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1728 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1729 }
1730 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1731 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1732 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1733 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1734 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1735 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1736
1737 /* write dispatch packet */
1738 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1739 ib.ptr[ib.length_dw++] = 8; /* x */
1740 ib.ptr[ib.length_dw++] = 1; /* y */
1741 ib.ptr[ib.length_dw++] = 1; /* z */
1742 ib.ptr[ib.length_dw++] =
1743 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1744
1745 /* write CS partial flush packet */
1746 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1747 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1748
1749 /* schedule the ib on the ring */
1750 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1751 if (r) {
1752 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1753 goto fail;
1754 }
1755
1756 /* wait for the GPU to finish processing the IB */
1757 r = dma_fence_wait(f, false);
1758 if (r) {
1759 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1760 goto fail;
1761 }
1762
1763 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1764 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1765 WREG32(mmGB_EDC_MODE, tmp);
1766
1767 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1768 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1769 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1770
1771
1772 /* read back registers to clear the counters */
1773 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1774 RREG32(sec_ded_counter_registers[i]);
1775
1776fail:
1777 amdgpu_ib_free(adev, &ib, NULL);
1778 dma_fence_put(f);
1779
1780 return r;
1781}
1782
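/*
 * Fill in the per-ASIC gfx limits (shader engines, pipes, CUs, caches,
 * FIFO sizes) and pick the golden GB_ADDR_CONFIG; Polaris parts query
 * atombios instead of using hardcoded tables. Also derives the DRAM
 * row size from the MC registers (DIMM address mappings on APUs,
 * NOOFCOLS otherwise) and folds it into GB_ADDR_CONFIG.ROW_SIZE.
 */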
1783 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1784{
1785 u32 gb_addr_config;
1786 u32 mc_shared_chmap, mc_arb_ramcfg;
1787 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1788 u32 tmp;
1789 int ret;
1790
1791 switch (adev->asic_type) {
1792 case CHIP_TOPAZ:
1793 adev->gfx.config.max_shader_engines = 1;
1794 adev->gfx.config.max_tile_pipes = 2;
1795 adev->gfx.config.max_cu_per_sh = 6;
1796 adev->gfx.config.max_sh_per_se = 1;
1797 adev->gfx.config.max_backends_per_se = 2;
1798 adev->gfx.config.max_texture_channel_caches = 2;
1799 adev->gfx.config.max_gprs = 256;
1800 adev->gfx.config.max_gs_threads = 32;
1801 adev->gfx.config.max_hw_contexts = 8;
1802
1803 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1804 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1805 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1806 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1807 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1808 break;
1809 case CHIP_FIJI:
1810 adev->gfx.config.max_shader_engines = 4;
1811 adev->gfx.config.max_tile_pipes = 16;
1812 adev->gfx.config.max_cu_per_sh = 16;
1813 adev->gfx.config.max_sh_per_se = 1;
1814 adev->gfx.config.max_backends_per_se = 4;
1815 adev->gfx.config.max_texture_channel_caches = 16;
1816 adev->gfx.config.max_gprs = 256;
1817 adev->gfx.config.max_gs_threads = 32;
1818 adev->gfx.config.max_hw_contexts = 8;
1819
1820 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1821 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1822 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1823 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1824 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1825 break;
1826 case CHIP_POLARIS11:
1827 case CHIP_POLARIS12:
1828 ret = amdgpu_atombios_get_gfx_info(adev);
1829 if (ret)
1830 return ret;
1831 adev->gfx.config.max_gprs = 256;
1832 adev->gfx.config.max_gs_threads = 32;
1833 adev->gfx.config.max_hw_contexts = 8;
1834
1835 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1836 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1837 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1838 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1839 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1840 break;
1841 case CHIP_POLARIS10:
1842 case CHIP_VEGAM:
1843 ret = amdgpu_atombios_get_gfx_info(adev);
1844 if (ret)
1845 return ret;
1846 adev->gfx.config.max_gprs = 256;
1847 adev->gfx.config.max_gs_threads = 32;
1848 adev->gfx.config.max_hw_contexts = 8;
1849
1850 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1851 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1852 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1853 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1854 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1855 break;
1856 case CHIP_TONGA:
1857 adev->gfx.config.max_shader_engines = 4;
1858 adev->gfx.config.max_tile_pipes = 8;
1859 adev->gfx.config.max_cu_per_sh = 8;
1860 adev->gfx.config.max_sh_per_se = 1;
1861 adev->gfx.config.max_backends_per_se = 2;
1862 adev->gfx.config.max_texture_channel_caches = 8;
1863 adev->gfx.config.max_gprs = 256;
1864 adev->gfx.config.max_gs_threads = 32;
1865 adev->gfx.config.max_hw_contexts = 8;
1866
1867 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1868 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1869 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1870 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1871 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1872 break;
1873 case CHIP_CARRIZO:
1874 adev->gfx.config.max_shader_engines = 1;
1875 adev->gfx.config.max_tile_pipes = 2;
1876 adev->gfx.config.max_sh_per_se = 1;
1877 adev->gfx.config.max_backends_per_se = 2;
1878 adev->gfx.config.max_cu_per_sh = 8;
1879 adev->gfx.config.max_texture_channel_caches = 2;
1880 adev->gfx.config.max_gprs = 256;
1881 adev->gfx.config.max_gs_threads = 32;
1882 adev->gfx.config.max_hw_contexts = 8;
1883
1884 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1885 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1886 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1887 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1888 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1889 break;
1890 case CHIP_STONEY:
1891 adev->gfx.config.max_shader_engines = 1;
1892 adev->gfx.config.max_tile_pipes = 2;
1893 adev->gfx.config.max_sh_per_se = 1;
1894 adev->gfx.config.max_backends_per_se = 1;
1895 adev->gfx.config.max_cu_per_sh = 3;
1896 adev->gfx.config.max_texture_channel_caches = 2;
1897 adev->gfx.config.max_gprs = 256;
1898 adev->gfx.config.max_gs_threads = 16;
1899 adev->gfx.config.max_hw_contexts = 8;
1900
1901 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1902 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1903 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1904 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1905 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1906 break;
1907 default:
1908 adev->gfx.config.max_shader_engines = 2;
1909 adev->gfx.config.max_tile_pipes = 4;
1910 adev->gfx.config.max_cu_per_sh = 2;
1911 adev->gfx.config.max_sh_per_se = 1;
1912 adev->gfx.config.max_backends_per_se = 2;
1913 adev->gfx.config.max_texture_channel_caches = 4;
1914 adev->gfx.config.max_gprs = 256;
1915 adev->gfx.config.max_gs_threads = 32;
1916 adev->gfx.config.max_hw_contexts = 8;
1917
1918 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1919 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1920 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1921 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1922 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1923 break;
1924 }
1925
1926 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1927 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1928 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1929
1930 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1931 adev->gfx.config.mem_max_burst_length_bytes = 256;
1932 if (adev->flags & AMD_IS_APU) {
1933 /* Get memory bank mapping mode. */
1934 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1935 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1936 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1937
1938 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1939 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1940 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1941
1942 /* Validate settings in case only one DIMM installed. */
1943 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1944 dimm00_addr_map = 0;
1945 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1946 dimm01_addr_map = 0;
1947 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1948 dimm10_addr_map = 0;
1949 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1950 dimm11_addr_map = 0;
1951
1952 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1953 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1954 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1955 adev->gfx.config.mem_row_size_in_kb = 2;
1956 else
1957 adev->gfx.config.mem_row_size_in_kb = 1;
1958 } else {
1959 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1960 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1961 if (adev->gfx.config.mem_row_size_in_kb > 4)
1962 adev->gfx.config.mem_row_size_in_kb = 4;
1963 }
1964
1965 adev->gfx.config.shader_engine_tile_size = 32;
1966 adev->gfx.config.num_gpus = 1;
1967 adev->gfx.config.multi_gpu_tile_size = 64;
1968
1969 /* fix up row size */
1970 switch (adev->gfx.config.mem_row_size_in_kb) {
1971 case 1:
1972 default:
1973 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1974 break;
1975 case 2:
1976 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1977 break;
1978 case 4:
1979 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1980 break;
1981 }
1982 adev->gfx.config.gb_addr_config = gb_addr_config;
1983
1984 return 0;
1985}
1986
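/*
 * Wire up one compute ring: mec0 is exposed as me1, the doorbell and
 * EOP buffer slot are indexed by ring_id, and the EOP interrupt source
 * is selected from the (me, pipe) pair.
 */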
1987static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1988 int mec, int pipe, int queue)
1989{
1990 int r;
1991 unsigned irq_type;
1992 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1993
1995
1996 /* mec0 is me1 */
1997 ring->me = mec + 1;
1998 ring->pipe = pipe;
1999 ring->queue = queue;
2000
2001 ring->ring_obj = NULL;
2002 ring->use_doorbell = true;
2003 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
2004 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2005 + (ring_id * GFX8_MEC_HPD_SIZE);
2006 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2007
2008 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2009 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2010 + ring->pipe;
2011
2012 /* type-2 packets are deprecated on MEC, use type-3 instead */
2013 r = amdgpu_ring_init(adev, ring, 1024,
2014 &adev->gfx.eop_irq, irq_type);
2015 if (r)
2016 return r;
2017
2018
2019 return 0;
2020}
2021
2022static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
2023
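/*
 * sw_init: register the KIQ/EOP/priv-fault/ECC/SQ interrupt sources,
 * load microcode, create the RLC and MEC BOs, initialize the gfx ring
 * plus every enabled compute queue (allocated horizontally across
 * pipes), bring up KIQ and the MQD backing store, and reserve the
 * GDS/GWS/OA partitions.
 */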
2024 static int gfx_v8_0_sw_init(void *handle)
2025 {
2026 int i, j, k, r, ring_id;
2027 struct amdgpu_ring *ring;
2028 struct amdgpu_kiq *kiq;
2029 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2030
2031 switch (adev->asic_type) {
2032 case CHIP_TONGA:
2033 case CHIP_CARRIZO:
2034 case CHIP_FIJI:
2035 case CHIP_POLARIS10:
2036 case CHIP_POLARIS11:
2037 case CHIP_POLARIS12:
2038 case CHIP_VEGAM:
2039 adev->gfx.mec.num_mec = 2;
2040 break;
2041 case CHIP_TOPAZ:
2042 case CHIP_STONEY:
2043 default:
2044 adev->gfx.mec.num_mec = 1;
2045 break;
2046 }
2047
2048 adev->gfx.mec.num_pipe_per_mec = 4;
2049 adev->gfx.mec.num_queue_per_pipe = 8;
2050
2051 /* KIQ event */
2052 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq);
2053 if (r)
2054 return r;
2055
2056 /* EOP Event */
2057 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
2058 if (r)
2059 return r;
2060
2061 /* Privileged reg */
2062 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
2063 &adev->gfx.priv_reg_irq);
2064 if (r)
2065 return r;
2066
2067 /* Privileged inst */
2068 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
2069 &adev->gfx.priv_inst_irq);
2070 if (r)
2071 return r;
2072
2073 /* Add CP EDC/ECC irq */
2074 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
2075 &adev->gfx.cp_ecc_error_irq);
2076 if (r)
2077 return r;
2078
2079 /* SQ interrupts. */
2080 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
2081 &adev->gfx.sq_irq);
2082 if (r) {
2083 DRM_ERROR("amdgpu_irq_add_id() for SQ failed: %d\n", r);
2084 return r;
2085 }
2086
2087 INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
2088
2089 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2090
2091 gfx_v8_0_scratch_init(adev);
2092
2093 r = gfx_v8_0_init_microcode(adev);
2094 if (r) {
2095 DRM_ERROR("Failed to load gfx firmware!\n");
2096 return r;
2097 }
2098
2099 r = gfx_v8_0_rlc_init(adev);
2100 if (r) {
2101 DRM_ERROR("Failed to init rlc BOs!\n");
2102 return r;
2103 }
2104
2105 r = gfx_v8_0_mec_init(adev);
2106 if (r) {
2107 DRM_ERROR("Failed to init MEC BOs!\n");
2108 return r;
2109 }
2110
2111 /* set up the gfx ring */
2112 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2113 ring = &adev->gfx.gfx_ring[i];
2114 ring->ring_obj = NULL;
2115 sprintf(ring->name, "gfx");
2116 /* no gfx doorbells on iceland */
2117 if (adev->asic_type != CHIP_TOPAZ) {
2118 ring->use_doorbell = true;
2119 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2120 }
2121
2122 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2123 AMDGPU_CP_IRQ_GFX_EOP);
2124 if (r)
2125 return r;
2126 }
2127
2128
2129 /* set up the compute queues - allocate horizontally across pipes */
2130 ring_id = 0;
2131 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2132 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2133 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2134 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2135 continue;
2136
2137 r = gfx_v8_0_compute_ring_init(adev,
2138 ring_id,
2139 i, k, j);
2140 if (r)
2141 return r;
2142
2143 ring_id++;
2144 }
2145 }
2146 }
2147
2148 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2149 if (r) {
2150 DRM_ERROR("Failed to init KIQ BOs!\n");
2151 return r;
2152 }
2153
2154 kiq = &adev->gfx.kiq;
2155 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2156 if (r)
2157 return r;
2158
2159 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2160 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2161 if (r)
2162 return r;
2163
2164 /* reserve GDS, GWS and OA resource for gfx */
2165 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2166 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2167 &adev->gds.gds_gfx_bo, NULL, NULL);
2168 if (r)
2169 return r;
2170
2171 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2172 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2173 &adev->gds.gws_gfx_bo, NULL, NULL);
2174 if (r)
2175 return r;
2176
2177 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2178 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2179 &adev->gds.oa_gfx_bo, NULL, NULL);
2180 if (r)
2181 return r;
2182
2183 adev->gfx.ce_ram_size = 0x8000;
2184
2185 r = gfx_v8_0_gpu_early_init(adev);
2186 if (r)
2187 return r;
2188
2189 return 0;
2190}
2191
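/*
 * sw_fini: tear down in roughly the reverse order of sw_init - GDS
 * BOs, rings, MQDs, KIQ, MEC and RLC buffers, then the microcode.
 */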
2192 static int gfx_v8_0_sw_fini(void *handle)
2193{
2194 int i;
2195 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2196
2197 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2198 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2199 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2200
2201 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2202 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2203 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2204 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2205
2206 amdgpu_gfx_compute_mqd_sw_fini(adev);
2207 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2208 amdgpu_gfx_kiq_fini(adev);
2209
2210 gfx_v8_0_mec_fini(adev);
2211 gfx_v8_0_rlc_fini(adev);
2212 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2213 &adev->gfx.rlc.clear_state_gpu_addr,
2214 (void **)&adev->gfx.rlc.cs_ptr);
2215 if ((adev->asic_type == CHIP_CARRIZO) ||
2216 (adev->asic_type == CHIP_STONEY)) {
2217 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2218 &adev->gfx.rlc.cp_table_gpu_addr,
2219 (void **)&adev->gfx.rlc.cp_table_ptr);
2220 }
2221 gfx_v8_0_free_microcode(adev);
2222
2223 return 0;
2224}
2225
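/*
 * Program the GB_TILE_MODE and GB_MACROTILE_MODE tables with per-ASIC
 * golden values; macrotile entry 7 (and, on Topaz, tile entries 7, 12,
 * 17 and 23) is skipped.
 */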
2226static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2227{
2228 uint32_t *modearray, *mod2array;
2229 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2230 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2231 u32 reg_offset;
2232
2233 modearray = adev->gfx.config.tile_mode_array;
2234 mod2array = adev->gfx.config.macrotile_mode_array;
2235
2236 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2237 modearray[reg_offset] = 0;
2238
2239 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2240 mod2array[reg_offset] = 0;
2241
2242 switch (adev->asic_type) {
2243 case CHIP_TOPAZ:
2244 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2245 PIPE_CONFIG(ADDR_SURF_P2) |
2246 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2247 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2248 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2249 PIPE_CONFIG(ADDR_SURF_P2) |
2250 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2251 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2252 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2253 PIPE_CONFIG(ADDR_SURF_P2) |
2254 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2255 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2256 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2257 PIPE_CONFIG(ADDR_SURF_P2) |
2258 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2259 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2260 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2261 PIPE_CONFIG(ADDR_SURF_P2) |
2262 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2263 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2264 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2265 PIPE_CONFIG(ADDR_SURF_P2) |
2266 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2267 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2268 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2269 PIPE_CONFIG(ADDR_SURF_P2) |
2270 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2271 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2272 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2273 PIPE_CONFIG(ADDR_SURF_P2));
2274 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2275 PIPE_CONFIG(ADDR_SURF_P2) |
2276 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2279 PIPE_CONFIG(ADDR_SURF_P2) |
2280 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2283 PIPE_CONFIG(ADDR_SURF_P2) |
2284 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2286 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2287 PIPE_CONFIG(ADDR_SURF_P2) |
2288 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2290 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291 PIPE_CONFIG(ADDR_SURF_P2) |
2292 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2294 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2295 PIPE_CONFIG(ADDR_SURF_P2) |
2296 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2299 PIPE_CONFIG(ADDR_SURF_P2) |
2300 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2302 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2303 PIPE_CONFIG(ADDR_SURF_P2) |
2304 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2307 PIPE_CONFIG(ADDR_SURF_P2) |
2308 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2311 PIPE_CONFIG(ADDR_SURF_P2) |
2312 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2315 PIPE_CONFIG(ADDR_SURF_P2) |
2316 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2319 PIPE_CONFIG(ADDR_SURF_P2) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2322 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2323 PIPE_CONFIG(ADDR_SURF_P2) |
2324 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2326 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2327 PIPE_CONFIG(ADDR_SURF_P2) |
2328 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2330 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2331 PIPE_CONFIG(ADDR_SURF_P2) |
2332 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2333 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2334 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2335 PIPE_CONFIG(ADDR_SURF_P2) |
2336 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2337 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2338 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2339 PIPE_CONFIG(ADDR_SURF_P2) |
2340 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2342 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2343 PIPE_CONFIG(ADDR_SURF_P2) |
2344 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2345 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2346
2347 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2350 NUM_BANKS(ADDR_SURF_8_BANK));
2351 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354 NUM_BANKS(ADDR_SURF_8_BANK));
2355 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 NUM_BANKS(ADDR_SURF_8_BANK));
2359 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2362 NUM_BANKS(ADDR_SURF_8_BANK));
2363 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2366 NUM_BANKS(ADDR_SURF_8_BANK));
2367 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2370 NUM_BANKS(ADDR_SURF_8_BANK));
2371 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2374 NUM_BANKS(ADDR_SURF_8_BANK));
2375 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2378 NUM_BANKS(ADDR_SURF_16_BANK));
2379 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2382 NUM_BANKS(ADDR_SURF_16_BANK));
2383 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2386 NUM_BANKS(ADDR_SURF_16_BANK));
2387 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2390 NUM_BANKS(ADDR_SURF_16_BANK));
2391 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394 NUM_BANKS(ADDR_SURF_16_BANK));
2395 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2396 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2397 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2398 NUM_BANKS(ADDR_SURF_16_BANK));
2399 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2400 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2401 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2402 NUM_BANKS(ADDR_SURF_8_BANK));
2403
2404 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2405 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2406 reg_offset != 23)
2407 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2408
2409 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2410 if (reg_offset != 7)
2411 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2412
2413 break;
2414 case CHIP_FIJI:
2415 case CHIP_VEGAM:
2416 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2419 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2423 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2425 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2427 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2431 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2432 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2435 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2436 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2437 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2439 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2440 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2442 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2443 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2444 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2445 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2446 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2447 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2448 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2449 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2450 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2451 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2452 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2453 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2455 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2457 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2459 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2460 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2461 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2462 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2463 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2464 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2465 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2466 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2467 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2470 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2471 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2472 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2473 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2474 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2475 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2476 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2477 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2478 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2479 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2480 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2481 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2482 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2483 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2484 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2485 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2486 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2487 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2488 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2489 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2491 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2492 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2493 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2494 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2495 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2496 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2497 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2498 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2499 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2500 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2501 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2502 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2503 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2504 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2505 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2506 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2507 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2508 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2509 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2510 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2511 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2512 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2513 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2514 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2515 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2516 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2517 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2518 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2519 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2520 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2521 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2522 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2523 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2524 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2525 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2526 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2527 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2528 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2529 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2531 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2532 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2533 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2534 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2535 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2536 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2537 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2538
2539 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2542 NUM_BANKS(ADDR_SURF_8_BANK));
2543 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2546 NUM_BANKS(ADDR_SURF_8_BANK));
2547 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2549 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2550 NUM_BANKS(ADDR_SURF_8_BANK));
2551 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2553 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554 NUM_BANKS(ADDR_SURF_8_BANK));
2555 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2557 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2558 NUM_BANKS(ADDR_SURF_8_BANK));
2559 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2561 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2562 NUM_BANKS(ADDR_SURF_8_BANK));
2563 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566 NUM_BANKS(ADDR_SURF_8_BANK));
2567 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2568 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2569 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2570 NUM_BANKS(ADDR_SURF_8_BANK));
2571 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2572 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2573 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2574 NUM_BANKS(ADDR_SURF_8_BANK));
2575 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2577 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578 NUM_BANKS(ADDR_SURF_8_BANK));
2579 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2581 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2582 NUM_BANKS(ADDR_SURF_8_BANK));
2583 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2585 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2586 NUM_BANKS(ADDR_SURF_8_BANK));
2587 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2589 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2590 NUM_BANKS(ADDR_SURF_8_BANK));
2591 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2593 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2594 NUM_BANKS(ADDR_SURF_4_BANK));
2595
2596 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2597 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2598
2599 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2600 if (reg_offset != 7)
2601 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2602
2603 break;
2604 case CHIP_TONGA:
2605 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2608 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2609 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2612 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2613 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2616 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2617 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2620 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2621 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2624 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2625 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2626 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2627 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2628 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2629 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2630 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2632 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2633 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2634 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2636 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2637 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2638 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2639 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2640 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2642 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2645 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2646 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2647 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2648 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2649 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2650 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2651 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2652 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2653 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2654 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2655 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2656 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2657 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2658 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2659 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2661 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2662 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2663 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2664 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2665 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2666 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2667 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2668 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2669 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2670 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2671 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2672 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2673 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2674 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2675 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2676 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2677 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2678 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2679 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2680 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2681 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2682 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2683 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2684 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2685 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2686 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2687 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2688 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2689 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2690 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2691 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2692 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2693 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2694 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2695 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2696 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2698 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2699 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2700 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2701 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2702 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2703 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2704 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2705 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2706 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2707 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2708 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2709 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2710 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2711 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2712 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2713 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2714 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2717 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2718 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2720 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2721 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2722 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2723 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2726 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2727
2728 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2729 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2730 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2731 NUM_BANKS(ADDR_SURF_16_BANK));
2732 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2734 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735 NUM_BANKS(ADDR_SURF_16_BANK));
2736 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2738 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2739 NUM_BANKS(ADDR_SURF_16_BANK));
2740 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2742 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2743 NUM_BANKS(ADDR_SURF_16_BANK));
2744 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2746 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747 NUM_BANKS(ADDR_SURF_16_BANK));
2748 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2749 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2750 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2751 NUM_BANKS(ADDR_SURF_16_BANK));
2752 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2753 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2754 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2755 NUM_BANKS(ADDR_SURF_16_BANK));
2756 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2758 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2759 NUM_BANKS(ADDR_SURF_16_BANK));
2760 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2762 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2763 NUM_BANKS(ADDR_SURF_16_BANK));
2764 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2765 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2766 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2767 NUM_BANKS(ADDR_SURF_16_BANK));
2768 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2771 NUM_BANKS(ADDR_SURF_16_BANK));
2772 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2774 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2775 NUM_BANKS(ADDR_SURF_8_BANK));
2776 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2778 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2779 NUM_BANKS(ADDR_SURF_4_BANK));
2780 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2782 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2783 NUM_BANKS(ADDR_SURF_4_BANK));
2784
2785 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2786 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2787
2788 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2789 if (reg_offset != 7)
2790 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2791
68182d90 2792 break;
2cc0c0b5 2793 case CHIP_POLARIS11:
c4642a47 2794 case CHIP_POLARIS12:
68182d90
FC
2795 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2798 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2799 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2800 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2802 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2803 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2806 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2807 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2810 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2811 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2814 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2815 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2816 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2818 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2819 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2822 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2823 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2824 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2826 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2827 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2828 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2829 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2830 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2831 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2833 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2835 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2836 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2837 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2838 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2840 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2841 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2842 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2843 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2844 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2845 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2846 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2847 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2848 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2849 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2850 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2851 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2852 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2854 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2855 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2856 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2857 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2858 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2860 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2861 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2862 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2863 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2864 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2865 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2866 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2868 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2869 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2870 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2871 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2872 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2873 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2874 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2875 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2876 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2877 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2878 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2879 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2880 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2881 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2882 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2883 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2884 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2885 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2886 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2887 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2888 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2889 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2890 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2891 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2892 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2893 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2894 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2895 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2896 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2897 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2898 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2899 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2900 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2901 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2904 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2905 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2907 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2908 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2909 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2910 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2911 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2912 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2913 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2914 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2915 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2916 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2917
2918 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2919 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2920 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2921 NUM_BANKS(ADDR_SURF_16_BANK));
2922
2923 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2924 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2925 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2926 NUM_BANKS(ADDR_SURF_16_BANK));
2927
2928 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2929 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2930 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2931 NUM_BANKS(ADDR_SURF_16_BANK));
2932
2933 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2934 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2935 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2936 NUM_BANKS(ADDR_SURF_16_BANK));
2937
2938 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2941 NUM_BANKS(ADDR_SURF_16_BANK));
2942
2943 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2944 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2945 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2946 NUM_BANKS(ADDR_SURF_16_BANK));
2947
2948 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2949 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2950 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2951 NUM_BANKS(ADDR_SURF_16_BANK));
2952
2953 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2954 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2955 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2956 NUM_BANKS(ADDR_SURF_16_BANK));
2957
2958 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2959 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2960 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2961 NUM_BANKS(ADDR_SURF_16_BANK));
2962
2963 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2965 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966 NUM_BANKS(ADDR_SURF_16_BANK));
2967
2968 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971 NUM_BANKS(ADDR_SURF_16_BANK));
2972
2973 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2974 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2975 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2976 NUM_BANKS(ADDR_SURF_16_BANK));
2977
2978 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2979 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2980 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2981 NUM_BANKS(ADDR_SURF_8_BANK));
2982
2983 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2985 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2986 NUM_BANKS(ADDR_SURF_4_BANK));
2987
2988 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2989 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2990
2991 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2992 if (reg_offset != 7)
2993 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2994
2995 break;
2cc0c0b5 2996 case CHIP_POLARIS10:
68182d90
FC
2997 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3000 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3001 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3002 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3004 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3005 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3007 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3008 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3009 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3010 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3012 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3013 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3014 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3016 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3017 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3018 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3020 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3021 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3023 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3024 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3025 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3026 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3027 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3028 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3029 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3030 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3031 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3033 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3034 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3035 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3036 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3037 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3038 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3039 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3040 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3041 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3042 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3043 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3044 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3045 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3046 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3047 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3048 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3049 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3050 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3051 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3052 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3053 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3054 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3055 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3056 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3057 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3058 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3059 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3060 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3061 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3062 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3063 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3064 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3065 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3066 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3067 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3068 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3069 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3071 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3072 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3073 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3074 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3075 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3076 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3077 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3079 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3080 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3081 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3082 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3083 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3084 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3085 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3086 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3087 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3088 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3089 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3090 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3091 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3092 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3093 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3094 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3095 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3096 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3097 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3099 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3100 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3101 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3102 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3103 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3104 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3105 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3106 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3107 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3109 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3112 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3113 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3114 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3115 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3116 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3117 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3118 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3119
3120 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3121 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3122 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3123 NUM_BANKS(ADDR_SURF_16_BANK));
3124
3125 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3126 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3127 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3128 NUM_BANKS(ADDR_SURF_16_BANK));
3129
3130 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3131 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3132 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3133 NUM_BANKS(ADDR_SURF_16_BANK));
3134
3135 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3137 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3138 NUM_BANKS(ADDR_SURF_16_BANK));
3139
3140 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3141 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3142 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3143 NUM_BANKS(ADDR_SURF_16_BANK));
3144
3145 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3146 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3147 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3148 NUM_BANKS(ADDR_SURF_16_BANK));
3149
3150 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3151 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3152 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3153 NUM_BANKS(ADDR_SURF_16_BANK));
3154
3155 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3156 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3157 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3158 NUM_BANKS(ADDR_SURF_16_BANK));
3159
3160 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3161 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3162 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3163 NUM_BANKS(ADDR_SURF_16_BANK));
3164
3165 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3166 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3167 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3168 NUM_BANKS(ADDR_SURF_16_BANK));
3169
3170 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3171 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3172 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3173 NUM_BANKS(ADDR_SURF_16_BANK));
3174
3175 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3176 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3177 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3178 NUM_BANKS(ADDR_SURF_8_BANK));
3179
3180 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3182 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3183 NUM_BANKS(ADDR_SURF_4_BANK));
3184
3185 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3186 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3187 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3188 NUM_BANKS(ADDR_SURF_4_BANK));
3189
3190 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3191 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3192
3193 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3194 if (reg_offset != 7)
3195 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3196
aaa36a97 3197 break;
e3c7656c 3198 case CHIP_STONEY:
90bea0ab
TSD
3199 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200 PIPE_CONFIG(ADDR_SURF_P2) |
3201 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3202 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3203 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3204 PIPE_CONFIG(ADDR_SURF_P2) |
3205 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3206 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3207 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3208 PIPE_CONFIG(ADDR_SURF_P2) |
3209 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3210 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3211 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3212 PIPE_CONFIG(ADDR_SURF_P2) |
3213 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3214 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3215 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3216 PIPE_CONFIG(ADDR_SURF_P2) |
3217 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3218 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3219 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3220 PIPE_CONFIG(ADDR_SURF_P2) |
3221 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3222 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3223 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3224 PIPE_CONFIG(ADDR_SURF_P2) |
3225 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3226 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3227 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3228 PIPE_CONFIG(ADDR_SURF_P2));
3229 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3230 PIPE_CONFIG(ADDR_SURF_P2) |
3231 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3232 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3233 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3234 PIPE_CONFIG(ADDR_SURF_P2) |
3235 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3236 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3237 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238 PIPE_CONFIG(ADDR_SURF_P2) |
3239 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3240 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3241 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3242 PIPE_CONFIG(ADDR_SURF_P2) |
3243 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3244 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3245 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3246 PIPE_CONFIG(ADDR_SURF_P2) |
3247 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3248 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3249 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3250 PIPE_CONFIG(ADDR_SURF_P2) |
3251 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3253 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3254 PIPE_CONFIG(ADDR_SURF_P2) |
3255 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3256 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3257 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3258 PIPE_CONFIG(ADDR_SURF_P2) |
3259 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3260 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3261 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3262 PIPE_CONFIG(ADDR_SURF_P2) |
3263 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3264 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3265 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3266 PIPE_CONFIG(ADDR_SURF_P2) |
3267 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3268 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3269 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3270 PIPE_CONFIG(ADDR_SURF_P2) |
3271 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3272 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3273 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3274 PIPE_CONFIG(ADDR_SURF_P2) |
3275 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3276 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3277 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3278 PIPE_CONFIG(ADDR_SURF_P2) |
3279 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3280 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3281 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3282 PIPE_CONFIG(ADDR_SURF_P2) |
3283 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3284 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3285 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3286 PIPE_CONFIG(ADDR_SURF_P2) |
3287 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3288 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3289 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3290 PIPE_CONFIG(ADDR_SURF_P2) |
3291 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3292 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3293 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3294 PIPE_CONFIG(ADDR_SURF_P2) |
3295 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3296 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3297 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3298 PIPE_CONFIG(ADDR_SURF_P2) |
3299 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3301
3302 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3303 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3304 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3305 NUM_BANKS(ADDR_SURF_8_BANK));
3306 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3307 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3308 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3309 NUM_BANKS(ADDR_SURF_8_BANK));
3310 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3311 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3312 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3313 NUM_BANKS(ADDR_SURF_8_BANK));
3314 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3315 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3316 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3317 NUM_BANKS(ADDR_SURF_8_BANK));
3318 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3319 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3320 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3321 NUM_BANKS(ADDR_SURF_8_BANK));
3322 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3325 NUM_BANKS(ADDR_SURF_8_BANK));
3326 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3327 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3328 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3329 NUM_BANKS(ADDR_SURF_8_BANK));
3330 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3331 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3332 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3333 NUM_BANKS(ADDR_SURF_16_BANK));
3334 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3335 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3336 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3337 NUM_BANKS(ADDR_SURF_16_BANK));
3338 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3339 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3340 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3341 NUM_BANKS(ADDR_SURF_16_BANK));
3342 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3343 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3344 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3345 NUM_BANKS(ADDR_SURF_16_BANK));
3346 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3347 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3348 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349 NUM_BANKS(ADDR_SURF_16_BANK));
3350 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3351 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3352 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3353 NUM_BANKS(ADDR_SURF_16_BANK));
3354 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3355 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3356 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3357 NUM_BANKS(ADDR_SURF_8_BANK));
3358
3359 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3360 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3361 reg_offset != 23)
3362 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3363
3364 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3365 if (reg_offset != 7)
3366 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3367
e3c7656c 3368 break;
aaa36a97 3369 default:
90bea0ab
TSD
3370 dev_warn(adev->dev,
3371 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3372 adev->asic_type);
3373
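	/* fall through */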
3374 case CHIP_CARRIZO:
3375 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3376 PIPE_CONFIG(ADDR_SURF_P2) |
3377 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3378 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3379 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3380 PIPE_CONFIG(ADDR_SURF_P2) |
3381 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3382 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3383 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3384 PIPE_CONFIG(ADDR_SURF_P2) |
3385 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3386 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3387 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3388 PIPE_CONFIG(ADDR_SURF_P2) |
3389 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3390 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3391 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3392 PIPE_CONFIG(ADDR_SURF_P2) |
3393 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3394 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3395 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3396 PIPE_CONFIG(ADDR_SURF_P2) |
3397 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3398 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3399 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3400 PIPE_CONFIG(ADDR_SURF_P2) |
3401 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3402 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3403 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3404 PIPE_CONFIG(ADDR_SURF_P2));
3405 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3406 PIPE_CONFIG(ADDR_SURF_P2) |
3407 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3408 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3409 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3410 PIPE_CONFIG(ADDR_SURF_P2) |
3411 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3412 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3413 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3414 PIPE_CONFIG(ADDR_SURF_P2) |
3415 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3416 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3417 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3418 PIPE_CONFIG(ADDR_SURF_P2) |
3419 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3420 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3421 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3422 PIPE_CONFIG(ADDR_SURF_P2) |
3423 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3424 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3425 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3426 PIPE_CONFIG(ADDR_SURF_P2) |
3427 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3428 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3429 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3430 PIPE_CONFIG(ADDR_SURF_P2) |
3431 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3432 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3433 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3434 PIPE_CONFIG(ADDR_SURF_P2) |
3435 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3436 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3437 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3438 PIPE_CONFIG(ADDR_SURF_P2) |
3439 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3440 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3441 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3442 PIPE_CONFIG(ADDR_SURF_P2) |
3443 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3444 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3445 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3446 PIPE_CONFIG(ADDR_SURF_P2) |
3447 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3449 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3450 PIPE_CONFIG(ADDR_SURF_P2) |
3451 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3452 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3453 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3454 PIPE_CONFIG(ADDR_SURF_P2) |
3455 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3456 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3457 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3458 PIPE_CONFIG(ADDR_SURF_P2) |
3459 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3461 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3462 PIPE_CONFIG(ADDR_SURF_P2) |
3463 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3464 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3465 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3466 PIPE_CONFIG(ADDR_SURF_P2) |
3467 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3468 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3469 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3470 PIPE_CONFIG(ADDR_SURF_P2) |
3471 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3472 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3473 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3474 PIPE_CONFIG(ADDR_SURF_P2) |
3475 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3476 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3477
3478 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3481 NUM_BANKS(ADDR_SURF_8_BANK));
3482 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3483 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3484 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3485 NUM_BANKS(ADDR_SURF_8_BANK));
3486 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3487 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3488 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3489 NUM_BANKS(ADDR_SURF_8_BANK));
3490 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3493 NUM_BANKS(ADDR_SURF_8_BANK));
3494 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3495 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3496 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3497 NUM_BANKS(ADDR_SURF_8_BANK));
3498 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3499 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3500 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3501 NUM_BANKS(ADDR_SURF_8_BANK));
3502 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3503 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3504 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3505 NUM_BANKS(ADDR_SURF_8_BANK));
3506 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3507 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3508 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3509 NUM_BANKS(ADDR_SURF_16_BANK));
3510 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3511 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3512 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3513 NUM_BANKS(ADDR_SURF_16_BANK));
3514 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3515 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3516 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3517 NUM_BANKS(ADDR_SURF_16_BANK));
3518 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3519 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3520 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3521 NUM_BANKS(ADDR_SURF_16_BANK));
3522 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3523 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3524 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3525 NUM_BANKS(ADDR_SURF_16_BANK));
3526 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3529 NUM_BANKS(ADDR_SURF_16_BANK));
3530 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3531 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3532 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3533 NUM_BANKS(ADDR_SURF_8_BANK));
3534
3535 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3536 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3537 reg_offset != 23)
3538 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3539
3540 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3541 if (reg_offset != 7)
3542 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3543
3544 break;
aaa36a97
AD
3545 }
3546}
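/*
 * Editor's note (not in the original source): every modearray[] /
 * mod2array[] entry above is just an OR of pre-shifted bitfields built
 * by the ARRAY_MODE()/PIPE_CONFIG()/BANK_WIDTH()/... helper macros, and
 * each case ends by writing the finished words straight to the
 * per-index tiling registers, skipping reserved indices (such as 7):
 *
 *	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
 *		WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
 */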
3547
05fb7291 3548static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
9559ef5b 3549 u32 se_num, u32 sh_num, u32 instance)
aaa36a97 3550{
9559ef5b
TSD
3551 u32 data;
3552
3553 if (instance == 0xffffffff)
3554 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3555 else
3556 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
aaa36a97 3557
5003f278 3558 if (se_num == 0xffffffff)
aaa36a97 3559 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
5003f278 3560 else
aaa36a97 3561 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
5003f278
TSD
3562
3563 if (sh_num == 0xffffffff)
3564 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3565 else
aaa36a97 3566 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
5003f278 3567
aaa36a97
AD
3568 WREG32(mmGRBM_GFX_INDEX, data);
3569}
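/*
 * Usage sketch (editor's note): 0xffffffff in se_num/sh_num/instance
 * selects the broadcast bit instead of an index, so the callers below
 * follow the pattern
 *
 *	gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
 *	... per-SE/SH register access ...
 *	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 *
 * i.e. broadcast is always restored before grbm_idx_mutex is dropped.
 */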
3570
f7a9ee81
AG
3571static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3572 u32 me, u32 pipe, u32 q)
3573{
3574 vi_srbm_select(adev, me, pipe, q, 0);
3575}
3576
8f8e00c1 3577static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
aaa36a97
AD
3578{
3579 u32 data, mask;
3580
5003f278
TSD
3581 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3582 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
aaa36a97 3583
5003f278 3584 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
aaa36a97 3585
378506a7
AD
3586 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3587 adev->gfx.config.max_sh_per_se);
aaa36a97 3588
8f8e00c1 3589 return (~data) & mask;
aaa36a97
AD
3590}
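/*
 * Worked example (editor's note, assuming amdgpu_gfx_create_bitmask(n)
 * builds an n-bit mask): with max_backends_per_se = 4 and
 * max_sh_per_se = 1, mask = 0xf; if backend 2 is fused off, the
 * extracted disable field is 0x4, so (~0x4) & 0xf = 0xb, i.e. RBs
 * 0, 1 and 3 are reported active.
 */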
3591
167ac573
HR
3592static void
3593gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3594{
3595 switch (adev->asic_type) {
3596 case CHIP_FIJI:
71765469 3597 case CHIP_VEGAM:
167ac573
HR
3598 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3599 RB_XSEL2(1) | PKR_MAP(2) |
3600 PKR_XSEL(1) | PKR_YSEL(1) |
3601 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3602 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3603 SE_PAIR_YSEL(2);
3604 break;
3605 case CHIP_TONGA:
3606 case CHIP_POLARIS10:
3607 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3608 SE_XSEL(1) | SE_YSEL(1);
3609 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3610 SE_PAIR_YSEL(2);
3611 break;
3612 case CHIP_TOPAZ:
3613 case CHIP_CARRIZO:
3614 *rconf |= RB_MAP_PKR0(2);
3615 *rconf1 |= 0x0;
3616 break;
3617 case CHIP_POLARIS11:
c4642a47 3618 case CHIP_POLARIS12:
167ac573
HR
3619 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3620 SE_XSEL(1) | SE_YSEL(1);
3621 *rconf1 |= 0x0;
3622 break;
3623 case CHIP_STONEY:
3624 *rconf |= 0x0;
3625 *rconf1 |= 0x0;
3626 break;
3627 default:
3628 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3629 break;
3630 }
3631}
3632
3633static void
3634gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3635 u32 raster_config, u32 raster_config_1,
3636 unsigned rb_mask, unsigned num_rb)
3637{
3638 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3639 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3640 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3641 unsigned rb_per_se = num_rb / num_se;
3642 unsigned se_mask[4];
3643 unsigned se;
3644
3645 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3646 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3647 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3648 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3649
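	/*
	 * Worked example (editor's note): with num_rb = 8 over num_se = 4
	 * (rb_per_se = 2) and a full rb_mask of 0xff, the shifts above give
	 * se_mask[] = { 0x03, 0x0c, 0x30, 0xc0 }, one RB group per shader
	 * engine; harvested RBs simply drop out of the matching entry.
	 */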
3650 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3651 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3652 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3653
3654 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3655 (!se_mask[2] && !se_mask[3]))) {
3656 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3657
3658 if (!se_mask[0] && !se_mask[1]) {
3659 raster_config_1 |=
3660 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3661 } else {
3662 raster_config_1 |=
3663 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3664 }
3665 }
3666
3667 for (se = 0; se < num_se; se++) {
3668 unsigned raster_config_se = raster_config;
3669 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3670 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3671 int idx = (se / 2) * 2;
3672
3673 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3674 raster_config_se &= ~SE_MAP_MASK;
3675
3676 if (!se_mask[idx]) {
3677 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3678 } else {
3679 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3680 }
3681 }
3682
3683 pkr0_mask &= rb_mask;
3684 pkr1_mask &= rb_mask;
3685 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3686 raster_config_se &= ~PKR_MAP_MASK;
3687
3688 if (!pkr0_mask) {
3689 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3690 } else {
3691 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3692 }
3693 }
3694
3695 if (rb_per_se >= 2) {
3696 unsigned rb0_mask = 1 << (se * rb_per_se);
3697 unsigned rb1_mask = rb0_mask << 1;
3698
3699 rb0_mask &= rb_mask;
3700 rb1_mask &= rb_mask;
3701 if (!rb0_mask || !rb1_mask) {
3702 raster_config_se &= ~RB_MAP_PKR0_MASK;
3703
3704 if (!rb0_mask) {
3705 raster_config_se |=
3706 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3707 } else {
3708 raster_config_se |=
3709 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3710 }
3711 }
3712
3713 if (rb_per_se > 2) {
3714 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3715 rb1_mask = rb0_mask << 1;
3716 rb0_mask &= rb_mask;
3717 rb1_mask &= rb_mask;
3718 if (!rb0_mask || !rb1_mask) {
3719 raster_config_se &= ~RB_MAP_PKR1_MASK;
3720
3721 if (!rb0_mask) {
3722 raster_config_se |=
3723 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3724 } else {
3725 raster_config_se |=
3726 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3727 }
3728 }
3729 }
3730 }
3731
3732 /* GRBM_GFX_INDEX has a different offset on VI */
3733 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3734 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3735 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3736 }
3737
3738 /* GRBM_GFX_INDEX has a different offset on VI */
3739 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3740}
3741
8f8e00c1 3742static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
aaa36a97
AD
3743{
3744 int i, j;
aac1e3ca 3745 u32 data;
167ac573 3746 u32 raster_config = 0, raster_config_1 = 0;
8f8e00c1 3747 u32 active_rbs = 0;
6157bd7a
FC
3748 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3749 adev->gfx.config.max_sh_per_se;
167ac573 3750 unsigned num_rb_pipes;
aaa36a97
AD
3751
3752 mutex_lock(&adev->grbm_idx_mutex);
8f8e00c1
AD
3753 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3754 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
9559ef5b 3755 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
8f8e00c1
AD
3756 data = gfx_v8_0_get_rb_active_bitmap(adev);
3757 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
6157bd7a 3758 rb_bitmap_width_per_sh);
aaa36a97
AD
3759 }
3760 }
9559ef5b 3761 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
aaa36a97 3762
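	/*
	 * Editor's note: active_rbs packs one rb_bitmap_width_per_sh-wide
	 * field per (SE, SH) pair, e.g. with 4 RBs per SH the bitmap for
	 * SE1/SH0 lands at bits 4..7; num_rbs below is the popcount of the
	 * packed bitmap.
	 */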
8f8e00c1 3763 adev->gfx.config.backend_enable_mask = active_rbs;
aac1e3ca 3764 adev->gfx.config.num_rbs = hweight32(active_rbs);
167ac573
HR
3765
3766 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3767 adev->gfx.config.max_shader_engines, 16);
3768
3769 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3770
3771 if (!adev->gfx.config.backend_enable_mask ||
3772 adev->gfx.config.num_rbs >= num_rb_pipes) {
3773 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3774 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3775 } else {
3776 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3777 adev->gfx.config.backend_enable_mask,
3778 num_rb_pipes);
3779 }
3780
392f0c77
AD
3781 /* cache the values for userspace */
3782 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3783 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3784 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3785 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3786 RREG32(mmCC_RB_BACKEND_DISABLE);
3787 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3788 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3789 adev->gfx.config.rb_config[i][j].raster_config =
3790 RREG32(mmPA_SC_RASTER_CONFIG);
3791 adev->gfx.config.rb_config[i][j].raster_config_1 =
3792 RREG32(mmPA_SC_RASTER_CONFIG_1);
3793 }
3794 }
3795 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
167ac573 3796 mutex_unlock(&adev->grbm_idx_mutex);
aaa36a97
AD
3797}
3798
cd06bf68 3799/**
35c7a952 3800 * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
cd06bf68 3801 *
dc102c43 3802 * @adev: amdgpu_device pointer
cd06bf68
BG
3803 *
3804 * Initialize compute vmid sh_mem registers
3805 *
3806 */
3807#define DEFAULT_SH_MEM_BASES (0x6000)
3808#define FIRST_COMPUTE_VMID (8)
3809#define LAST_COMPUTE_VMID (16)
35c7a952 3810static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
cd06bf68
BG
3811{
3812 int i;
3813 uint32_t sh_mem_config;
3814 uint32_t sh_mem_bases;
3815
3816 /*
3817 * Configure apertures:
3818 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3819 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3820 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3821 */
3822 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
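	/*
	 * Editor's note: SH_MEM_BASES appears to pack the two aperture
	 * bases as 16-bit fields holding the top address bits, so this is
	 * 0x6000 | (0x6000 << 16) = 0x60006000, matching the
	 * 0x60000000'00000000-based apertures described above.
	 */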
3823
3824 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3825 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3826 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3827 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3828 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3829 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3830
3831 mutex_lock(&adev->srbm_mutex);
3832 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3833 vi_srbm_select(adev, 0, 0, 0, i);
3834 /* CP and shaders */
3835 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3836 WREG32(mmSH_MEM_APE1_BASE, 1);
3837 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3838 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3839 }
3840 vi_srbm_select(adev, 0, 0, 0, 0);
3841 mutex_unlock(&adev->srbm_mutex);
3842}
3843
df6e2c4a
JZ
3844static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3845{
3846 switch (adev->asic_type) {
3847 default:
3848 adev->gfx.config.double_offchip_lds_buf = 1;
3849 break;
3850 case CHIP_CARRIZO:
3851 case CHIP_STONEY:
3852 adev->gfx.config.double_offchip_lds_buf = 0;
3853 break;
3854 }
3855}
3856
aaa36a97
AD
3857static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3858{
8fe73328 3859 u32 tmp, sh_static_mem_cfg;
aaa36a97
AD
3860 int i;
3861
61cb8cef 3862 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
0bde3a95
AD
3863 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3864 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3865 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
aaa36a97
AD
3866
3867 gfx_v8_0_tiling_mode_table_init(adev);
8f8e00c1 3868 gfx_v8_0_setup_rb(adev);
7dae69a2 3869 gfx_v8_0_get_cu_info(adev);
df6e2c4a 3870 gfx_v8_0_config_init(adev);
aaa36a97
AD
3871
3872 /* XXX SH_MEM regs */
3873 /* where to put LDS, scratch, GPUVM in FSA64 space */
8fe73328
JZ
3874 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3875 SWIZZLE_ENABLE, 1);
3876 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3877 ELEMENT_SIZE, 1);
3878 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3879 INDEX_STRIDE, 3);
111159b5
FK
3880 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3881
aaa36a97 3882 mutex_lock(&adev->srbm_mutex);
7645670d 3883 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
aaa36a97
AD
3884 vi_srbm_select(adev, 0, 0, 0, i);
3885 /* CP and shaders */
3886 if (i == 0) {
3887 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3888 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
0bde3a95 3889 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
74a5d165 3890 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
aaa36a97 3891 WREG32(mmSH_MEM_CONFIG, tmp);
8fe73328 3892 WREG32(mmSH_MEM_BASES, 0);
aaa36a97
AD
3893 } else {
3894 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
8fe73328 3895 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
0bde3a95 3896 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
74a5d165 3897 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
aaa36a97 3898 WREG32(mmSH_MEM_CONFIG, tmp);
770d13b1 3899 tmp = adev->gmc.shared_aperture_start >> 48;
8fe73328 3900 WREG32(mmSH_MEM_BASES, tmp);
aaa36a97
AD
3901 }
3902
3903 WREG32(mmSH_MEM_APE1_BASE, 1);
3904 WREG32(mmSH_MEM_APE1_LIMIT, 0);
aaa36a97
AD
3905 }
3906 vi_srbm_select(adev, 0, 0, 0, 0);
3907 mutex_unlock(&adev->srbm_mutex);
3908
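	/*
	 * Editor's note: VMID 0 above is the kernel context (UC memory
	 * types, SH_MEM_BASES = 0), while the other VMIDs get MTYPE_NC and
	 * the shared aperture base; compute VMIDs 8..15 are re-programmed
	 * for the HSA apertures by gfx_v8_0_init_compute_vmid() below.
	 */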
35c7a952 3909 gfx_v8_0_init_compute_vmid(adev);
cd06bf68 3910
aaa36a97
AD
3911 mutex_lock(&adev->grbm_idx_mutex);
3912 /*
3913 * make sure that the following register writes are broadcast
3914 * to all the shaders
3915 */
9559ef5b 3916 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
aaa36a97
AD
3917
3918 WREG32(mmPA_SC_FIFO_SIZE,
3919 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3920 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3921 (adev->gfx.config.sc_prim_fifo_size_backend <<
3922 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3923 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3924 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3925 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3926 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
d2383267 3927
3928 tmp = RREG32(mmSPI_ARB_PRIORITY);
3929 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3930 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3931 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3932 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3933 WREG32(mmSPI_ARB_PRIORITY, tmp);
3934
aaa36a97
AD
3935 mutex_unlock(&adev->grbm_idx_mutex);
3936
3937}
3938
3939static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3940{
3941 u32 i, j, k;
3942 u32 mask;
3943
3944 mutex_lock(&adev->grbm_idx_mutex);
3945 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3946 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
9559ef5b 3947 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
aaa36a97
AD
3948 for (k = 0; k < adev->usec_timeout; k++) {
3949 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3950 break;
3951 udelay(1);
3952 }
1366b2d0 3953 if (k == adev->usec_timeout) {
3954 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3955 0xffffffff, 0xffffffff);
3956 mutex_unlock(&adev->grbm_idx_mutex);
3957 DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
3958 i, j);
3959 return;
3960 }
aaa36a97
AD
3961 }
3962 }
9559ef5b 3963 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
aaa36a97
AD
3964 mutex_unlock(&adev->grbm_idx_mutex);
3965
3966 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3967 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3968 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3969 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3970 for (k = 0; k < adev->usec_timeout; k++) {
3971 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3972 break;
3973 udelay(1);
3974 }
3975}
3976
3977static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3978 bool enable)
3979{
3980 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3981
0d07db7e
TSD
3982 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3983 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3984 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3985 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3986
aaa36a97
AD
3987 WREG32(mmCP_INT_CNTL_RING0, tmp);
3988}
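/*
 * Editor's note: WREG32_FIELD(reg, field, val), used heavily below, is
 * amdgpu's read-modify-write helper; it is roughly equivalent to the
 * open-coded sequence above:
 *
 *	tmp = RREG32(mmREG);
 *	tmp = REG_SET_FIELD(tmp, REG, FIELD, val);
 *	WREG32(mmREG, tmp);
 */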
3989
2b6cd977
EH
3990static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3991{
3992 /* csib */
3993 WREG32(mmRLC_CSIB_ADDR_HI,
3994 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3995 WREG32(mmRLC_CSIB_ADDR_LO,
3996 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3997 WREG32(mmRLC_CSIB_LENGTH,
3998 adev->gfx.rlc.clear_state_size);
3999}
4000
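/*
 * Editor's note on the list format parsed below (inferred from the code,
 * not from documentation): entries are terminated by a 0xFFFFFFFF
 * sentinel; two words past each entry header sits an index value, which
 * is deduplicated into unique_indices[] and then replaced in place by
 * its position in that table.
 */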
4001static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
4002 int ind_offset,
4003 int list_size,
4004 int *unique_indices,
4005 int *indices_count,
4006 int max_indices,
4007 int *ind_start_offsets,
4008 int *offset_count,
4009 int max_offset)
4010{
4011 int indices;
4012 bool new_entry = true;
4013
4014 for (; ind_offset < list_size; ind_offset++) {
4015
4016 if (new_entry) {
4017 new_entry = false;
4018 ind_start_offsets[*offset_count] = ind_offset;
4019 *offset_count = *offset_count + 1;
4020 BUG_ON(*offset_count >= max_offset);
4021 }
4022
4023 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
4024 new_entry = true;
4025 continue;
4026 }
4027
4028 ind_offset += 2;
4029
4030 /* look for the matching index */
4031 for (indices = 0;
4032 indices < *indices_count;
4033 indices++) {
4034 if (unique_indices[indices] ==
4035 register_list_format[ind_offset])
4036 break;
4037 }
4038
4039 if (indices >= *indices_count) {
4040 unique_indices[*indices_count] =
4041 register_list_format[ind_offset];
4042 indices = *indices_count;
4043 *indices_count = *indices_count + 1;
4044 BUG_ON(*indices_count >= max_indices);
4045 }
4046
4047 register_list_format[ind_offset] = indices;
4048 }
4049}
4050
4051static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4052{
4053 int i, temp, data;
4054 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4055 int indices_count = 0;
4056 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4057 int offset_count = 0;
4058
4059 int list_size;
4060 unsigned int *register_list_format =
4061 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3f12325a 4062 if (!register_list_format)
2b6cd977
EH
4063 return -ENOMEM;
4064 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4065 adev->gfx.rlc.reg_list_format_size_bytes);
4066
4067 gfx_v8_0_parse_ind_reg_list(register_list_format,
4068 RLC_FormatDirectRegListLength,
4069 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4070 unique_indices,
4071 &indices_count,
c1b24a14 4072 ARRAY_SIZE(unique_indices),
2b6cd977
EH
4073 indirect_start_offsets,
4074 &offset_count,
c1b24a14 4075 ARRAY_SIZE(indirect_start_offsets));
2b6cd977
EH
4076
4077 /* save and restore list */
61cb8cef 4078 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
2b6cd977
EH
4079
4080 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4081 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4082 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4083
4084 /* indirect list */
4085 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4086 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4087 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4088
4089 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4090 list_size = list_size >> 1;
4091 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4092 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
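	/*
	 * Editor's note: the halving of list_size above suggests the
	 * restore list is stored as (register, value) pairs, so the RLC is
	 * handed the pair count rather than the raw dword count.
	 */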
4093
4094 /* starting offsets */
4095 WREG32(mmRLC_GPM_SCRATCH_ADDR,
4096 adev->gfx.rlc.starting_offsets_start);
c1b24a14 4097 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2b6cd977
EH
4098 WREG32(mmRLC_GPM_SCRATCH_DATA,
4099 indirect_start_offsets[i]);
4100
4101 /* unique indices */
4102 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4103 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
c1b24a14 4104 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
202e0b22 4105 if (unique_indices[i] != 0) {
b85c9d2a
ML
4106 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4107 WREG32(data + i, unique_indices[i] >> 20);
202e0b22 4108 }
2b6cd977
EH
4109 }
4110 kfree(register_list_format);
4111
4112 return 0;
4113}
4114
4115static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4116{
61cb8cef 4117 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4118}
4119
fb16007b 4120static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4121{
4122 uint32_t data;
4123
4124 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4125
4126 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4127 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4128 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4129 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4130 WREG32(mmRLC_PG_DELAY, data);
4131
4132 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4133 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4134
4135}
4136
4137static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4138 bool enable)
4139{
61cb8cef 4140 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4141}
4142
4143static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4144 bool enable)
4145{
61cb8cef 4146 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4147}
4148
4149static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4150{
eb584241 4151 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4152}
4153
4154static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4155{
4156 if ((adev->asic_type == CHIP_CARRIZO) ||
4157 (adev->asic_type == CHIP_STONEY)) {
4158 gfx_v8_0_init_csb(adev);
4159 gfx_v8_0_init_save_restore_list(adev);
4160 gfx_v8_0_enable_save_restore_machine(adev);
4161 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4162 gfx_v8_0_init_power_gating(adev);
4163 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
c4642a47 4164 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4165 (adev->asic_type == CHIP_POLARIS12) ||
4166 (adev->asic_type == CHIP_VEGAM)) {
4167 gfx_v8_0_init_csb(adev);
4168 gfx_v8_0_init_save_restore_list(adev);
4169 gfx_v8_0_enable_save_restore_machine(adev);
4170 gfx_v8_0_init_power_gating(adev);
2b6cd977 4171 }
c4d17b81 4172
4173}
4174
761c2e82 4175static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
aaa36a97 4176{
61cb8cef 4177 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4178
4179 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4180 gfx_v8_0_wait_for_rlc_serdes(adev);
4181}
4182
4183static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4184{
61cb8cef 4185 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
aaa36a97 4186 udelay(50);
4187
4188 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4189 udelay(50);
4190}
4191
4192static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4193{
61cb8cef 4194 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4195
4196	/* on APUs such as Carrizo, the CP interrupt is enabled only after the CP is initialized */
e3c7656c 4197 if (!(adev->flags & AMD_IS_APU))
4198 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4199
4200 udelay(50);
4201}
4202
4203static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4204{
4205 const struct rlc_firmware_header_v2_0 *hdr;
4206 const __le32 *fw_data;
4207 unsigned i, fw_size;
4208
4209 if (!adev->gfx.rlc_fw)
4210 return -EINVAL;
4211
4212 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4213 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4214
4215 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4216 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4217 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4218
4219 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4220 for (i = 0; i < fw_size; i++)
4221 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4222 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4223
4224 return 0;
4225}
4226
4227static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4228{
4229 int r;
6ae81452 4230 u32 tmp;
4231
4232 gfx_v8_0_rlc_stop(adev);
4233
4234 /* disable CG */
4235 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4236 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4237 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4238 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
2cc0c0b5 4239 if (adev->asic_type == CHIP_POLARIS11 ||
c4642a47 4240 adev->asic_type == CHIP_POLARIS10 ||
4241 adev->asic_type == CHIP_POLARIS12 ||
4242 adev->asic_type == CHIP_VEGAM) {
4243 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4244 tmp &= ~0x3;
4245 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4246 }
4247
4248 /* disable PG */
4249 WREG32(mmRLC_PG_CNTL, 0);
4250
4251 gfx_v8_0_rlc_reset(adev);
4252 gfx_v8_0_init_pg(adev);
4253
4254
4255 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4256 /* legacy rlc firmware loading */
4257 r = gfx_v8_0_rlc_load_microcode(adev);
4258 if (r)
4259 return r;
4260 }
4261
4262 gfx_v8_0_rlc_start(adev);
4263
4264 return 0;
4265}
4266
4267static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4268{
4269 int i;
4270 u32 tmp = RREG32(mmCP_ME_CNTL);
4271
4272 if (enable) {
4273 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4274 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4275 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4276 } else {
4277 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4278 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4279 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4280 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4281 adev->gfx.gfx_ring[i].ready = false;
4282 }
4283 WREG32(mmCP_ME_CNTL, tmp);
4284 udelay(50);
4285}
4286
4287static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4288{
4289 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4290 const struct gfx_firmware_header_v1_0 *ce_hdr;
4291 const struct gfx_firmware_header_v1_0 *me_hdr;
4292 const __le32 *fw_data;
4293 unsigned i, fw_size;
4294
4295 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4296 return -EINVAL;
4297
4298 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4299 adev->gfx.pfp_fw->data;
4300 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4301 adev->gfx.ce_fw->data;
4302 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4303 adev->gfx.me_fw->data;
4304
4305 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4306 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4307 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4308
4309 gfx_v8_0_cp_gfx_enable(adev, false);
4310
4311 /* PFP */
4312 fw_data = (const __le32 *)
4313 (adev->gfx.pfp_fw->data +
4314 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4315 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4316 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4317 for (i = 0; i < fw_size; i++)
4318 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4319 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4320
4321 /* CE */
4322 fw_data = (const __le32 *)
4323 (adev->gfx.ce_fw->data +
4324 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4325 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4326 WREG32(mmCP_CE_UCODE_ADDR, 0);
4327 for (i = 0; i < fw_size; i++)
4328 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4329 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4330
4331 /* ME */
4332 fw_data = (const __le32 *)
4333 (adev->gfx.me_fw->data +
4334 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4335 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4336 WREG32(mmCP_ME_RAM_WADDR, 0);
4337 for (i = 0; i < fw_size; i++)
4338 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4339 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4340
4341 return 0;
4342}
4343
4344static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4345{
4346 u32 count = 0;
4347 const struct cs_section_def *sect = NULL;
4348 const struct cs_extent_def *ext = NULL;
4349
4350 /* begin clear state */
4351 count += 2;
4352 /* context control state */
4353 count += 3;
4354
4355 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4356 for (ext = sect->section; ext->extent != NULL; ++ext) {
4357 if (sect->id == SECT_CONTEXT)
4358 count += 2 + ext->reg_count;
4359 else
4360 return 0;
4361 }
4362 }
4363 /* pa_sc_raster_config/pa_sc_raster_config1 */
4364 count += 4;
4365 /* end clear state */
4366 count += 2;
4367 /* clear state */
4368 count += 2;
4369
4370 return count;
4371}
4372
4373static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4374{
4375 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4376 const struct cs_section_def *sect = NULL;
4377 const struct cs_extent_def *ext = NULL;
4378 int r, i;
4379
4380 /* init the CP */
4381 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4382 WREG32(mmCP_ENDIAN_SWAP, 0);
4383 WREG32(mmCP_DEVICE_ID, 1);
4384
4385 gfx_v8_0_cp_gfx_enable(adev, true);
4386
a27de35c 4387 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4388 if (r) {
4389 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4390 return r;
4391 }
4392
4393 /* clear state buffer */
4394 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4395 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4396
4397 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
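	/* bit 31 of each dword is presumably the update-enable flag for the
	 * load and shadow control fields of CONTEXT_CONTROL */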
4398 amdgpu_ring_write(ring, 0x80000000);
4399 amdgpu_ring_write(ring, 0x80000000);
4400
4401 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4402 for (ext = sect->section; ext->extent != NULL; ++ext) {
4403 if (sect->id == SECT_CONTEXT) {
4404 amdgpu_ring_write(ring,
4405 PACKET3(PACKET3_SET_CONTEXT_REG,
4406 ext->reg_count));
4407 amdgpu_ring_write(ring,
4408 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4409 for (i = 0; i < ext->reg_count; i++)
4410 amdgpu_ring_write(ring, ext->extent[i]);
4411 }
4412 }
4413 }
4414
4415 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4416 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4417 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4418 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4419
4420 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4421 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4422
4423 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4424 amdgpu_ring_write(ring, 0);
4425
4426 /* init the CE partitions */
4427 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4428 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4429 amdgpu_ring_write(ring, 0x8000);
4430 amdgpu_ring_write(ring, 0x8000);
4431
a27de35c 4432 amdgpu_ring_commit(ring);
4433
4434 return 0;
4435}
4436static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4437{
4438 u32 tmp;
4439 /* no gfx doorbells on iceland */
4440 if (adev->asic_type == CHIP_TOPAZ)
4441 return;
4442
4443 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4444
4445 if (ring->use_doorbell) {
4446 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4447 DOORBELL_OFFSET, ring->doorbell_index);
4448 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4449 DOORBELL_HIT, 0);
4450 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4451 DOORBELL_EN, 1);
4452 } else {
4453 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4454 }
4455
4456 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4457
4458 if (adev->flags & AMD_IS_APU)
4459 return;
4460
4461 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4462 DOORBELL_RANGE_LOWER,
4463 AMDGPU_DOORBELL_GFX_RING0);
4464 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4465
4466 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4467 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4468}
4469
4470static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4471{
4472 struct amdgpu_ring *ring;
4473 u32 tmp;
4474 u32 rb_bufsz;
42e8cb50 4475 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4476 int r;
4477
4478 /* Set the write pointer delay */
4479 WREG32(mmCP_RB_WPTR_DELAY, 0);
4480
4481 /* set the RB to use vmid 0 */
4482 WREG32(mmCP_RB_VMID, 0);
4483
4484 /* Set ring buffer size */
4485 ring = &adev->gfx.gfx_ring[0];
4486 rb_bufsz = order_base_2(ring->ring_size / 8);
4487 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4488 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4489 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4490 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4491#ifdef __BIG_ENDIAN
4492 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4493#endif
4494 WREG32(mmCP_RB0_CNTL, tmp);
4495
4496 /* Initialize the ring buffer's read and write pointers */
4497 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4498 ring->wptr = 0;
536fbf94 4499 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4500
4501	/* set the wb address whether it's enabled or not */
4502 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4503 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4504 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4505
4506 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4507 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4508 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4509 mdelay(1);
4510 WREG32(mmCP_RB0_CNTL, tmp);
4511
4512 rb_addr = ring->gpu_addr >> 8;
4513 WREG32(mmCP_RB0_BASE, rb_addr);
4514 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4515
4f339b29 4516 gfx_v8_0_set_cpg_door_bell(adev, ring);
aaa36a97 4517 /* start the ring */
f6bd7942 4518 amdgpu_ring_clear_ring(ring);
4519 gfx_v8_0_cp_gfx_start(adev);
4520 ring->ready = true;
4521 r = amdgpu_ring_test_ring(ring);
5003f278 4522 if (r)
aaa36a97 4523 ring->ready = false;
aaa36a97 4524
5003f278 4525 return r;
4526}
4527
4528static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4529{
4530 int i;
4531
4532 if (enable) {
4533 WREG32(mmCP_MEC_CNTL, 0);
4534 } else {
4535 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4536 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4537 adev->gfx.compute_ring[i].ready = false;
fcf17a43 4538 adev->gfx.kiq.ring.ready = false;
4539 }
4540 udelay(50);
4541}
4542
4543static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4544{
4545 const struct gfx_firmware_header_v1_0 *mec_hdr;
4546 const __le32 *fw_data;
4547 unsigned i, fw_size;
4548
4549 if (!adev->gfx.mec_fw)
4550 return -EINVAL;
4551
4552 gfx_v8_0_cp_compute_enable(adev, false);
4553
4554 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4555 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4556
4557 fw_data = (const __le32 *)
4558 (adev->gfx.mec_fw->data +
4559 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4560 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4561
4562 /* MEC1 */
4563 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4564 for (i = 0; i < fw_size; i++)
4565 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4566 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4567
4568 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4569 if (adev->gfx.mec2_fw) {
4570 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4571
4572 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4573 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4574
4575 fw_data = (const __le32 *)
4576 (adev->gfx.mec2_fw->data +
4577 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4578 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4579
4580 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4581 for (i = 0; i < fw_size; i++)
4582 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4583 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4584 }
4585
4586 return 0;
4587}
4588
4589/* KIQ functions */
4590static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4591{
4592 uint32_t tmp;
4593 struct amdgpu_device *adev = ring->adev;
4594
4595 /* tell RLC which is KIQ queue */
4596 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4597 tmp &= 0xffffff00;
4598 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4599 WREG32(mmRLC_CP_SCHEDULERS, tmp);
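	/* the second write below additionally sets what appears to be the
	 * KIQ-active bit (0x80) */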
4600 tmp |= 0x80;
4601 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4602}
4603
346586d5 4604static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4e638ae9 4605{
c3a49ab5 4606 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
de65513a 4607 uint64_t queue_mask = 0;
4608 int r, i;
4609
4610 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4611 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4612 continue;
4613
4614 /* This situation may be hit in the future if a new HW
4615 * generation exposes more than 64 queues. If so, the
4616 * definition of queue_mask needs updating */
1d11ee89 4617 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4618 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4619 break;
4620 }
4621
4622 queue_mask |= (1ull << i);
4623 }
4624
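	/* worst case: 8 dwords for SET_RESOURCES plus up to 8 per MAP_QUEUES */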
6c10b5cc 4625 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4626 if (r) {
4627 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4628 return r;
4629 }
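	/* The KIQ owns compute queue management: SET_RESOURCES hands it the
	 * queue mask, then each KCQ is mapped via MAP_QUEUES along with its
	 * MQD and wptr addresses. */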
4e638ae9 4630 /* set resources */
4631 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4632 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4633 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4634 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4635 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4636 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4637 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4638 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4639 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4640 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4641 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4642 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4643
4644 /* map queues */
4645 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4646		/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
4647 amdgpu_ring_write(kiq_ring,
4648 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4649 amdgpu_ring_write(kiq_ring,
4650 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4651 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4652 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4653 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4654 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4655 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4656 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4657 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4658 }
f776952b 4659
4660 r = amdgpu_ring_test_ring(kiq_ring);
4661 if (r) {
4662 DRM_ERROR("KCQ enable failed\n");
4663 kiq_ring->ready = false;
f776952b 4664 }
f776952b 4665 return r;
4666}
4667
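/* Request an HQD dequeue (req selects the dequeue mode), wait for the queue
 * to go inactive, then clear its ring pointers. */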
4668static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4669{
4670 int i, r = 0;
4671
4672 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4673 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4674 for (i = 0; i < adev->usec_timeout; i++) {
4675 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4676 break;
4677 udelay(1);
4678 }
4679 if (i == adev->usec_timeout)
4680 r = -ETIMEDOUT;
4681 }
4682 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4683 WREG32(mmCP_HQD_PQ_RPTR, 0);
4684 WREG32(mmCP_HQD_PQ_WPTR, 0);
4685
4686 return r;
4687}
4688
a2140e00 4689static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4e638ae9 4690{
015c2360 4691 struct amdgpu_device *adev = ring->adev;
a2140e00 4692 struct vi_mqd *mqd = ring->mqd_ptr;
4693 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4694 uint32_t tmp;
4695
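	/* the MQD (memory queue descriptor) mirrors the CP_HQD_* register
	 * state for this queue; the CP loads it when the queue is mapped */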
4696 mqd->header = 0xC0310800;
4697 mqd->compute_pipelinestat_enable = 0x00000001;
4698 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4699 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4700 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4701 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4702 mqd->compute_misc_reserved = 0x00000003;
4703 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4704 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4705 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4706 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
34534610 4707 eop_base_addr = ring->eop_gpu_addr >> 8;
4708 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4709 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4710
4711 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4712 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4713 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
268cb4c7 4714 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4715
4716 mqd->cp_hqd_eop_control = tmp;
4717
4718 /* enable doorbell? */
4719 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4720 CP_HQD_PQ_DOORBELL_CONTROL,
4721 DOORBELL_EN,
4722 ring->use_doorbell ? 1 : 0);
4723
4724 mqd->cp_hqd_pq_doorbell_control = tmp;
4725
4e638ae9 4726 /* set the pointer to the MQD */
4727 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4728 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4729
4730 /* set MQD vmid to 0 */
4731 tmp = RREG32(mmCP_MQD_CONTROL);
4732 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4733 mqd->cp_mqd_control = tmp;
4734
4735	/* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
4736 hqd_gpu_addr = ring->gpu_addr >> 8;
4737 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4738 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4739
4740 /* set up the HQD, this is similar to CP_RB0_CNTL */
4741 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4742 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4743 (order_base_2(ring->ring_size / 4) - 1));
4744 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4745 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4746#ifdef __BIG_ENDIAN
4747 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4748#endif
4749 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4750 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4751 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4752 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4753 mqd->cp_hqd_pq_control = tmp;
4754
4755 /* set the wb address whether it's enabled or not */
4756 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4757 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4758 mqd->cp_hqd_pq_rptr_report_addr_hi =
4759 upper_32_bits(wb_gpu_addr) & 0xffff;
4760
4761 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4762 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4763 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4764 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4765
4766 tmp = 0;
4767 /* enable the doorbell if requested */
4768 if (ring->use_doorbell) {
4769 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4770 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4771 DOORBELL_OFFSET, ring->doorbell_index);
4772
4773 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4774 DOORBELL_EN, 1);
4775 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4776 DOORBELL_SOURCE, 0);
4777 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4778 DOORBELL_HIT, 0);
4779 }
4780
4781 mqd->cp_hqd_pq_doorbell_control = tmp;
4782
4783 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4784 ring->wptr = 0;
4785 mqd->cp_hqd_pq_wptr = ring->wptr;
4786 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4787
4788 /* set the vmid for the queue */
4789 mqd->cp_hqd_vmid = 0;
4790
4791 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4792 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4793 mqd->cp_hqd_persistent_state = tmp;
4794
4795 /* set MTYPE */
4796 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4797 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4798 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4799 mqd->cp_hqd_ib_control = tmp;
4800
4801 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4802 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4803 mqd->cp_hqd_iq_timer = tmp;
4804
4805 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4806 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4807 mqd->cp_hqd_ctx_save_control = tmp;
4808
4809 /* defaults */
4810 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4811 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4812 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4813 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4814 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4815 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4816 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4817 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4818 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4819 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4820 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4821 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4822 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4823 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4824 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4825
4826 /* activate the queue */
4827 mqd->cp_hqd_active = 1;
4828
4829 return 0;
4830}
4831
4832int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4833 struct vi_mqd *mqd)
4e638ae9 4834{
4835 uint32_t mqd_reg;
4836 uint32_t *mqd_data;
4e638ae9 4837
4838 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4839 mqd_data = &mqd->cp_mqd_base_addr_lo;
4840
4841 /* disable wptr polling */
0ac642c5 4842 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4e638ae9 4843
894700f3 4844 /* program all HQD registers */
4845 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4846 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4e638ae9 4847
4848 /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4849 * This is safe since EOP RPTR==WPTR for any inactive HQD
4850 * on ASICs that do not support context-save.
4851 * EOP writes/reads can start anywhere in the ring.
4852 */
4853 if (adev->asic_type != CHIP_TONGA) {
4854 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4855 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4856 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4857 }
4858
ecd910eb 4859 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
894700f3 4860 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4e638ae9 4861
4862 /* activate the HQD */
4863 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4864 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4e638ae9 4865
4866 return 0;
4867}
4e638ae9 4868
a2140e00 4869static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4870{
4871 struct amdgpu_device *adev = ring->adev;
a2140e00 4872 struct vi_mqd *mqd = ring->mqd_ptr;
1fb37a3d 4873 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4e638ae9 4874
39300115 4875 gfx_v8_0_kiq_setting(ring);
4e638ae9 4876
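	/* on GPU reset, restore the MQD from the CPU-side backup instead of
	 * reinitializing it, so the queue comes back with its prior state */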
13a752e3 4877 if (adev->in_gpu_reset) { /* for GPU_RESET case */
4878 /* reset MQD to a clean status */
4879 if (adev->gfx.mec.mqd_backup[mqd_idx])
6b0fa871 4880 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4e638ae9 4881
4882 /* reset ring buffer */
4883 ring->wptr = 0;
4884 amdgpu_ring_clear_ring(ring);
4885 mutex_lock(&adev->srbm_mutex);
4886 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
97bf47b2 4887 gfx_v8_0_mqd_commit(adev, mqd);
4888 vi_srbm_select(adev, 0, 0, 0, 0);
4889 mutex_unlock(&adev->srbm_mutex);
a545e491 4890 } else {
6b0fa871 4891 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4892 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4893 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4894 mutex_lock(&adev->srbm_mutex);
4895 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4896 gfx_v8_0_mqd_init(ring);
97bf47b2 4897 gfx_v8_0_mqd_commit(adev, mqd);
4898 vi_srbm_select(adev, 0, 0, 0, 0);
4899 mutex_unlock(&adev->srbm_mutex);
4e638ae9 4900
a545e491 4901 if (adev->gfx.mec.mqd_backup[mqd_idx])
6b0fa871 4902 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
1fb37a3d 4903 }
4e638ae9 4904
dcf75843 4905 return 0;
4906}
4907
39300115 4908static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4909{
4910 struct amdgpu_device *adev = ring->adev;
a2140e00 4911 struct vi_mqd *mqd = ring->mqd_ptr;
39300115 4912 int mqd_idx = ring - &adev->gfx.compute_ring[0];
4e638ae9 4913
13a752e3 4914 if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
6b0fa871 4915 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4916 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4917 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4918 mutex_lock(&adev->srbm_mutex);
4919 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
a2140e00 4920 gfx_v8_0_mqd_init(ring);
4921 vi_srbm_select(adev, 0, 0, 0, 0);
4922 mutex_unlock(&adev->srbm_mutex);
4923
4924 if (adev->gfx.mec.mqd_backup[mqd_idx])
6b0fa871 4925 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
13a752e3 4926 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4927 /* reset MQD to a clean status */
4928 if (adev->gfx.mec.mqd_backup[mqd_idx])
6b0fa871 4929 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4930 /* reset ring buffer */
4931 ring->wptr = 0;
4932 amdgpu_ring_clear_ring(ring);
4933 } else {
4934 amdgpu_ring_clear_ring(ring);
1fb37a3d 4935 }
4936 return 0;
4937}
4938
4939static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4940{
4941 if (adev->asic_type > CHIP_TONGA) {
4942 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4943 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4944 }
4945 /* enable doorbells */
4946 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4947}
4948
596c67d0 4949static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4e638ae9 4950{
4951 struct amdgpu_ring *ring;
4952 int r;
4953
4954 ring = &adev->gfx.kiq.ring;
4955
4956 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4957 if (unlikely(r != 0))
36859cd5 4958 return r;
4959
4960 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4961 if (unlikely(r != 0))
4962 return r;
4963
4964 gfx_v8_0_kiq_init_queue(ring);
4965 amdgpu_bo_kunmap(ring->mqd_obj);
4966 ring->mqd_ptr = NULL;
6a6f380f 4967 amdgpu_bo_unreserve(ring->mqd_obj);
4968 ring->ready = true;
4969 return 0;
4970}
4971
4972static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4973{
4974 struct amdgpu_ring *ring = NULL;
4975 int r = 0, i;
4976
4977 gfx_v8_0_cp_compute_enable(adev, true);
4e638ae9 4978
4979 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4980 ring = &adev->gfx.compute_ring[i];
4981
4982 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4983 if (unlikely(r != 0))
4984 goto done;
4985 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4986 if (!r) {
39300115 4987 r = gfx_v8_0_kcq_init_queue(ring);
596c67d0 4988 amdgpu_bo_kunmap(ring->mqd_obj);
1fb37a3d 4989 ring->mqd_ptr = NULL;
596c67d0 4990 }
4991 amdgpu_bo_unreserve(ring->mqd_obj);
4992 if (r)
4993 goto done;
4994 }
4995
4f339b29 4996 gfx_v8_0_set_mec_doorbell_range(adev);
4e638ae9 4997
346586d5 4998 r = gfx_v8_0_kiq_kcq_enable(adev);
4999 if (r)
5000 goto done;
aaa36a97 5001
346586d5 5002 /* Test KCQs */
aaa36a97 5003 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
c3a49ab5 5004 ring = &adev->gfx.compute_ring[i];
5005 ring->ready = true;
5006 r = amdgpu_ring_test_ring(ring);
5007 if (r)
5008 ring->ready = false;
5009 }
5010
5011done:
5012 return r;
5013}
5014
5015static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5016{
5017 int r;
5018
e3c7656c 5019 if (!(adev->flags & AMD_IS_APU))
5020 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5021
790d84fd 5022 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
ba5c2a87 5023 /* legacy firmware loading */
5024 r = gfx_v8_0_cp_gfx_load_microcode(adev);
5025 if (r)
5026 return r;
aaa36a97 5027
5028 r = gfx_v8_0_cp_compute_load_microcode(adev);
5029 if (r)
5030 return r;
5031 }
5032
36859cd5 5033 r = gfx_v8_0_kiq_resume(adev);
5034 if (r)
5035 return r;
5036
36859cd5 5037 r = gfx_v8_0_cp_gfx_resume(adev);
5038 if (r)
5039 return r;
5040
5041 r = gfx_v8_0_kcq_resume(adev);
5042 if (r)
5043 return r;
5044 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5045
5046 return 0;
5047}
5048
5049static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5050{
5051 gfx_v8_0_cp_gfx_enable(adev, enable);
5052 gfx_v8_0_cp_compute_enable(adev, enable);
5053}
5054
5fc3aeeb 5055static int gfx_v8_0_hw_init(void *handle)
5056{
5057 int r;
5fc3aeeb 5058 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5059
5060 gfx_v8_0_init_golden_registers(adev);
5061 gfx_v8_0_gpu_init(adev);
5062
5063 r = gfx_v8_0_rlc_resume(adev);
5064 if (r)
5065 return r;
5066
5067 r = gfx_v8_0_cp_resume(adev);
5068
5069 return r;
5070}
5071
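/* Ask the KIQ to unmap every KCQ (UNMAP_QUEUES with the RESET_QUEUES
 * action), identifying each queue by its doorbell offset. */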
a62a49e5 5072static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
85f95ad6 5073{
5074 int r, i;
5075 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
85f95ad6 5076
a62a49e5 5077 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
6c10b5cc 5078 if (r)
85f95ad6 5079 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
85f95ad6 5080
5081 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5082 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5083
5084 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5085 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
5086 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5087 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5088 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5089 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5090 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5091 amdgpu_ring_write(kiq_ring, 0);
5092 amdgpu_ring_write(kiq_ring, 0);
5093 amdgpu_ring_write(kiq_ring, 0);
5094 }
5095 r = amdgpu_ring_test_ring(kiq_ring);
5096 if (r)
5097 DRM_ERROR("KCQ disable failed\n");
5098
5099 return r;
5100}
5101
5fc3aeeb 5102static int gfx_v8_0_hw_fini(void *handle)
aaa36a97 5103{
5fc3aeeb 5104 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5105
5106 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5107 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
85f95ad6 5108
5109 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
5110
5111 amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
5112
85f95ad6 5113	/* disable the KCQs so the CPC stops touching memory that is about to become invalid */
a62a49e5 5114 gfx_v8_0_kcq_disable(adev);
85f95ad6 5115
5116 if (amdgpu_sriov_vf(adev)) {
5117 pr_debug("For SRIOV client, shouldn't do anything.\n");
5118 return 0;
5119 }
5120 gfx_v8_0_cp_enable(adev, false);
5121 gfx_v8_0_rlc_stop(adev);
5122
5123 return 0;
5124}
5125
5fc3aeeb 5126static int gfx_v8_0_suspend(void *handle)
aaa36a97 5127{
5fc3aeeb 5128 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
b4e40676 5129 adev->gfx.in_suspend = true;
5130 return gfx_v8_0_hw_fini(adev);
5131}
5132
5fc3aeeb 5133static int gfx_v8_0_resume(void *handle)
aaa36a97 5134{
b4e40676 5135 int r;
5fc3aeeb 5136 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5137
5138 r = gfx_v8_0_hw_init(adev);
5139 adev->gfx.in_suspend = false;
5140 return r;
5141}
5142
5fc3aeeb 5143static bool gfx_v8_0_is_idle(void *handle)
aaa36a97 5144{
5fc3aeeb 5145 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5146
5147 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5148 return false;
5149 else
5150 return true;
5151}
5152
5fc3aeeb 5153static int gfx_v8_0_wait_for_idle(void *handle)
5154{
5155 unsigned i;
5fc3aeeb 5156 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5157
5158 for (i = 0; i < adev->usec_timeout; i++) {
5003f278 5159 if (gfx_v8_0_is_idle(handle))
aaa36a97 5160 return 0;
5003f278 5161
5162 udelay(1);
5163 }
5164 return -ETIMEDOUT;
5165}
5166
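/* Inspect the GRBM/SRBM busy bits and record which soft-reset fields would
 * be needed; returns true if any reset is pending. */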
da146d3b 5167static bool gfx_v8_0_check_soft_reset(void *handle)
aaa36a97 5168{
3d7c6384 5169 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5170 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5171 u32 tmp;
5172
5173 /* GRBM_STATUS */
5174 tmp = RREG32(mmGRBM_STATUS);
5175 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5176 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5177 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5178 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5179 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5180 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5181 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5182 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5183 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5184 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5185 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5186 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5187 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5188 }
5189
5190 /* GRBM_STATUS2 */
5191 tmp = RREG32(mmGRBM_STATUS2);
5192 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5193 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5194 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5195
5196 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5197 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5198 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5199 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5200 SOFT_RESET_CPF, 1);
5201 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5202 SOFT_RESET_CPC, 1);
5203 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5204 SOFT_RESET_CPG, 1);
5205 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5206 SOFT_RESET_GRBM, 1);
5207 }
5208
5209 /* SRBM_STATUS */
5210 tmp = RREG32(mmSRBM_STATUS);
5211 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5212 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5213 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5214 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5215 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5216 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5217
5218 if (grbm_soft_reset || srbm_soft_reset) {
5219 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5220 adev->gfx.srbm_soft_reset = srbm_soft_reset;
da146d3b 5221 return true;
3d7c6384 5222 } else {
5223 adev->gfx.grbm_soft_reset = 0;
5224 adev->gfx.srbm_soft_reset = 0;
da146d3b 5225 return false;
3d7c6384 5226 }
3d7c6384 5227}
aaa36a97 5228
5229static int gfx_v8_0_pre_soft_reset(void *handle)
5230{
5231 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5232 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5233
5234 if ((!adev->gfx.grbm_soft_reset) &&
5235 (!adev->gfx.srbm_soft_reset))
5236 return 0;
5237
5238 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5239 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5240
5241 /* stop the rlc */
5242 gfx_v8_0_rlc_stop(adev);
5243
5244 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5245 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5246 /* Disable GFX parsing/prefetching */
5247 gfx_v8_0_cp_gfx_enable(adev, false);
5248
5249 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5250 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5251 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5252 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5253 int i;
5254
5255 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5256 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5257
5258 mutex_lock(&adev->srbm_mutex);
5259 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5260 gfx_v8_0_deactivate_hqd(adev, 2);
5261 vi_srbm_select(adev, 0, 0, 0, 0);
5262 mutex_unlock(&adev->srbm_mutex);
1057f20c 5263 }
aaa36a97 5264 /* Disable MEC parsing/prefetching */
7776a693 5265 gfx_v8_0_cp_compute_enable(adev, false);
1057f20c 5266 }
7776a693 5267
5268 return 0;
5269}
7776a693 5270
5271static int gfx_v8_0_soft_reset(void *handle)
5272{
5273 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5274 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5275 u32 tmp;
aaa36a97 5276
5277 if ((!adev->gfx.grbm_soft_reset) &&
5278 (!adev->gfx.srbm_soft_reset))
3d7c6384 5279 return 0;
aaa36a97 5280
5281 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5282 srbm_soft_reset = adev->gfx.srbm_soft_reset;
aaa36a97 5283
5284 if (grbm_soft_reset || srbm_soft_reset) {
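		/* stall and flush the GFX path through the memory controller
		 * while the resets are pulsed */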
5285 tmp = RREG32(mmGMCON_DEBUG);
5286 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5287 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5288 WREG32(mmGMCON_DEBUG, tmp);
5289 udelay(50);
5290 }
aaa36a97 5291
3d7c6384
CZ
5292 if (grbm_soft_reset) {
5293 tmp = RREG32(mmGRBM_SOFT_RESET);
5294 tmp |= grbm_soft_reset;
5295 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5296 WREG32(mmGRBM_SOFT_RESET, tmp);
5297 tmp = RREG32(mmGRBM_SOFT_RESET);
aaa36a97 5298
3d7c6384 5299 udelay(50);
aaa36a97 5300
5301 tmp &= ~grbm_soft_reset;
5302 WREG32(mmGRBM_SOFT_RESET, tmp);
5303 tmp = RREG32(mmGRBM_SOFT_RESET);
5304 }
7776a693 5305
5306 if (srbm_soft_reset) {
5307 tmp = RREG32(mmSRBM_SOFT_RESET);
5308 tmp |= srbm_soft_reset;
5309 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5310 WREG32(mmSRBM_SOFT_RESET, tmp);
5311 tmp = RREG32(mmSRBM_SOFT_RESET);
7776a693 5312
aaa36a97 5313 udelay(50);
7776a693 5314
5315 tmp &= ~srbm_soft_reset;
5316 WREG32(mmSRBM_SOFT_RESET, tmp);
5317 tmp = RREG32(mmSRBM_SOFT_RESET);
aaa36a97 5318 }
7776a693 5319
5320 if (grbm_soft_reset || srbm_soft_reset) {
5321 tmp = RREG32(mmGMCON_DEBUG);
5322 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5323 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5324 WREG32(mmGMCON_DEBUG, tmp);
aaa36a97 5325 }
5326
5327 /* Wait a little for things to settle down */
5328 udelay(50);
5329
5330 return 0;
5331}
5332
5333static int gfx_v8_0_post_soft_reset(void *handle)
5334{
5335 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5336 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5337
5338 if ((!adev->gfx.grbm_soft_reset) &&
5339 (!adev->gfx.srbm_soft_reset))
5340 return 0;
5341
5342 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5343 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5344
5345 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5346 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5347 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5348 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5349 int i;
5350
5351 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5352 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5353
5354 mutex_lock(&adev->srbm_mutex);
5355 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5356 gfx_v8_0_deactivate_hqd(adev, 2);
5357 vi_srbm_select(adev, 0, 0, 0, 0);
5358 mutex_unlock(&adev->srbm_mutex);
e4ae0fc3 5359 }
b4e40676 5360 gfx_v8_0_kiq_resume(adev);
36859cd5 5361 gfx_v8_0_kcq_resume(adev);
e4ae0fc3 5362 }
5363
5364 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5365 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5366 gfx_v8_0_cp_gfx_resume(adev);
5367
5368 gfx_v8_0_rlc_start(adev);
5369
5370 return 0;
5371}
5372
5373/**
5374 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5375 *
5376 * @adev: amdgpu_device pointer
5377 *
5378 * Fetches a GPU clock counter snapshot.
5379 * Returns the 64 bit clock counter snapshot.
5380 */
b95e31fd 5381static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5382{
5383 uint64_t clock;
5384
5385 mutex_lock(&adev->gfx.gpu_clock_mutex);
5386 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5387 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5388 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5389 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5390 return clock;
5391}
5392
5393static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5394 uint32_t vmid,
5395 uint32_t gds_base, uint32_t gds_size,
5396 uint32_t gws_base, uint32_t gws_size,
5397 uint32_t oa_base, uint32_t oa_size)
5398{
5399 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5400 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5401
5402 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5403 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5404
5405 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5406 oa_size = oa_size >> AMDGPU_OA_SHIFT;
5407
5408 /* GDS Base */
5409 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5410 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5411 WRITE_DATA_DST_SEL(0)));
5412 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5413 amdgpu_ring_write(ring, 0);
5414 amdgpu_ring_write(ring, gds_base);
5415
5416 /* GDS Size */
5417 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5418 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5419 WRITE_DATA_DST_SEL(0)));
5420 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5421 amdgpu_ring_write(ring, 0);
5422 amdgpu_ring_write(ring, gds_size);
5423
5424 /* GWS */
5425 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5426 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5427 WRITE_DATA_DST_SEL(0)));
5428 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5429 amdgpu_ring_write(ring, 0);
5430 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5431
5432 /* OA */
5433 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5434 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5435 WRITE_DATA_DST_SEL(0)));
5436 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5437 amdgpu_ring_write(ring, 0);
5438 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5439}
5440
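/* SQ_IND_INDEX/SQ_IND_DATA provide an indirect window into per-wave SQ
 * registers, addressed by SIMD id, wave id and register index. */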
5441static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5442{
5443 WREG32(mmSQ_IND_INDEX,
5444 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5445 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5446 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5447 (SQ_IND_INDEX__FORCE_READ_MASK));
5448 return RREG32(mmSQ_IND_DATA);
5449}
5450
5451static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5452 uint32_t wave, uint32_t thread,
5453 uint32_t regno, uint32_t num, uint32_t *out)
5454{
5455 WREG32(mmSQ_IND_INDEX,
5456 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5457 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5458 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5459 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5460 (SQ_IND_INDEX__FORCE_READ_MASK) |
5461 (SQ_IND_INDEX__AUTO_INCR_MASK));
5462 while (num--)
5463 *(out++) = RREG32(mmSQ_IND_DATA);
5464}
5465
5466static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5467{
5468 /* type 0 wave data */
5469 dst[(*no_fields)++] = 0;
5470 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5471 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5472 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5473 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5474 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5475 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5476 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5477 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5478 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5479 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5480 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5481 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5482 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5483 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5484 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5485 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5486 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5487 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5488}
5489
5490static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5491 uint32_t wave, uint32_t start,
5492 uint32_t size, uint32_t *dst)
5493{
5494 wave_read_regs(
5495 adev, simd, wave, 0,
5496 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5497}
5498
472259f0 5499
5500static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5501 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
05fb7291 5502 .select_se_sh = &gfx_v8_0_select_se_sh,
472259f0 5503 .read_wave_data = &gfx_v8_0_read_wave_data,
c5a60ce8 5504 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
f7a9ee81 5505 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5506};
5507
5fc3aeeb 5508static int gfx_v8_0_early_init(void *handle)
aaa36a97 5509{
5fc3aeeb 5510 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5511
5512 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
78c16834 5513 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
b95e31fd 5514 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5515 gfx_v8_0_set_ring_funcs(adev);
5516 gfx_v8_0_set_irq_funcs(adev);
5517 gfx_v8_0_set_gds_init(adev);
dbff57bc 5518 gfx_v8_0_set_rlc_funcs(adev);
5519
5520 return 0;
5521}
5522
5523static int gfx_v8_0_late_init(void *handle)
5524{
5525 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5526 int r;
5527
5528 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5529 if (r)
5530 return r;
5531
5532 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5533 if (r)
5534 return r;
5535
5536 /* requires IBs so do in late init after IB pool is initialized */
5537 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5538 if (r)
5539 return r;
5540
5541 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5542 if (r) {
5543 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5544 return r;
5545 }
5546
5547 r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5548 if (r) {
5549 DRM_ERROR(
5550 "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5551 r);
5552 return r;
5553 }
5554
5555 return 0;
5556}
5557
5558static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5559 bool enable)
62a86fc2 5560{
85f80cb3 5561 if (((adev->asic_type == CHIP_POLARIS11) ||
71765469 5562 (adev->asic_type == CHIP_POLARIS12) ||
5563 (adev->asic_type == CHIP_VEGAM)) &&
5564 adev->powerplay.pp_funcs->set_powergating_by_smu)
c2546f55 5565 /* Send msg to SMU via Powerplay */
85f80cb3 5566 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
62a86fc2 5567
61cb8cef 5568 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5569}
5570
5571static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5572 bool enable)
62a86fc2 5573{
61cb8cef 5574 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5575}
5576
2cc0c0b5 5577static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5578 bool enable)
5579{
61cb8cef 5580 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5581}
5582
5583static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5584 bool enable)
5585{
61cb8cef 5586 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5587}
5588
5589static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5590 bool enable)
5591{
61cb8cef 5592 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5593
5594 /* Read any GFX register to wake up GFX. */
5595 if (!enable)
61cb8cef 5596 RREG32(mmDB_RENDER_CONTROL);
5597}
5598
5599static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5600 bool enable)
5601{
5602 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5603 cz_enable_gfx_cg_power_gating(adev, true);
5604 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5605 cz_enable_gfx_pipeline_power_gating(adev, true);
5606 } else {
5607 cz_enable_gfx_cg_power_gating(adev, false);
5608 cz_enable_gfx_pipeline_power_gating(adev, false);
5609 }
5610}
5611
5fc3aeeb 5612static int gfx_v8_0_set_powergating_state(void *handle,
5613 enum amd_powergating_state state)
aaa36a97 5614{
62a86fc2 5615 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7e913664 5616 bool enable = (state == AMD_PG_STATE_GATE);
62a86fc2 5617
5618 if (amdgpu_sriov_vf(adev))
5619 return 0;
5620
5621 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5622 AMD_PG_SUPPORT_RLC_SMU_HS |
5623 AMD_PG_SUPPORT_CP |
5624 AMD_PG_SUPPORT_GFX_DMG))
5625 adev->gfx.rlc.funcs->enter_safe_mode(adev);
62a86fc2 5626 switch (adev->asic_type) {
5627 case CHIP_CARRIZO:
5628 case CHIP_STONEY:
ad1830d5 5629
5630 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5631 cz_enable_sck_slow_down_on_power_up(adev, true);
5632 cz_enable_sck_slow_down_on_power_down(adev, true);
5633 } else {
5634 cz_enable_sck_slow_down_on_power_up(adev, false);
5635 cz_enable_sck_slow_down_on_power_down(adev, false);
5636 }
5637 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5638 cz_enable_cp_power_gating(adev, true);
5639 else
5640 cz_enable_cp_power_gating(adev, false);
5641
ad1830d5 5642 cz_update_gfx_cg_power_gating(adev, enable);
5643
5644 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5645 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5646 else
5647 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5648
5649 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5650 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5651 else
5652 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5653 break;
2cc0c0b5 5654 case CHIP_POLARIS11:
c4642a47 5655 case CHIP_POLARIS12:
71765469 5656 case CHIP_VEGAM:
5657 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5658 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5659 else
5660 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5661
5662 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5663 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5664 else
5665 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5666
5667 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5668 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
62a86fc2 5669 else
7ba0eb6d 5670 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
62a86fc2
EH
5671 break;
5672 default:
5673 break;
5674 }
1f06dee8
RZ
5675 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5676 AMD_PG_SUPPORT_RLC_SMU_HS |
5677 AMD_PG_SUPPORT_CP |
5678 AMD_PG_SUPPORT_GFX_DMG))
5679 adev->gfx.rlc.funcs->exit_safe_mode(adev);
aaa36a97
AD
5680 return 0;
5681}
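/*
 * Illustrative sketch: how a caller outside this file would flip GFX
 * power gating through the handler above.  "example_toggle_gfx_pg" is
 * a hypothetical name; in the real driver the handler is reached via
 * the .set_powergating_state member of gfx_v8_0_ip_funcs defined
 * further below.
 */
static int __maybe_unused example_toggle_gfx_pg(struct amdgpu_device *adev,
						bool gate)
{
	return gfx_v8_0_set_powergating_state(adev,
			gate ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
}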
5682
ebd843d6
HR
5683static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5684{
5685 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5686 int data;
5687
ce137c04
ML
5688 if (amdgpu_sriov_vf(adev))
5689 *flags = 0;
5690
ebd843d6
HR
5691 /* AMD_CG_SUPPORT_GFX_MGCG */
5692 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5693 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5694 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5695
5696 /* AMD_CG_SUPPORT_GFX_CGLG */
5697 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5698 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5699 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5700
5701 /* AMD_CG_SUPPORT_GFX_CGLS */
5702 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5703 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5704
5705 /* AMD_CG_SUPPORT_GFX_CGTS */
5706 data = RREG32(mmCGTS_SM_CTRL_REG);
5707 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5708 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5709
5710 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5711 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5712 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5713
5714 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5715 data = RREG32(mmRLC_MEM_SLP_CNTL);
5716 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5717 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5718
5719 /* AMD_CG_SUPPORT_GFX_CP_LS */
5720 data = RREG32(mmCP_MEM_SLP_CNTL);
5721 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5722 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5723}
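/*
 * Illustrative sketch: consuming the bitmask filled in by the query
 * above.  Hypothetical helper; only flag names already used in this
 * file are assumed.
 */
static void __maybe_unused example_report_gfx_cg(struct amdgpu_device *adev)
{
	u32 flags = 0;

	gfx_v8_0_get_clockgating_state(adev, &flags);

	if (flags & AMD_CG_SUPPORT_GFX_MGCG)
		DRM_INFO("GFX medium grain clock gating active\n");
	if (flags & AMD_CG_SUPPORT_GFX_CGCG)
		DRM_INFO("GFX coarse grain clock gating active\n");
	if (flags & AMD_CG_SUPPORT_GFX_CGLS)
		DRM_INFO("GFX coarse grain light sleep active\n");
}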
5724
79deaaf4 5725static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
14698b6c 5726 uint32_t reg_addr, uint32_t cmd)
6e378858
EH
5727{
5728 uint32_t data;
5729
9559ef5b 5730 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6e378858
EH
5731
5732 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5733 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5734
5735 data = RREG32(mmRLC_SERDES_WR_CTRL);
146f256f 5736 if (adev->asic_type == CHIP_STONEY)
62d2ce4b
TSD
5737 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5738 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5739 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5740 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5741 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5742 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5743 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5744 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5745 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
146f256f
AD
5746 else
5747 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5748 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5749 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5750 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5751 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5752 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5753 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5754 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5755 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5756 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5757 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
6e378858 5758 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
146f256f
AD
5759 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5760 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5761 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
6e378858
EH
5762
5763 WREG32(mmRLC_SERDES_WR_CTRL, data);
5764}
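/*
 * Illustrative sketch: the serdes write helper above is always used in
 * set/clear pairs against a BPM override register.  Hypothetical
 * wrapper over the MGCG override, reusing the BPM_REG_* and
 * *_BPM_SERDES_CMD values defined elsewhere in this driver.
 */
static void __maybe_unused example_mgcg_override(struct amdgpu_device *adev,
						 bool set)
{
	gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE,
				 set ? SET_BPM_SERDES_CMD :
				       CLE_BPM_SERDES_CMD);
}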
5765
dbff57bc
AD
5766#define MSG_ENTER_RLC_SAFE_MODE 1
5767#define MSG_EXIT_RLC_SAFE_MODE 0
61cb8cef
TSD
5768#define RLC_GPR_REG2__REQ_MASK 0x00000001
5769#define RLC_GPR_REG2__REQ__SHIFT 0
5770#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5771#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
dbff57bc 5772
dbff57bc
AD
5773static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5774{
5775 u32 data;
5776 unsigned i;
5777
5778 data = RREG32(mmRLC_CNTL);
5779 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5780 return;
5781
5782 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5783 data |= RLC_SAFE_MODE__CMD_MASK;
5784 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5785 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5786 WREG32(mmRLC_SAFE_MODE, data);
5787
5788 for (i = 0; i < adev->usec_timeout; i++) {
5789 if ((RREG32(mmRLC_GPM_STAT) &
5790 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5791 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5792 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5793 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5794 break;
5795 udelay(1);
5796 }
5797
5798 for (i = 0; i < adev->usec_timeout; i++) {
61cb8cef 5799 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
dbff57bc
AD
5800 break;
5801 udelay(1);
5802 }
5803 adev->gfx.rlc.in_safe_mode = true;
5804 }
5805}
5806
5807static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5808{
5809 u32 data = 0;
5810 unsigned i;
5811
5812 data = RREG32(mmRLC_CNTL);
5813 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5814 return;
5815
5816 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5817 if (adev->gfx.rlc.in_safe_mode) {
5818 data |= RLC_SAFE_MODE__CMD_MASK;
5819 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5820 WREG32(mmRLC_SAFE_MODE, data);
5821 adev->gfx.rlc.in_safe_mode = false;
5822 }
5823 }
5824
5825 for (i = 0; i < adev->usec_timeout; i++) {
61cb8cef 5826 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
dbff57bc
AD
5827 break;
5828 udelay(1);
5829 }
5830}
5831
dbff57bc
AD
5832static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5833 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5834 .exit_safe_mode = iceland_exit_rlc_safe_mode
5835};
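/*
 * Illustrative sketch: every CG/PG reprogramming sequence in this file
 * brackets itself with the safe mode hooks above so the RLC cannot
 * race the register writes.  Hypothetical example of the idiom.
 */
static void __maybe_unused example_rlc_safe_section(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* ... reprogram RLC-owned CG/PG registers here ... */

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}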
5836
dbff57bc
AD
5837static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5838 bool enable)
6e378858
EH
5839{
5840 uint32_t temp, data;
5841
dbff57bc
AD
5842 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5843
6e378858 5844 /* It is disabled by HW by default */
14698b6c
AD
5845 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5846 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
61cb8cef 5847 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
14698b6c 5848 /* 1 - RLC memory Light sleep */
61cb8cef 5849 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
6e378858 5850
61cb8cef
TSD
5851 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5852 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
14698b6c 5853 }
6e378858
EH
5854
5855 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5856 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
dbff57bc
AD
5857 if (adev->flags & AMD_IS_APU)
5858 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5859 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5860 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5861 else
5862 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5863 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5864 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5865 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
6e378858
EH
5866
5867 if (temp != data)
5868 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5869
5870 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5871 gfx_v8_0_wait_for_rlc_serdes(adev);
5872
5873 /* 5 - clear mgcg override */
79deaaf4 5874 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6e378858 5875
14698b6c
AD
5876 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5877 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5878 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5879 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5880 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5881 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5882 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5883 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5884 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5885 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5886 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5887 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5888 if (temp != data)
5889 WREG32(mmCGTS_SM_CTRL_REG, data);
5890 }
6e378858
EH
5891 udelay(50);
5892
5893 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5894 gfx_v8_0_wait_for_rlc_serdes(adev);
5895 } else {
5896 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5897 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5898 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5899 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5900 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5901 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5902 if (temp != data)
5903 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5904
5905 /* 2 - disable MGLS in RLC */
5906 data = RREG32(mmRLC_MEM_SLP_CNTL);
5907 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5908 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5909 WREG32(mmRLC_MEM_SLP_CNTL, data);
5910 }
5911
5912 /* 3 - disable MGLS in CP */
5913 data = RREG32(mmCP_MEM_SLP_CNTL);
5914 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5915 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5916 WREG32(mmCP_MEM_SLP_CNTL, data);
5917 }
5918
5919 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5920 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5921 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5922 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5923 if (temp != data)
5924 WREG32(mmCGTS_SM_CTRL_REG, data);
5925
5926 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5927 gfx_v8_0_wait_for_rlc_serdes(adev);
5928
5929 /* 6 - set mgcg override */
79deaaf4 5930 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6e378858
EH
5931
5932 udelay(50);
5933
5934 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5935 gfx_v8_0_wait_for_rlc_serdes(adev);
5936 }
dbff57bc
AD
5937
5938 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6e378858
EH
5939}
5940
dbff57bc
AD
5941static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5942 bool enable)
6e378858
EH
5943{
5944 uint32_t temp, temp1, data, data1;
5945
5946 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5947
dbff57bc
AD
5948 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5949
14698b6c 5950 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
6e378858
EH
5951 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5952 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5953 if (temp1 != data1)
5954 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5955
dd31ae9a 5956 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6e378858
EH
5957 gfx_v8_0_wait_for_rlc_serdes(adev);
5958
dd31ae9a 5959 /* 2 - clear cgcg override */
79deaaf4 5960 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6e378858
EH
5961
5962 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5963 gfx_v8_0_wait_for_rlc_serdes(adev);
5964
dd31ae9a 5965 /* 3 - write cmd to set CGLS */
79deaaf4 5966 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
6e378858 5967
dd31ae9a 5968 /* 4 - enable cgcg */
6e378858
EH
5969 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5970
14698b6c
AD
5971 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5972 /* enable cgls*/
5973 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6e378858 5974
14698b6c
AD
5975 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5976 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
6e378858 5977
14698b6c
AD
5978 if (temp1 != data1)
5979 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5980 } else {
5981 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5982 }
6e378858
EH
5983
5984 if (temp != data)
5985 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
dd31ae9a
AN
5986
5987 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5988 * Cmp_busy/GFX_Idle interrupts
5989 */
5990 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6e378858
EH
5991 } else {
5992 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5993 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5994
5995 /* TEST CGCG */
5996 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5997 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5998 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5999 if (temp1 != data1)
6000 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6001
6002 /* read gfx register to wake up cgcg */
6003 RREG32(mmCB_CGTT_SCLK_CTRL);
6004 RREG32(mmCB_CGTT_SCLK_CTRL);
6005 RREG32(mmCB_CGTT_SCLK_CTRL);
6006 RREG32(mmCB_CGTT_SCLK_CTRL);
6007
6008 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6009 gfx_v8_0_wait_for_rlc_serdes(adev);
6010
6011 /* write cmd to Set CGCG Override */
79deaaf4 6012 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6e378858
EH
6013
6014 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6015 gfx_v8_0_wait_for_rlc_serdes(adev);
6016
6017 /* write cmd to Clear CGLS */
79deaaf4 6018 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6e378858
EH
6019
6020 /* disable cgcg, cgls should be disabled too. */
6021 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
14698b6c 6022 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6e378858
EH
6023 if (temp != data)
6024 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
d5dc36a4
AD
6025 /* enable interrupts again for PG */
6026 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6e378858 6027 }
dbff57bc 6028
7894745a
TSD
6029 gfx_v8_0_wait_for_rlc_serdes(adev);
6030
dbff57bc 6031 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6e378858 6032}
dbff57bc
AD
6033static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6034 bool enable)
6e378858
EH
6035{
6036 if (enable) {
6037 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6038 * === MGCG + MGLS + TS(CG/LS) ===
6039 */
dbff57bc
AD
6040 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6041 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6e378858
EH
6042 } else {
6043 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6044 * === CGCG + CGLS ===
6045 */
dbff57bc
AD
6046 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6047 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6e378858
EH
6048 }
6049 return 0;
6050}
6051
a8ca3413
RZ
6052static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6053 enum amd_clockgating_state state)
6054{
8a19e7fa
RZ
6055 uint32_t msg_id, pp_state = 0;
6056 uint32_t pp_support_state = 0;
a8ca3413 6057
8a19e7fa
RZ
6058 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6059 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6060 pp_support_state = PP_STATE_SUPPORT_LS;
6061 pp_state = PP_STATE_LS;
6062 }
6063 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6064 pp_support_state |= PP_STATE_SUPPORT_CG;
6065 pp_state |= PP_STATE_CG;
6066 }
6067 if (state == AMD_CG_STATE_UNGATE)
6068 pp_state = 0;
6069
6070 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6071 PP_BLOCK_GFX_CG,
6072 pp_support_state,
6073 pp_state);
3811f8f0
RZ
6074 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6075 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
8a19e7fa 6076 }
a8ca3413 6077
8a19e7fa
RZ
6078 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6079 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6080 pp_support_state = PP_STATE_SUPPORT_LS;
6081 pp_state = PP_STATE_LS;
6082 }
a8ca3413 6083
8a19e7fa
RZ
6084 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6085 pp_support_state |= PP_STATE_SUPPORT_CG;
6086 pp_state |= PP_STATE_CG;
6087 }
6088
6089 if (state == AMD_CG_STATE_UNGATE)
6090 pp_state = 0;
6091
6092 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6093 PP_BLOCK_GFX_MG,
6094 pp_support_state,
6095 pp_state);
3811f8f0
RZ
6096 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6097 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
8a19e7fa 6098 }
a8ca3413
RZ
6099
6100 return 0;
6101}
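/*
 * Illustrative sketch: a single fully expanded PP_CG_MSG_ID() request,
 * as composed repeatedly above.  Hypothetical example gating both CGCG
 * and CGLS of the GFX CG block through the SMU.
 */
static void __maybe_unused example_smu_gate_gfx_cg(struct amdgpu_device *adev)
{
	u32 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				  PP_BLOCK_GFX_CG,
				  PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
				  PP_STATE_CG | PP_STATE_LS);

	if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
}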
6102
6103static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6104 enum amd_clockgating_state state)
6105{
8a19e7fa
RZ
6106
6107 uint32_t msg_id, pp_state = 0;
6108 uint32_t pp_support_state = 0;
a8ca3413 6109
8a19e7fa
RZ
6110 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6111 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6112 pp_support_state = PP_STATE_SUPPORT_LS;
6113 pp_state = PP_STATE_LS;
6114 }
6115 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6116 pp_support_state |= PP_STATE_SUPPORT_CG;
6117 pp_state |= PP_STATE_CG;
6118 }
6119 if (state == AMD_CG_STATE_UNGATE)
6120 pp_state = 0;
6121
6122 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6123 PP_BLOCK_GFX_CG,
6124 pp_support_state,
6125 pp_state);
3811f8f0
RZ
6126 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6127 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
8a19e7fa 6128 }
a8ca3413 6129
8a19e7fa
RZ
6130 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6131 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6132 pp_support_state = PP_STATE_SUPPORT_LS;
6133 pp_state = PP_STATE_LS;
6134 }
6135 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6136 pp_support_state |= PP_STATE_SUPPORT_CG;
6137 pp_state |= PP_STATE_CG;
6138 }
6139 if (state == AMD_CG_STATE_UNGATE)
6140 pp_state = 0;
6141
6142 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6143 PP_BLOCK_GFX_3D,
6144 pp_support_state,
6145 pp_state);
3811f8f0
RZ
6146 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6147 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
8a19e7fa 6148 }
a8ca3413 6149
8a19e7fa
RZ
6150 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6151 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6152 pp_support_state = PP_STATE_SUPPORT_LS;
6153 pp_state = PP_STATE_LS;
6154 }
a8ca3413 6155
8a19e7fa
RZ
6156 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6157 pp_support_state |= PP_STATE_SUPPORT_CG;
6158 pp_state |= PP_STATE_CG;
6159 }
a8ca3413 6160
8a19e7fa
RZ
6161 if (state == AMD_CG_STATE_UNGATE)
6162 pp_state = 0;
a8ca3413 6163
8a19e7fa
RZ
6164 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6165 PP_BLOCK_GFX_MG,
6166 pp_support_state,
6167 pp_state);
3811f8f0
RZ
6168 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6169 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
8a19e7fa
RZ
6170 }
6171
6172 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6173 pp_support_state = PP_STATE_SUPPORT_LS;
6174
6175 if (state == AMD_CG_STATE_UNGATE)
6176 pp_state = 0;
6177 else
6178 pp_state = PP_STATE_LS;
6179
6180 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6181 PP_BLOCK_GFX_RLC,
6182 pp_support_state,
6183 pp_state);
3811f8f0
RZ
6184 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6185 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
8a19e7fa
RZ
6186 }
6187
6188 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6189 pp_support_state = PP_STATE_SUPPORT_LS;
6190
6191 if (state == AMD_CG_STATE_UNGATE)
6192 pp_state = 0;
6193 else
6194 pp_state = PP_STATE_LS;
6195 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
a8ca3413 6196 PP_BLOCK_GFX_CP,
8a19e7fa 6197 pp_support_state,
a8ca3413 6198 pp_state);
3811f8f0
RZ
6199 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6200 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
8a19e7fa 6201 }
a8ca3413
RZ
6202
6203 return 0;
6204}
6205
5fc3aeeb 6206static int gfx_v8_0_set_clockgating_state(void *handle,
6207 enum amd_clockgating_state state)
aaa36a97 6208{
6e378858
EH
6209 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6210
ce137c04
ML
6211 if (amdgpu_sriov_vf(adev))
6212 return 0;
6213
6e378858
EH
6214 switch (adev->asic_type) {
6215 case CHIP_FIJI:
dbff57bc
AD
6216 case CHIP_CARRIZO:
6217 case CHIP_STONEY:
6218 gfx_v8_0_update_gfx_clock_gating(adev,
7e913664 6219 state == AMD_CG_STATE_GATE);
6e378858 6220 break;
a8ca3413
RZ
6221 case CHIP_TONGA:
6222 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6223 break;
6224 case CHIP_POLARIS10:
6225 case CHIP_POLARIS11:
739e9fff 6226 case CHIP_POLARIS12:
71765469 6227 case CHIP_VEGAM:
a8ca3413
RZ
6228 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6229 break;
6e378858
EH
6230 default:
6231 break;
6232 }
aaa36a97
AD
6233 return 0;
6234}
6235
536fbf94 6236static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
aaa36a97 6237{
5003f278 6238 return ring->adev->wb.wb[ring->rptr_offs];
aaa36a97
AD
6239}
6240
536fbf94 6241static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
aaa36a97
AD
6242{
6243 struct amdgpu_device *adev = ring->adev;
aaa36a97
AD
6244
6245 if (ring->use_doorbell)
6246 /* XXX check if swapping is necessary on BE */
5003f278 6247 return ring->adev->wb.wb[ring->wptr_offs];
aaa36a97 6248 else
5003f278 6249 return RREG32(mmCP_RB0_WPTR);
aaa36a97
AD
6250}
6251
6252static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6253{
6254 struct amdgpu_device *adev = ring->adev;
6255
6256 if (ring->use_doorbell) {
6257 /* XXX check if swapping is necessary on BE */
536fbf94
KW
6258 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6259 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
aaa36a97 6260 } else {
536fbf94 6261 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
aaa36a97
AD
6262 (void)RREG32(mmCP_RB0_WPTR);
6263 }
6264}
6265
d2edb07b 6266static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
aaa36a97
AD
6267{
6268 u32 ref_and_mask, reg_mem_engine;
6269
4e638ae9
XY
6270 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6271 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
aaa36a97
AD
6272 switch (ring->me) {
6273 case 1:
6274 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6275 break;
6276 case 2:
6277 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6278 break;
6279 default:
6280 return;
6281 }
6282 reg_mem_engine = 0;
6283 } else {
6284 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6285 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6286 }
6287
6288 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6289 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6290 WAIT_REG_MEM_FUNCTION(3) | /* == */
6291 reg_mem_engine));
6292 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6293 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6294 amdgpu_ring_write(ring, ref_and_mask);
6295 amdgpu_ring_write(ring, ref_and_mask);
6296 amdgpu_ring_write(ring, 0x20); /* poll interval */
6297}
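/*
 * Illustrative sketch: the same WAIT_REG_MEM packet used above, reduced
 * to a plain "wait until register == value" form, as also emitted by
 * gfx_v8_0_ring_emit_vm_flush() below.  Hypothetical helper.
 */
static void __maybe_unused example_emit_wait_reg_eq(struct amdgpu_ring *ring,
						    uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) | /* == */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);        /* ref */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, 0x20);       /* poll interval */
}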
6298
45682886
ML
6299static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6300{
6301 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6302 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6303 EVENT_INDEX(4));
6304
6305 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6306 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6307 EVENT_INDEX(0));
6308}
6309
93323131 6310static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
d88bf583 6311 struct amdgpu_ib *ib,
c4f46f22 6312 unsigned vmid, bool ctx_switch)
aaa36a97
AD
6313{
6314 u32 header, control = 0;
aaa36a97 6315
de807f81 6316 if (ib->flags & AMDGPU_IB_FLAG_CE)
aaa36a97
AD
6317 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6318 else
6319 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6320
c4f46f22 6321 control |= ib->length_dw | (vmid << 24);
aaa36a97 6322
635e7132 6323 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
2e2e3c7f
ML
6324 control |= INDIRECT_BUFFER_PRE_ENB(1);
6325
635e7132
ML
6326 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6327 gfx_v8_0_ring_emit_de_meta(ring);
6328 }
6329
aaa36a97
AD
6330 amdgpu_ring_write(ring, header);
6331 amdgpu_ring_write(ring,
6332#ifdef __BIG_ENDIAN
6333 (2 << 0) |
6334#endif
6335 (ib->gpu_addr & 0xFFFFFFFC));
6336 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6337 amdgpu_ring_write(ring, control);
6338}
6339
93323131 6340static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
d88bf583 6341 struct amdgpu_ib *ib,
c4f46f22 6342 unsigned vmid, bool ctx_switch)
93323131 6343{
c4f46f22 6344 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
93323131 6345
33b7ed01 6346 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
93323131 6347 amdgpu_ring_write(ring,
6348#ifdef __BIG_ENDIAN
62d2ce4b 6349 (2 << 0) |
93323131 6350#endif
62d2ce4b 6351 (ib->gpu_addr & 0xFFFFFFFC));
93323131 6352 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6353 amdgpu_ring_write(ring, control);
6354}
6355
aaa36a97 6356static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
890ee23f 6357 u64 seq, unsigned flags)
aaa36a97 6358{
890ee23f
CZ
6359 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6360 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6361
aaa36a97
AD
6362 /* EVENT_WRITE_EOP - flush caches, send int */
6363 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6364 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6365 EOP_TC_ACTION_EN |
f84e63f2 6366 EOP_TC_WB_ACTION_EN |
aaa36a97
AD
6367 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6368 EVENT_INDEX(5)));
6369 amdgpu_ring_write(ring, addr & 0xfffffffc);
90bea0ab 6370 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
890ee23f 6371 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
aaa36a97
AD
6372 amdgpu_ring_write(ring, lower_32_bits(seq));
6373 amdgpu_ring_write(ring, upper_32_bits(seq));
22c01cc4 6374
aaa36a97
AD
6375}
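/*
 * Illustrative sketch: the flags parameter consumed above selects the
 * fence width (DATA_SEL) and whether an interrupt fires (INT_SEL).
 * Hypothetical call requesting a 64-bit fence write plus an interrupt.
 */
static void __maybe_unused example_emit_gfx_fence(struct amdgpu_ring *ring,
						  u64 addr, u64 seq)
{
	gfx_v8_0_ring_emit_fence_gfx(ring, addr, seq,
				     AMDGPU_FENCE_FLAG_64BIT |
				     AMDGPU_FENCE_FLAG_INT);
}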
6376
b8c7b39e 6377static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
aaa36a97 6378{
21cd942e 6379 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5907a0d8 6380 uint32_t seq = ring->fence_drv.sync_seq;
22c01cc4
AA
6381 uint64_t addr = ring->fence_drv.gpu_addr;
6382
6383 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6384 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
9cac5373
CZ
6385 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6386 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
22c01cc4
AA
6387 amdgpu_ring_write(ring, addr & 0xfffffffc);
6388 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6389 amdgpu_ring_write(ring, seq);
6390 amdgpu_ring_write(ring, 0xffffffff);
6391 amdgpu_ring_write(ring, 4); /* poll interval */
b8c7b39e
CK
6392}
6393
6394static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
c633c00b 6395 unsigned vmid, uint64_t pd_addr)
b8c7b39e 6396{
21cd942e 6397 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5c3422b0 6398
c633c00b 6399 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
aaa36a97
AD
6400
6401 /* wait for the invalidate to complete */
6402 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6403 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6404 WAIT_REG_MEM_FUNCTION(0) | /* always */
6405 WAIT_REG_MEM_ENGINE(0))); /* me */
6406 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6407 amdgpu_ring_write(ring, 0);
6408 amdgpu_ring_write(ring, 0); /* ref */
6409 amdgpu_ring_write(ring, 0); /* mask */
6410 amdgpu_ring_write(ring, 0x20); /* poll interval */
6411
6412 /* compute doesn't have PFP */
6413 if (usepfp) {
6414 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6415 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6416 amdgpu_ring_write(ring, 0x0);
aaa36a97
AD
6417 }
6418}
6419
536fbf94 6420static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
aaa36a97
AD
6421{
6422 return ring->adev->wb.wb[ring->wptr_offs];
6423}
6424
6425static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6426{
6427 struct amdgpu_device *adev = ring->adev;
6428
6429 /* XXX check if swapping is necessary on BE */
536fbf94
KW
6430 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6431 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
aaa36a97
AD
6432}
6433
b8866c26
AR
6434static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6435 bool acquire)
6436{
6437 struct amdgpu_device *adev = ring->adev;
6438 int pipe_num, tmp, reg;
6439 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6440
6441 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6442
6443 /* first me only has 2 entries, GFX and HP3D */
6444 if (ring->me > 0)
6445 pipe_num -= 2;
6446
6447 reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6448 tmp = RREG32(reg);
6449 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6450 WREG32(reg, tmp);
6451}
6452
6453static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6454 struct amdgpu_ring *ring,
6455 bool acquire)
6456{
6457 int i, pipe;
6458 bool reserve;
6459 struct amdgpu_ring *iring;
6460
6461 mutex_lock(&adev->gfx.pipe_reserve_mutex);
6462 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6463 if (acquire)
6464 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6465 else
6466 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6467
6468 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6469 /* Clear all reservations - everyone reacquires all resources */
6470 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6471 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6472 true);
6473
6474 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6475 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6476 true);
6477 } else {
6478 /* Lower all pipes without a current reservation */
6479 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6480 iring = &adev->gfx.gfx_ring[i];
6481 pipe = amdgpu_gfx_queue_to_bit(adev,
6482 iring->me,
6483 iring->pipe,
6484 0);
6485 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6486 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6487 }
6488
6489 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6490 iring = &adev->gfx.compute_ring[i];
6491 pipe = amdgpu_gfx_queue_to_bit(adev,
6492 iring->me,
6493 iring->pipe,
6494 0);
6495 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6496 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6497 }
6498 }
6499
6500 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6501}
6502
6503static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6504 struct amdgpu_ring *ring,
6505 bool acquire)
6506{
6507 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6508 uint32_t queue_priority = acquire ? 0xf : 0x0;
6509
6510 mutex_lock(&adev->srbm_mutex);
6511 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6512
6513 WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6514 WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6515
6516 vi_srbm_select(adev, 0, 0, 0, 0);
6517 mutex_unlock(&adev->srbm_mutex);
6518}
6519static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
1b1f42d8 6520 enum drm_sched_priority priority)
b8866c26
AR
6521{
6522 struct amdgpu_device *adev = ring->adev;
1b1f42d8 6523 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
b8866c26
AR
6524
6525 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6526 return;
6527
6528 gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6529 gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6530}
6531
aaa36a97
AD
6532static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6533 u64 addr, u64 seq,
890ee23f 6534 unsigned flags)
aaa36a97 6535{
890ee23f
CZ
6536 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6537 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6538
aaa36a97
AD
6539 /* RELEASE_MEM - flush caches, send int */
6540 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6541 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6542 EOP_TC_ACTION_EN |
a3d5aaa8 6543 EOP_TC_WB_ACTION_EN |
aaa36a97
AD
6544 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6545 EVENT_INDEX(5)));
890ee23f 6546 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
aaa36a97
AD
6547 amdgpu_ring_write(ring, addr & 0xfffffffc);
6548 amdgpu_ring_write(ring, upper_32_bits(addr));
6549 amdgpu_ring_write(ring, lower_32_bits(seq));
6550 amdgpu_ring_write(ring, upper_32_bits(seq));
6551}
6552
4e638ae9
XY
6553static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6554 u64 seq, unsigned int flags)
6555{
6556 /* we only allocate 32bit for each seq wb address */
f10b478d 6557 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4e638ae9
XY
6558
6559 /* write fence seq to the "addr" */
6560 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6561 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6562 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6563 amdgpu_ring_write(ring, lower_32_bits(addr));
6564 amdgpu_ring_write(ring, upper_32_bits(addr));
6565 amdgpu_ring_write(ring, lower_32_bits(seq));
6566
6567 if (flags & AMDGPU_FENCE_FLAG_INT) {
6568 /* set register to trigger INT */
6569 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6570 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6571 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6572 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6573 amdgpu_ring_write(ring, 0);
6574 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6575 }
6576}
6577
c2167a65
ML
6578static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6579{
6580 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6581 amdgpu_ring_write(ring, 0);
6582}
6583
753ad49c
ML
6584static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6585{
6586 uint32_t dw2 = 0;
6587
c2ce92fc 6588 if (amdgpu_sriov_vf(ring->adev))
95243543 6589 gfx_v8_0_ring_emit_ce_meta(ring);
c2ce92fc 6590
753ad49c
ML
6591 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6592 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
45682886 6593 gfx_v8_0_ring_emit_vgt_flush(ring);
753ad49c
ML
6594 /* set load_global_config & load_global_uconfig */
6595 dw2 |= 0x8001;
6596 /* set load_cs_sh_regs */
6597 dw2 |= 0x01000000;
6598 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6599 dw2 |= 0x10002;
6600
6601 /* set load_ce_ram if a preamble is presented */
6602 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6603 dw2 |= 0x10000000;
6604 } else {
6605 /* still load_ce_ram if this is the first time a preamble is
6606 * presented, even though no context switch happens.
6607 */
6608 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6609 dw2 |= 0x10000000;
6610 }
6611
6612 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6613 amdgpu_ring_write(ring, dw2);
6614 amdgpu_ring_write(ring, 0);
6615}
6616
806ba2d4
ML
6617static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6618{
6619 unsigned ret;
6620
6621 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6622 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6623 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6624 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6625 ret = ring->wptr & ring->buf_mask;
6626 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6627 return ret;
6628}
6629
6630static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6631{
6632 unsigned cur;
6633
6634 BUG_ON(offset > ring->buf_mask);
6635 BUG_ON(ring->ring[offset] != 0x55aa55aa);
6636
6637 cur = (ring->wptr & ring->buf_mask) - 1;
6638 if (likely(cur > offset))
6639 ring->ring[offset] = cur - offset;
6640 else
6641 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6642}
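/*
 * Illustrative sketch: INIT_COND_EXEC and PATCH_COND_EXEC form a
 * bracket - emit the conditional header, emit the packets that may be
 * skipped, then patch in the actual dword count once the payload size
 * is known.  Hypothetical example of the pairing.
 */
static void __maybe_unused example_cond_exec_bracket(struct amdgpu_ring *ring)
{
	unsigned offset = gfx_v8_0_ring_emit_init_cond_exec(ring);

	/* ... packets discarded when *cond_exe_gpu_addr == 0 ... */

	gfx_v8_0_ring_emit_patch_cond_exec(ring, offset);
}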
6643
880e87e3
XY
6644static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6645{
6646 struct amdgpu_device *adev = ring->adev;
6647
6648 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6649 amdgpu_ring_write(ring, 0 | /* src: register*/
6650 (5 << 8) | /* dst: memory */
6651 (1 << 20)); /* write confirm */
6652 amdgpu_ring_write(ring, reg);
6653 amdgpu_ring_write(ring, 0);
6654 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6655 adev->virt.reg_val_offs * 4));
6656 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6657 adev->virt.reg_val_offs * 4));
6658}
6659
6660static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6661 uint32_t val)
6662{
9ed88047
CK
6663 uint32_t cmd;
6664
6665 switch (ring->funcs->type) {
6666 case AMDGPU_RING_TYPE_GFX:
6667 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6668 break;
6669 case AMDGPU_RING_TYPE_KIQ:
6670 cmd = 1 << 16; /* no inc addr */
6671 break;
6672 default:
6673 cmd = WR_CONFIRM;
6674 break;
6675 }
6676
880e87e3 6677 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
9ed88047 6678 amdgpu_ring_write(ring, cmd);
880e87e3
XY
6679 amdgpu_ring_write(ring, reg);
6680 amdgpu_ring_write(ring, 0);
6681 amdgpu_ring_write(ring, val);
6682}
6683
f5d85033
CK
6684static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6685{
6686 struct amdgpu_device *adev = ring->adev;
6687 uint32_t value = 0;
6688
6689 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6690 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6691 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6692 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6693 WREG32(mmSQ_CMD, value);
6694}
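/*
 * Illustrative sketch: the SQ_CMD word assembled above appears to
 * encode a broadcast wave-kill request (CMD 0x03, MODE 0x01) limited
 * by CHECK_VMID/VM_ID to the hung VMID - this is an assumption from
 * the field names, not documented here.  Hypothetical direct call as
 * used by the ring soft-recovery path.
 */
static void __maybe_unused example_kill_vmid_waves(struct amdgpu_ring *ring,
						   unsigned vmid)
{
	gfx_v8_0_ring_soft_recovery(ring, vmid);
}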
6695
aaa36a97
AD
6696static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6697 enum amdgpu_interrupt_state state)
6698{
61cb8cef
TSD
6699 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6700 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6701}
6702
6703static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6704 int me, int pipe,
6705 enum amdgpu_interrupt_state state)
6706{
d0c55cdf 6707 u32 mec_int_cntl, mec_int_cntl_reg;
aaa36a97 6708
aaa36a97 6709 /*
d0c55cdf
AD
6710 * amdgpu controls only the first MEC. That's why this function only
6711 * handles the setting of interrupts for this specific MEC. All other
aaa36a97
AD
6712 * pipes' interrupts are set by amdkfd.
6713 */
6714
6715 if (me == 1) {
6716 switch (pipe) {
6717 case 0:
d0c55cdf
AD
6718 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6719 break;
6720 case 1:
6721 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6722 break;
6723 case 2:
6724 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6725 break;
6726 case 3:
6727 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
aaa36a97
AD
6728 break;
6729 default:
6730 DRM_DEBUG("invalid pipe %d\n", pipe);
6731 return;
6732 }
6733 } else {
6734 DRM_DEBUG("invalid me %d\n", me);
6735 return;
6736 }
6737
d0c55cdf
AD
6738 switch (state) {
6739 case AMDGPU_IRQ_STATE_DISABLE:
6740 mec_int_cntl = RREG32(mec_int_cntl_reg);
6741 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6742 WREG32(mec_int_cntl_reg, mec_int_cntl);
6743 break;
6744 case AMDGPU_IRQ_STATE_ENABLE:
6745 mec_int_cntl = RREG32(mec_int_cntl_reg);
6746 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6747 WREG32(mec_int_cntl_reg, mec_int_cntl);
6748 break;
6749 default:
6750 break;
6751 }
aaa36a97
AD
6752}
6753
6754static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6755 struct amdgpu_irq_src *source,
6756 unsigned type,
6757 enum amdgpu_interrupt_state state)
6758{
61cb8cef
TSD
6759 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6760 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6761
6762 return 0;
6763}
6764
6765static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6766 struct amdgpu_irq_src *source,
6767 unsigned type,
6768 enum amdgpu_interrupt_state state)
6769{
61cb8cef
TSD
6770 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6771 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6772
6773 return 0;
6774}
6775
6776static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6777 struct amdgpu_irq_src *src,
6778 unsigned type,
6779 enum amdgpu_interrupt_state state)
6780{
6781 switch (type) {
6782 case AMDGPU_CP_IRQ_GFX_EOP:
6783 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6784 break;
6785 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6786 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6787 break;
6788 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6789 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6790 break;
6791 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6792 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6793 break;
6794 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6795 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6796 break;
6797 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6798 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6799 break;
6800 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6801 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6802 break;
6803 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6804 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6805 break;
6806 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6807 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6808 break;
6809 default:
6810 break;
6811 }
6812 return 0;
6813}
6814
5a2f2913
DP
6815static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6816 struct amdgpu_irq_src *source,
6817 unsigned int type,
6818 enum amdgpu_interrupt_state state)
6819{
6820 int enable_flag;
6821
6822 switch (state) {
6823 case AMDGPU_IRQ_STATE_DISABLE:
6824 enable_flag = 0;
6825 break;
6826
6827 case AMDGPU_IRQ_STATE_ENABLE:
6828 enable_flag = 1;
6829 break;
6830
6831 default:
6832 return -EINVAL;
6833 }
6834
6835 WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6836 WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6837 WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6838 WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6839 WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6840 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6841 enable_flag);
6842 WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6843 enable_flag);
6844 WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6845 enable_flag);
6846 WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6847 enable_flag);
6848 WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6849 enable_flag);
6850 WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6851 enable_flag);
6852 WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6853 enable_flag);
6854 WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6855 enable_flag);
6856
6857 return 0;
6858}
6859
04ad26bb
DP
6860static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6861 struct amdgpu_irq_src *source,
6862 unsigned int type,
6863 enum amdgpu_interrupt_state state)
6864{
6865 int enable_flag;
6866
6867 switch (state) {
6868 case AMDGPU_IRQ_STATE_DISABLE:
6869 enable_flag = 1;
6870 break;
6871
6872 case AMDGPU_IRQ_STATE_ENABLE:
6873 enable_flag = 0;
6874 break;
6875
6876 default:
6877 return -EINVAL;
6878 }
6879
6880 WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6881 enable_flag);
6882
6883 return 0;
6884}
6885
aaa36a97
AD
6886static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6887 struct amdgpu_irq_src *source,
6888 struct amdgpu_iv_entry *entry)
6889{
6890 int i;
6891 u8 me_id, pipe_id, queue_id;
6892 struct amdgpu_ring *ring;
6893
6894 DRM_DEBUG("IH: CP EOP\n");
6895 me_id = (entry->ring_id & 0x0c) >> 2;
6896 pipe_id = (entry->ring_id & 0x03) >> 0;
6897 queue_id = (entry->ring_id & 0x70) >> 4;
6898
6899 switch (me_id) {
6900 case 0:
6901 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6902 break;
6903 case 1:
6904 case 2:
6905 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6906 ring = &adev->gfx.compute_ring[i];
6907 /* Per-queue interrupt is supported for MEC starting from VI.
6908 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6909 */
6910 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6911 amdgpu_fence_process(ring);
6912 }
6913 break;
6914 }
6915 return 0;
6916}
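/*
 * Illustrative sketch: the ring_id bit layout decoded inline above
 * (pipe in bits 1:0, me in bits 3:2, queue in bits 6:4), pulled out
 * into a hypothetical helper for clarity.
 */
static void __maybe_unused example_decode_ring_id(u8 ring_id, u8 *me,
						  u8 *pipe, u8 *queue)
{
	*me = (ring_id & 0x0c) >> 2;
	*pipe = (ring_id & 0x03) >> 0;
	*queue = (ring_id & 0x70) >> 4;
}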
6917
6918static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6919 struct amdgpu_irq_src *source,
6920 struct amdgpu_iv_entry *entry)
6921{
6922 DRM_ERROR("Illegal register access in command stream\n");
6923 schedule_work(&adev->reset_work);
6924 return 0;
6925}
6926
6927static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6928 struct amdgpu_irq_src *source,
6929 struct amdgpu_iv_entry *entry)
6930{
6931 DRM_ERROR("Illegal instruction in command stream\n");
6932 schedule_work(&adev->reset_work);
6933 return 0;
6934}
6935
5a2f2913
DP
6936static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6937 struct amdgpu_irq_src *source,
6938 struct amdgpu_iv_entry *entry)
6939{
04ad26bb
DP
6940 DRM_ERROR("CP EDC/ECC error detected.\n");
6941 return 0;
6942}
6943
9bdc2092 6944static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
04ad26bb 6945{
9bdc2092 6946 u32 enc, se_id, sh_id, cu_id;
04ad26bb 6947 char type[20];
9bdc2092 6948 int sq_edc_source = -1;
d9e222b4
AG
6949
6950 enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6951 se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
04ad26bb
DP
6952
6953 switch (enc) {
6954 case 0:
6955 DRM_INFO("SQ general purpose intr detected: "
6956 "se_id %d, immed_overflow %d, host_reg_overflow %d, "
6957 "host_cmd_overflow %d, cmd_timestamp %d, "
6958 "reg_timestamp %d, thread_trace_buff_full %d, "
6959 "wlt %d, thread_trace %d.\n",
6960 se_id,
d9e222b4
AG
6961 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6962 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6963 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6964 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6965 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6966 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6967 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6968 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
04ad26bb
DP
6969 );
6970 break;
6971 case 1:
6972 case 2:
6973
9bdc2092
AG
6974 cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6975 sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6976
6977 /*
6978 * This function can be called either directly from the ISR or
6979 * from a BH, in which case we can access the SQ_EDC_INFO
6980 * register instance.
6981 */
6982 if (in_task()) {
6983 mutex_lock(&adev->grbm_idx_mutex);
6984 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6985
6986 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6987
6988 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6989 mutex_unlock(&adev->grbm_idx_mutex);
6990 }
6991
04ad26bb
DP
6992 if (enc == 1)
6993 sprintf(type, "instruction intr");
6994 else
6995 sprintf(type, "EDC/ECC error");
6996
6997 DRM_INFO(
6998 "SQ %s detected: "
9bdc2092
AG
6999 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
7000 "trap %s, sq_ed_info.source %s.\n",
7001 type, se_id, sh_id, cu_id,
d9e222b4
AG
7002 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
7003 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
7004 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
7005 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
9bdc2092
AG
7006 (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
7007 );
04ad26bb
DP
7008 break;
7009 default:
7010 DRM_ERROR("SQ invalid encoding type.\n");
9bdc2092
AG
7011 }
7012}
7013
7014static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
7015{
7016
7017 struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
7018 struct sq_work *sq_work = container_of(work, struct sq_work, work);
7019
7020 gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
7021}
7022
7023static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
7024 struct amdgpu_irq_src *source,
7025 struct amdgpu_iv_entry *entry)
7026{
7027 unsigned ih_data = entry->src_data[0];
7028
7029 /*
7030 * Try to submit work so SQ_EDC_INFO can be accessed from
7031 * the BH. If the previous work submission hasn't finished yet,
7032 * just print whatever info is possible directly from the ISR.
7033 */
7034 if (work_pending(&adev->gfx.sq_work.work)) {
7035 gfx_v8_0_parse_sq_irq(adev, ih_data);
7036 } else {
7037 adev->gfx.sq_work.ih_data = ih_data;
7038 schedule_work(&adev->gfx.sq_work.work);
04ad26bb
DP
7039 }
7040
5a2f2913
DP
7041 return 0;
7042}
7043
4e638ae9
XY
7044static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
7045 struct amdgpu_irq_src *src,
7046 unsigned int type,
7047 enum amdgpu_interrupt_state state)
7048{
07c397f9 7049 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
4e638ae9 7050
4e638ae9
XY
7051 switch (type) {
7052 case AMDGPU_CP_KIQ_IRQ_DRIVER0:
ccaf3574
TSD
7053 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
7054 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7055 if (ring->me == 1)
7056 WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
7057 ring->pipe,
7058 GENERIC2_INT_ENABLE,
7059 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7060 else
7061 WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
7062 ring->pipe,
7063 GENERIC2_INT_ENABLE,
7064 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
4e638ae9
XY
7065 break;
7066 default:
7067 BUG(); /* kiq only supports GENERIC2_INT now */
7068 break;
7069 }
7070 return 0;
7071}
7072
7073static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7074 struct amdgpu_irq_src *source,
7075 struct amdgpu_iv_entry *entry)
7076{
7077 u8 me_id, pipe_id, queue_id;
07c397f9 7078 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
4e638ae9 7079
4e638ae9
XY
7080 me_id = (entry->ring_id & 0x0c) >> 2;
7081 pipe_id = (entry->ring_id & 0x03) >> 0;
7082 queue_id = (entry->ring_id & 0x70) >> 4;
7083 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7084 me_id, pipe_id, queue_id);
7085
7086 amdgpu_fence_process(ring);
7087 return 0;
7088}
7089
a1255107 7090static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
88a907d6 7091 .name = "gfx_v8_0",
aaa36a97 7092 .early_init = gfx_v8_0_early_init,
ccba7691 7093 .late_init = gfx_v8_0_late_init,
aaa36a97
AD
7094 .sw_init = gfx_v8_0_sw_init,
7095 .sw_fini = gfx_v8_0_sw_fini,
7096 .hw_init = gfx_v8_0_hw_init,
7097 .hw_fini = gfx_v8_0_hw_fini,
7098 .suspend = gfx_v8_0_suspend,
7099 .resume = gfx_v8_0_resume,
7100 .is_idle = gfx_v8_0_is_idle,
7101 .wait_for_idle = gfx_v8_0_wait_for_idle,
3d7c6384 7102 .check_soft_reset = gfx_v8_0_check_soft_reset,
1057f20c 7103 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
aaa36a97 7104 .soft_reset = gfx_v8_0_soft_reset,
e4ae0fc3 7105 .post_soft_reset = gfx_v8_0_post_soft_reset,
aaa36a97
AD
7106 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
7107 .set_powergating_state = gfx_v8_0_set_powergating_state,
ebd843d6 7108 .get_clockgating_state = gfx_v8_0_get_clockgating_state,
aaa36a97
AD
7109};
7110
7111static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
21cd942e 7112 .type = AMDGPU_RING_TYPE_GFX,
79887142
CK
7113 .align_mask = 0xff,
7114 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
536fbf94 7115 .support_64bit_ptrs = false,
e7706b42 7116 .get_rptr = gfx_v8_0_ring_get_rptr,
aaa36a97
AD
7117 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
7118 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
e9d672b2
ML
7119 .emit_frame_size = /* maximum 215dw if count 16 IBs in */
7120 5 + /* COND_EXEC */
7121 7 + /* PIPELINE_SYNC */
5518625d 7122 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
e9d672b2
ML
7123 8 + /* FENCE for VM_FLUSH */
7124 20 + /* GDS switch */
7125 4 + /* double SWITCH_BUFFER,
7126 the first COND_EXEC jump to the place just
7127 prior to this double SWITCH_BUFFER */
7128 5 + /* COND_EXEC */
7129 7 + /* HDP_flush */
7130 4 + /* VGT_flush */
7131 14 + /* CE_META */
7132 31 + /* DE_META */
7133 3 + /* CNTX_CTRL */
7134 5 + /* HDP_INVL */
7135 8 + 8 + /* FENCE x2 */
7136 2, /* SWITCH_BUFFER */
e12f3d7a 7137 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
93323131 7138 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
aaa36a97 7139 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
b8c7b39e 7140 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
aaa36a97
AD
7141 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7142 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
d2edb07b 7143 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
aaa36a97
AD
7144 .test_ring = gfx_v8_0_ring_test_ring,
7145 .test_ib = gfx_v8_0_ring_test_ib,
edff0e28 7146 .insert_nop = amdgpu_ring_insert_nop,
9e5d5309 7147 .pad_ib = amdgpu_ring_generic_pad_ib,
c2167a65 7148 .emit_switch_buffer = gfx_v8_ring_emit_sb,
753ad49c 7149 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
806ba2d4
ML
7150 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
7151 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
9ed88047 7152 .emit_wreg = gfx_v8_0_ring_emit_wreg,
f5d85033 7153 .soft_recovery = gfx_v8_0_ring_soft_recovery,
aaa36a97
AD
7154};
7155
7156static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
21cd942e 7157 .type = AMDGPU_RING_TYPE_COMPUTE,
79887142
CK
7158 .align_mask = 0xff,
7159 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
536fbf94 7160 .support_64bit_ptrs = false,
e7706b42 7161 .get_rptr = gfx_v8_0_ring_get_rptr,
aaa36a97
AD
7162 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7163 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
e12f3d7a
CK
7164 .emit_frame_size =
7165 20 + /* gfx_v8_0_ring_emit_gds_switch */
7166 7 + /* gfx_v8_0_ring_emit_hdp_flush */
2ee150cd 7167 5 + /* hdp_invalidate */
e12f3d7a 7168 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
5518625d 7169 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
e12f3d7a
CK
7170 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7171 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
93323131 7172 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
aaa36a97 7173 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
b8c7b39e 7174 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
aaa36a97
AD
7175 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7176 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
35074d2d 7177 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
aaa36a97
AD
7178 .test_ring = gfx_v8_0_ring_test_ring,
7179 .test_ib = gfx_v8_0_ring_test_ib,
edff0e28 7180 .insert_nop = amdgpu_ring_insert_nop,
9e5d5309 7181 .pad_ib = amdgpu_ring_generic_pad_ib,
b8866c26 7182 .set_priority = gfx_v8_0_ring_set_priority_compute,
9ed88047 7183 .emit_wreg = gfx_v8_0_ring_emit_wreg,
aaa36a97
AD
7184};
7185
4e638ae9
XY
7186static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7187 .type = AMDGPU_RING_TYPE_KIQ,
7188 .align_mask = 0xff,
7189 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
536fbf94 7190 .support_64bit_ptrs = false,
4e638ae9
XY
7191 .get_rptr = gfx_v8_0_ring_get_rptr,
7192 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7193 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7194 .emit_frame_size =
7195 20 + /* gfx_v8_0_ring_emit_gds_switch */
7196 7 + /* gfx_v8_0_ring_emit_hdp_flush */
2ee150cd 7197 5 + /* hdp_invalidate */
4e638ae9
XY
7198 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7199 17 + /* gfx_v8_0_ring_emit_vm_flush */
7200 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7201 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7202 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7203 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
4e638ae9
XY
7204 .test_ring = gfx_v8_0_ring_test_ring,
7205 .test_ib = gfx_v8_0_ring_test_ib,
7206 .insert_nop = amdgpu_ring_insert_nop,
7207 .pad_ib = amdgpu_ring_generic_pad_ib,
880e87e3
XY
7208 .emit_rreg = gfx_v8_0_ring_emit_rreg,
7209 .emit_wreg = gfx_v8_0_ring_emit_wreg,
4e638ae9
XY
7210};
7211
aaa36a97
AD
7212static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7213{
7214 int i;
7215
4e638ae9
XY
7216 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7217
aaa36a97
AD
7218 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7219 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7220
7221 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7222 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7223}
7224
7225static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7226 .set = gfx_v8_0_set_eop_interrupt_state,
7227 .process = gfx_v8_0_eop_irq,
7228};
7229
7230static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7231 .set = gfx_v8_0_set_priv_reg_fault_state,
7232 .process = gfx_v8_0_priv_reg_irq,
7233};
7234
7235static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7236 .set = gfx_v8_0_set_priv_inst_fault_state,
7237 .process = gfx_v8_0_priv_inst_irq,
7238};
7239
4e638ae9
XY
7240static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7241 .set = gfx_v8_0_kiq_set_interrupt_state,
7242 .process = gfx_v8_0_kiq_irq,
7243};
7244
5a2f2913
DP
7245static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7246 .set = gfx_v8_0_set_cp_ecc_int_state,
7247 .process = gfx_v8_0_cp_ecc_error_irq,
7248};
7249
04ad26bb
DP
7250static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7251 .set = gfx_v8_0_set_sq_int_state,
7252 .process = gfx_v8_0_sq_irq,
7253};
7254
aaa36a97
AD
7255static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7256{
7257 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7258 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7259
7260 adev->gfx.priv_reg_irq.num_types = 1;
7261 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7262
7263 adev->gfx.priv_inst_irq.num_types = 1;
7264 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
4e638ae9
XY
7265
7266 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7267 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
5a2f2913
DP
7268
7269 adev->gfx.cp_ecc_error_irq.num_types = 1;
7270 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
04ad26bb
DP
7271
7272 adev->gfx.sq_irq.num_types = 1;
7273 adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
aaa36a97
AD
7274}
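
/*
 * Hedged sketch of the .set/.process pairing registered above: as far as
 * I can tell, .set arms or disarms one interrupt type on a source and
 * .process handles a delivered event. Standalone dispatch demo with
 * invented names; the real amdgpu IRQ core differs in detail.
 */
#include <stdio.h>

struct irq_src;
struct irq_src_funcs {
	int (*set)(struct irq_src *src, unsigned type, int enabled);
	int (*process)(struct irq_src *src, unsigned type);
};
struct irq_src {
	const char *name;
	const struct irq_src_funcs *funcs;
	unsigned num_types;
};

static int demo_set(struct irq_src *src, unsigned type, int enabled)
{
	printf("%s: type %u %s\n", src->name, type, enabled ? "armed" : "off");
	return 0;
}
static int demo_process(struct irq_src *src, unsigned type)
{
	printf("%s: handled type %u\n", src->name, type);
	return 0;
}
static const struct irq_src_funcs demo_funcs = { demo_set, demo_process };

int main(void)
{
	struct irq_src eop = { "eop", &demo_funcs, 1 };

	eop.funcs->set(&eop, 0, 1);   /* enable the source */
	eop.funcs->process(&eop, 0);  /* deliver one event */
	return 0;
}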
7275
dbff57bc
AD
7276static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7277{
ae6a58e4 7278 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
dbff57bc
AD
7279}
7280
aaa36a97
AD
7281static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7282{
7284 /* init asic gds info */
7284 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7285 adev->gds.gws.total_size = 64;
7286 adev->gds.oa.total_size = 16;
7287
7288 if (adev->gds.mem.total_size == 64 * 1024) {
7289 adev->gds.mem.gfx_partition_size = 4096;
7290 adev->gds.mem.cs_partition_size = 4096;
7291
7292 adev->gds.gws.gfx_partition_size = 4;
7293 adev->gds.gws.cs_partition_size = 4;
7294
7295 adev->gds.oa.gfx_partition_size = 4;
7296 adev->gds.oa.cs_partition_size = 1;
7297 } else {
7298 adev->gds.mem.gfx_partition_size = 1024;
7299 adev->gds.mem.cs_partition_size = 1024;
7300
7301 adev->gds.gws.gfx_partition_size = 16;
7302 adev->gds.gws.cs_partition_size = 16;
7303
7304 adev->gds.oa.gfx_partition_size = 4;
7305 adev->gds.oa.cs_partition_size = 4;
7306 }
7307}
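
/*
 * Hedged arithmetic check for the GDS sizing above: with the 64 KiB total
 * that mmGDS_VMID0_SIZE reports, the code hands out 4 KiB gfx/cs
 * partitions (16 of them); smaller totals fall back to 1 KiB partitions.
 * Standalone sketch mirroring the values in the code, nothing more.
 */
#include <stdio.h>

int main(void)
{
	unsigned total = 64 * 1024; /* stand-in for RREG32(mmGDS_VMID0_SIZE) */
	unsigned part = (total == 64 * 1024) ? 4096 : 1024;

	printf("%u bytes of GDS -> %u partitions of %u bytes\n",
	       total, total / part, part);
	return 0;
}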
7308
9de06de8
NH
7309static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7310 u32 bitmap)
7311{
7312 u32 data;
7313
7314 if (!bitmap)
7315 return;
7316
7317 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7318 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7319
7320 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7321}
7322
8f8e00c1 7323static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
aaa36a97 7324{
8f8e00c1 7325 u32 data, mask;
aaa36a97 7326
5003f278
TSD
7327 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7328 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
aaa36a97 7329
378506a7 7330 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
aaa36a97 7331
5003f278 7332 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
aaa36a97
AD
7333}
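
/*
 * A minimal user-space rendition of the bitmask arithmetic above:
 * amdgpu_gfx_create_bitmask() builds a mask of max_cu_per_sh low bits, and
 * the active bitmap is the complement of the INACTIVE_CUS field clipped to
 * that mask. Values here are made up for illustration.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t create_bitmask(unsigned bits)
{
	return bits >= 32 ? 0xffffffffu : (1u << bits) - 1;
}

int main(void)
{
	unsigned max_cu_per_sh = 8;
	uint32_t inactive = 0x05; /* pretend CUs 0 and 2 are fused off */
	uint32_t active = ~inactive & create_bitmask(max_cu_per_sh);

	printf("active CU bitmap: 0x%02x\n", (unsigned)active); /* 0xfa */
	return 0;
}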
7334
7dae69a2 7335static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
aaa36a97
AD
7336{
7337 int i, j, k, counter, active_cu_number = 0;
7338 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7dae69a2 7339 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
9de06de8 7340 unsigned disable_masks[4 * 2];
fe723cd3 7341 u32 ao_cu_num;
aaa36a97 7342
6157bd7a
FC
7343 memset(cu_info, 0, sizeof(*cu_info));
7344
fe723cd3
RZ
7345 if (adev->flags & AMD_IS_APU)
7346 ao_cu_num = 2;
7347 else
7348 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7349
9de06de8
NH
7350 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7351
aaa36a97
AD
7352 mutex_lock(&adev->grbm_idx_mutex);
7353 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7354 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7355 mask = 1;
7356 ao_bitmap = 0;
7357 counter = 0;
9559ef5b 7358 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
9de06de8
NH
7359 if (i < 4 && j < 2)
7360 gfx_v8_0_set_user_cu_inactive_bitmap(
7361 adev, disable_masks[i * 2 + j]);
8f8e00c1 7362 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
aaa36a97
AD
7363 cu_info->bitmap[i][j] = bitmap;
7364
fe723cd3 7365 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
aaa36a97 7366 if (bitmap & mask) {
fe723cd3 7367 if (counter < ao_cu_num)
aaa36a97
AD
7368 ao_bitmap |= mask;
7369 counter++;
7370 }
7371 mask <<= 1;
7372 }
7373 active_cu_number += counter;
dbfe85ea
FC
7374 if (i < 2 && j < 2)
7375 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7376 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
aaa36a97
AD
7377 }
7378 }
9559ef5b 7379 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
8f8e00c1 7380 mutex_unlock(&adev->grbm_idx_mutex);
aaa36a97
AD
7381
7382 cu_info->number = active_cu_number;
7383 cu_info->ao_cu_mask = ao_cu_mask;
ebdebf42
FC
7384 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7385 cu_info->max_waves_per_simd = 10;
7386 cu_info->max_scratch_slots_per_cu = 32;
7387 cu_info->wave_front_size = 64;
7388 cu_info->lds_size = 64;
aaa36a97 7389}
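
/*
 * Hedged sketch of the per-SH loop body above: walk the active bitmap,
 * count CUs, and mark at most ao_cu_num of them as always-on. Standalone
 * version of that inner loop only; the input bitmap continues the example
 * from the previous sketch and is illustrative.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t bitmap = 0xfa;  /* active CUs in one SH */
	unsigned ao_cu_num = 4;  /* the APU branch above would use 2 */
	uint32_t mask = 1, ao_bitmap = 0;
	unsigned counter = 0;

	for (unsigned k = 0; k < 8; k++, mask <<= 1) {
		if (bitmap & mask) {
			if (counter < ao_cu_num)
				ao_bitmap |= mask; /* first ao_cu_num actives */
			counter++;
		}
	}
	/* prints: active=6 ao_bitmap=0x3a */
	printf("active=%u ao_bitmap=0x%02x\n", counter, (unsigned)ao_bitmap);
	return 0;
}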
a1255107
AD
7390
7391const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7392{
7393 .type = AMD_IP_BLOCK_TYPE_GFX,
7394 .major = 8,
7395 .minor = 0,
7396 .rev = 0,
7397 .funcs = &gfx_v8_0_ip_funcs,
7398};
7399
7400const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7401{
7402 .type = AMD_IP_BLOCK_TYPE_GFX,
7403 .major = 8,
7404 .minor = 1,
7405 .rev = 0,
7406 .funcs = &gfx_v8_0_ip_funcs,
7407};
acad2b2a 7408
95243543 7409static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
acad2b2a
ML
7410{
7411 uint64_t ce_payload_addr;
7412 int cnt_ce;
d81a2209 7413 union {
49abb980
XY
7414 struct vi_ce_ib_state regular;
7415 struct vi_ce_ib_state_chained_ib chained;
e8411302 7416 } ce_payload = {};
acad2b2a
ML
7417
7418 if (ring->adev->virt.chained_ib_support) {
6f05c4e9 7419 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
97745f68 7420 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
acad2b2a
ML
7421 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7422 } else {
6f05c4e9 7423 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
97745f68 7424 offsetof(struct vi_gfx_meta_data, ce_payload);
acad2b2a
ML
7425 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7426 }
7427
7428 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7429 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7430 WRITE_DATA_DST_SEL(8) |
7431 WR_CONFIRM) |
7432 WRITE_DATA_CACHE_POLICY(0));
7433 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7434 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7435 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7436}
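
/*
 * Hedged note on the cnt_ce arithmetic above: as I read this file, the
 * PACKET3 count field is "packet dwords minus two", and WRITE_DATA carries
 * three control dwords (engine/dst select, addr lo, addr hi) ahead of the
 * payload, hence (payload dwords) + 4 - 2. Standalone check with a made-up
 * payload size; not the driver's code.
 */
#include <stdio.h>

int main(void)
{
	unsigned payload_bytes = 64;       /* stand-in for sizeof(ce_payload) */
	unsigned payload_dw = payload_bytes >> 2;
	unsigned cnt = payload_dw + 4 - 2; /* as in cnt_ce above */

	/* emitted dwords: 1 header + 3 control + payload = cnt + 2 */
	printf("count field=%u, total ring dwords=%u\n",
	       cnt, 1 + 3 + payload_dw);
	return 0;
}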
7437
95243543 7438static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
acad2b2a 7439{
95243543 7440 uint64_t de_payload_addr, gds_addr, csa_addr;
acad2b2a 7441 int cnt_de;
d81a2209 7442 union {
49abb980
XY
7443 struct vi_de_ib_state regular;
7444 struct vi_de_ib_state_chained_ib chained;
e8411302 7445 } de_payload = {};
acad2b2a 7446
6f05c4e9 7447 csa_addr = amdgpu_csa_vaddr(ring->adev);
acad2b2a
ML
7448 gds_addr = csa_addr + 4096;
7449 if (ring->adev->virt.chained_ib_support) {
7450 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7451 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
49abb980 7452 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
acad2b2a
ML
7453 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7454 } else {
7455 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7456 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
49abb980 7457 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
acad2b2a
ML
7458 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7459 }
7460
7461 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7462 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7463 WRITE_DATA_DST_SEL(8) |
7464 WR_CONFIRM) |
7465 WRITE_DATA_CACHE_POLICY(0));
7466 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7467 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7468 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7469}
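
/*
 * Hedged sketch of the address derivation shared by both emitters above:
 * the CE and DE payloads live inside the per-context CSA, so each GPU
 * address is the CSA base plus offsetof() into the metadata layout, split
 * into lo/hi halves for the packet. The struct and base address here are
 * stand-ins, not the vi_gfx_meta_data layout.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct meta_data {                 /* illustrative stand-in */
	uint32_t ce_payload[16];
	uint32_t de_payload[16];
};

int main(void)
{
	uint64_t csa = 0x100000;   /* stand-in for amdgpu_csa_vaddr() */
	uint64_t de = csa + offsetof(struct meta_data, de_payload);

	printf("de_payload at 0x%llx (lo=0x%x hi=0x%x)\n",
	       (unsigned long long)de,
	       (unsigned)(de & 0xffffffffu),
	       (unsigned)(de >> 32));
	return 0;
}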