/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)			((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)			((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)			((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)		((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)			((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)			((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)			((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)		((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)			((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);

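/*
 * gfx_v8_0_init_golden_registers - apply the per-ASIC "golden" register
 * settings: the clock-gating init sequence plus the tuning values that
 * deviate from the hardware defaults for each supported VI variant.
 */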
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

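/* Set up the pool of CP scratch registers used by the ring and IB tests. */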
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

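/*
 * Basic ring liveness test: preload a scratch register with 0xCAFEDEAD,
 * emit a SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, and poll
 * the register until the CP has executed the packet or we time out.
 */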
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

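/*
 * Indirect buffer test: submit a small IB that writes 0xDEADBEEF to a
 * scratch register and wait on its fence, verifying that IB submission
 * and execution work end to end.
 */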
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

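/*
 * Fetch and validate all GFX microcode images (PFP, ME, CE, RLC, MEC and,
 * where available, MEC2) for the current ASIC, cache the version and
 * feature levels from their headers, and register the images for
 * SMU-managed loading when that load type is in use.
 */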
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);

	/*
	 * The chained IB ucode hasn't been formally released yet, so keep
	 * it disabled for now.
	 * TODO: once the ucode is ready, use the ucode version to judge
	 * whether chained IBs are supported.
	 */
	adev->virt.chained_ib_support = false;

	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the jump table (JT) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}

	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

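/*
 * Build the clear state buffer (CSB): a PM4 stream that wraps the
 * SECT_CONTEXT register extents and the raster configuration between
 * PREAMBLE_BEGIN/END_CLEAR_STATE markers and ends with a CLEAR_STATE packet.
 */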
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

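/*
 * Copy the jump tables of the CE, PFP, ME and MEC firmware images (plus
 * MEC2 on Carrizo) into the RLC-owned CP table buffer.
 */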
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}

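/*
 * Allocate and initialize the RLC buffer objects: the clear state buffer
 * in VRAM and, on Carrizo and Stoney, the CP jump table buffer as well.
 */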
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}

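/*
 * Set up the KIQ (Kernel Interface Queue) ring: it lives on the second
 * compute ME when MEC2 firmware is present, otherwise on pipe 1 of the
 * first ME, and is driven through its doorbell.
 */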
static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  struct amdgpu_irq_src *irq)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	int r = 0;

	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
	if (r)
		return r;

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
	if (adev->gfx.mec2_fw) {
		ring->me = 2;
		ring->pipe = 0;
	} else {
		ring->me = 1;
		ring->pipe = 1;
	}

	ring->queue = 0;
	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024,
			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}

static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq)
{
	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
	amdgpu_ring_fini(ring);
}

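/*
 * Each compute queue owns a MEC_HPD_SIZE (2KB) slice of a shared EOP
 * buffer; with one MEC and one pipe reserved for the kernel driver that
 * comes to 8 queues * 2KB, pinned in GTT and zero-filled below.
 */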
#define MEC_HPD_SIZE 2048

static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

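/*
 * The KIQ gets its own single MEC_HPD_SIZE EOP buffer. It is created
 * already kernel-mapped by amdgpu_bo_create_kernel() and is unmapped
 * again once it has been cleared.
 */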
static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, MEC_HPD_SIZE);

	r = amdgpu_bo_reserve(kiq->eop_obj, false);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}

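/*
 * Raw GCN3 shader code, stored as dwords. The VGPR shader is a long run
 * of v_mov_b32 writes and the SGPR shader a run of s_mov_b32 writes;
 * both end in s_barrier (0xbf8a0000) and s_endpgm (0xbf810000). Their
 * only job is to touch every GPR for the EDC workaround further down.
 */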
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};

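/*
 * Carrizo-only EDC workaround: build a single IB that dispatches the
 * VGPR init shader once and the SGPR init shader twice (against the two
 * halves of SE0 via COMPUTE_STATIC_THREAD_MGMT_SE0), then re-enable EDC
 * and read back every SEC/DED counter so the counters start out clear,
 * presumably leaving the GPR parity state initialized as well.
 */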
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}

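/*
 * Derive adev->gfx.config from the ASIC type: shader-engine and CU
 * limits come from per-chip tables here (or from atombios on Polaris),
 * while the memory row size is read back from the MC fuse/ramcfg
 * registers and folded into gb_addr_config.
 */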
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}

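/*
 * IH source ids registered below on the legacy client: 178 = KIQ
 * interrupt, 181 = CP end-of-pipe, 184 = privileged register access,
 * 185 = privileged instruction.
 */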
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     irq_type);
		if (r)
			return r;
	}

	if (amdgpu_sriov_vf(adev)) {
		r = gfx_v8_0_kiq_init(adev);
		if (r) {
			DRM_ERROR("Failed to init KIQ BOs!\n");
			return r;
		}

		kiq = &adev->gfx.kiq;
		r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
		if (r)
			return r;

		/* create MQD for all compute queues as well as KIQ for SRIOV case */
		r = gfx_v8_0_compute_mqd_sw_init(adev);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}

static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	if (amdgpu_sriov_vf(adev)) {
		gfx_v8_0_compute_mqd_sw_fini(adev);
		gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
		gfx_v8_0_kiq_fini(adev);
	}

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	gfx_v8_0_free_microcode(adev);

	return 0;
}

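/*
 * Program GB_TILE_MODE0-31 and GB_MACROTILE_MODE0-15 with per-ASIC
 * surface tiling layouts; the tables are also cached in adev->gfx.config.
 * Indices skipped in the write loops (7, 12, 17, 23 for Topaz tile
 * modes, 7 for macrotile modes) are left at zero.
 */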
static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_FIJI:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2886 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2887 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2888 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2889 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2890 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2891 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2892 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2893 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2894 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2895 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2896 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2898 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2899 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2900 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2902 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2903 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2904 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2905 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2906 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2907 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2908 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2909 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2910 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2911 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2912 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2914 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2915 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2916 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2917 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2918 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2919 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2921 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2922 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2923 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2925 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2926 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2927 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2928 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2929 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2930 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2931
2932 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2933 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2934 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2935 NUM_BANKS(ADDR_SURF_16_BANK));
2936
2937 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2938 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2939 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2940 NUM_BANKS(ADDR_SURF_16_BANK));
2941
2942 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2943 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2944 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2945 NUM_BANKS(ADDR_SURF_16_BANK));
2946
2947 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2948 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2949 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2950 NUM_BANKS(ADDR_SURF_16_BANK));
2951
2952 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2953 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2954 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2955 NUM_BANKS(ADDR_SURF_16_BANK));
2956
2957 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2958 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2959 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2960 NUM_BANKS(ADDR_SURF_16_BANK));
2961
2962 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2963 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2964 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2965 NUM_BANKS(ADDR_SURF_16_BANK));
2966
2967 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2968 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2969 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970 NUM_BANKS(ADDR_SURF_16_BANK));
2971
2972 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2973 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2974 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975 NUM_BANKS(ADDR_SURF_16_BANK));
2976
2977 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2978 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2979 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2980 NUM_BANKS(ADDR_SURF_16_BANK));
2981
2982 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2983 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2984 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2985 NUM_BANKS(ADDR_SURF_16_BANK));
2986
2987 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2990 NUM_BANKS(ADDR_SURF_16_BANK));
2991
2992 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2993 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2994 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2995 NUM_BANKS(ADDR_SURF_8_BANK));
2996
2997 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2998 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2999 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3000 NUM_BANKS(ADDR_SURF_4_BANK));
3001
3002 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3003 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3004
3005 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3006 if (reg_offset != 7)
3007 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3008
3009 break;
2cc0c0b5 3010 case CHIP_POLARIS10:
68182d90
FC
3011 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3012 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3014 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3015 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3016 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3018 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3019 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3020 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3021 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3022 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3023 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3024 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3025 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3026 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3027 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3028 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3029 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3030 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3031 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3033 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3034 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3035 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3036 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3037 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3038 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3039 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3040 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3041 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3042 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3043 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3044 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3045 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3046 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3047 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3049 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3050 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3051 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3052 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3053 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3054 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3055 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3057 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3058 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3059 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3060 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3061 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3062 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3063 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3064 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3065 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3066 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3067 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3068 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3070 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3071 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3073 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3074 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3075 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3077 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3078 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3079 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3080 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3081 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3082 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3083 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3085 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3086 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3087 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3088 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3089 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3090 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3091 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3093 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3094 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3095 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3096 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3097 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3098 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3099 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3100 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3101 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3102 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3103 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3105 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3106 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3107 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3108 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3109 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3110 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3111 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3112 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3113 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3114 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3115 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3116 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3117 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3118 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3119 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3120 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3121 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3123 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3124 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3125 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3126 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3127 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3128 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3129 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3130 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3131 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3132 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3133
3134 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3135 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3136 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3137 NUM_BANKS(ADDR_SURF_16_BANK));
3138
3139 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3140 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3141 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3142 NUM_BANKS(ADDR_SURF_16_BANK));
3143
3144 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3145 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3146 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3147 NUM_BANKS(ADDR_SURF_16_BANK));
3148
3149 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3150 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3151 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3152 NUM_BANKS(ADDR_SURF_16_BANK));
3153
3154 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3155 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3156 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3157 NUM_BANKS(ADDR_SURF_16_BANK));
3158
3159 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3162 NUM_BANKS(ADDR_SURF_16_BANK));
3163
3164 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3167 NUM_BANKS(ADDR_SURF_16_BANK));
3168
3169 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3170 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3171 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3172 NUM_BANKS(ADDR_SURF_16_BANK));
3173
3174 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3175 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3176 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3177 NUM_BANKS(ADDR_SURF_16_BANK));
3178
3179 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3182 NUM_BANKS(ADDR_SURF_16_BANK));
3183
3184 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3187 NUM_BANKS(ADDR_SURF_16_BANK));
3188
3189 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3190 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3191 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3192 NUM_BANKS(ADDR_SURF_8_BANK));
3193
3194 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3197 NUM_BANKS(ADDR_SURF_4_BANK));
3198
3199 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3200 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3201 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3202 NUM_BANKS(ADDR_SURF_4_BANK));
3203
3204 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3205 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3206
3207 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3208 if (reg_offset != 7)
3209 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3210
aaa36a97 3211 break;
e3c7656c 3212 case CHIP_STONEY:
90bea0ab
TSD
3213 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3214 PIPE_CONFIG(ADDR_SURF_P2) |
3215 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3216 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3217 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3218 PIPE_CONFIG(ADDR_SURF_P2) |
3219 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3220 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3221 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3222 PIPE_CONFIG(ADDR_SURF_P2) |
3223 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3224 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3225 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3226 PIPE_CONFIG(ADDR_SURF_P2) |
3227 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3228 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3229 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3230 PIPE_CONFIG(ADDR_SURF_P2) |
3231 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3232 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3233 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3234 PIPE_CONFIG(ADDR_SURF_P2) |
3235 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3236 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3237 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238 PIPE_CONFIG(ADDR_SURF_P2) |
3239 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3240 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3241 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3242 PIPE_CONFIG(ADDR_SURF_P2));
3243 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3244 PIPE_CONFIG(ADDR_SURF_P2) |
3245 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3248 PIPE_CONFIG(ADDR_SURF_P2) |
3249 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3251 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3252 PIPE_CONFIG(ADDR_SURF_P2) |
3253 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3254 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3255 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3256 PIPE_CONFIG(ADDR_SURF_P2) |
3257 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3259 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3260 PIPE_CONFIG(ADDR_SURF_P2) |
3261 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3263 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3264 PIPE_CONFIG(ADDR_SURF_P2) |
3265 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3267 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3268 PIPE_CONFIG(ADDR_SURF_P2) |
3269 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3271 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3272 PIPE_CONFIG(ADDR_SURF_P2) |
3273 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3274 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3275 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3276 PIPE_CONFIG(ADDR_SURF_P2) |
3277 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3278 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3279 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3280 PIPE_CONFIG(ADDR_SURF_P2) |
3281 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3283 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3284 PIPE_CONFIG(ADDR_SURF_P2) |
3285 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3287 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3288 PIPE_CONFIG(ADDR_SURF_P2) |
3289 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3291 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3292 PIPE_CONFIG(ADDR_SURF_P2) |
3293 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3295 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3296 PIPE_CONFIG(ADDR_SURF_P2) |
3297 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3299 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3300 PIPE_CONFIG(ADDR_SURF_P2) |
3301 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3303 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3304 PIPE_CONFIG(ADDR_SURF_P2) |
3305 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3307 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3308 PIPE_CONFIG(ADDR_SURF_P2) |
3309 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3311 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3312 PIPE_CONFIG(ADDR_SURF_P2) |
3313 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3315
3316 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3318 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319 NUM_BANKS(ADDR_SURF_8_BANK));
3320 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3321 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3322 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3323 NUM_BANKS(ADDR_SURF_8_BANK));
3324 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327 NUM_BANKS(ADDR_SURF_8_BANK));
3328 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3330 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3331 NUM_BANKS(ADDR_SURF_8_BANK));
3332 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3333 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3334 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3335 NUM_BANKS(ADDR_SURF_8_BANK));
3336 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3337 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3338 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3339 NUM_BANKS(ADDR_SURF_8_BANK));
3340 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343 NUM_BANKS(ADDR_SURF_8_BANK));
3344 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3345 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3346 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3347 NUM_BANKS(ADDR_SURF_16_BANK));
3348 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3349 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3350 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3351 NUM_BANKS(ADDR_SURF_16_BANK));
3352 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3353 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3354 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355 NUM_BANKS(ADDR_SURF_16_BANK));
3356 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3357 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3358 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3359 NUM_BANKS(ADDR_SURF_16_BANK));
3360 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3361 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3362 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3363 NUM_BANKS(ADDR_SURF_16_BANK));
3364 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3365 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3366 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367 NUM_BANKS(ADDR_SURF_16_BANK));
3368 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3370 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3371 NUM_BANKS(ADDR_SURF_8_BANK));
3372
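	/* tile mode entries 7, 12, 17 and 23 are neither set up above
	 * nor written out below on Stoney */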
3373 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3374 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3375 reg_offset != 23)
3376 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3377
3378 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3379 if (reg_offset != 7)
3380 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3381
e3c7656c 3382 break;
aaa36a97 3383 default:
90bea0ab
TSD
3384 dev_warn(adev->dev,
3385 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3386 adev->asic_type);
3387
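		/* deliberate fall through to the CHIP_CARRIZO setup */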
3388 case CHIP_CARRIZO:
3389 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3390 PIPE_CONFIG(ADDR_SURF_P2) |
3391 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3392 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3393 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3394 PIPE_CONFIG(ADDR_SURF_P2) |
3395 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3396 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3397 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3398 PIPE_CONFIG(ADDR_SURF_P2) |
3399 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3400 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3401 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3402 PIPE_CONFIG(ADDR_SURF_P2) |
3403 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3404 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3405 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3406 PIPE_CONFIG(ADDR_SURF_P2) |
3407 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3408 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3409 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3410 PIPE_CONFIG(ADDR_SURF_P2) |
3411 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3412 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3413 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3414 PIPE_CONFIG(ADDR_SURF_P2) |
3415 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3416 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3417 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3418 PIPE_CONFIG(ADDR_SURF_P2));
3419 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3420 PIPE_CONFIG(ADDR_SURF_P2) |
3421 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3422 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3423 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3424 PIPE_CONFIG(ADDR_SURF_P2) |
3425 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3427 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3428 PIPE_CONFIG(ADDR_SURF_P2) |
3429 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3431 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3432 PIPE_CONFIG(ADDR_SURF_P2) |
3433 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3435 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3436 PIPE_CONFIG(ADDR_SURF_P2) |
3437 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3439 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3440 PIPE_CONFIG(ADDR_SURF_P2) |
3441 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3443 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3444 PIPE_CONFIG(ADDR_SURF_P2) |
3445 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3446 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3447 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3448 PIPE_CONFIG(ADDR_SURF_P2) |
3449 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3450 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3451 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3452 PIPE_CONFIG(ADDR_SURF_P2) |
3453 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3454 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3455 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3456 PIPE_CONFIG(ADDR_SURF_P2) |
3457 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3458 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3459 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3460 PIPE_CONFIG(ADDR_SURF_P2) |
3461 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3462 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3463 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3464 PIPE_CONFIG(ADDR_SURF_P2) |
3465 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3467 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3468 PIPE_CONFIG(ADDR_SURF_P2) |
3469 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3470 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3471 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3472 PIPE_CONFIG(ADDR_SURF_P2) |
3473 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3474 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3475 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3476 PIPE_CONFIG(ADDR_SURF_P2) |
3477 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3478 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3479 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3480 PIPE_CONFIG(ADDR_SURF_P2) |
3481 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3483 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3484 PIPE_CONFIG(ADDR_SURF_P2) |
3485 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3487 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3488 PIPE_CONFIG(ADDR_SURF_P2) |
3489 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3491
3492 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3493 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3494 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3495 NUM_BANKS(ADDR_SURF_8_BANK));
3496 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3497 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3498 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3499 NUM_BANKS(ADDR_SURF_8_BANK));
3500 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3501 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3502 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3503 NUM_BANKS(ADDR_SURF_8_BANK));
3504 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3505 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3506 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3507 NUM_BANKS(ADDR_SURF_8_BANK));
3508 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3511 NUM_BANKS(ADDR_SURF_8_BANK));
3512 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3513 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3514 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3515 NUM_BANKS(ADDR_SURF_8_BANK));
3516 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3517 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3518 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3519 NUM_BANKS(ADDR_SURF_8_BANK));
3520 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3523 NUM_BANKS(ADDR_SURF_16_BANK));
3524 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3525 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3526 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3527 NUM_BANKS(ADDR_SURF_16_BANK));
3528 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3531 NUM_BANKS(ADDR_SURF_16_BANK));
3532 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3535 NUM_BANKS(ADDR_SURF_16_BANK));
3536 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3537 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3538 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3539 NUM_BANKS(ADDR_SURF_16_BANK));
3540 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3541 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3542 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3543 NUM_BANKS(ADDR_SURF_16_BANK));
3544 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3545 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3546 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3547 NUM_BANKS(ADDR_SURF_8_BANK));
3548
3549 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3550 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3551 reg_offset != 23)
3552 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3553
3554 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3555 if (reg_offset != 7)
3556 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3557
3558 break;
aaa36a97
AD
3559 }
3560}
3561
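/*
 * Select which SE/SH/instance subsequent GRBM-indexed register accesses
 * target; an argument of 0xffffffff selects broadcast to all of them.
 */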
05fb7291 3562static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
9559ef5b 3563 u32 se_num, u32 sh_num, u32 instance)
aaa36a97 3564{
9559ef5b
TSD
3565 u32 data;
3566
3567 if (instance == 0xffffffff)
3568 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3569 else
3570 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
aaa36a97 3571
5003f278 3572 if (se_num == 0xffffffff)
aaa36a97 3573 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
5003f278 3574 else
aaa36a97 3575 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
5003f278
TSD
3576
3577 if (sh_num == 0xffffffff)
3578 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3579 else
aaa36a97 3580 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
5003f278 3581
aaa36a97
AD
3582 WREG32(mmGRBM_GFX_INDEX, data);
3583}
3584
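/* Build a mask with the low bit_width bits set; the 1ULL shift keeps
 * the behavior defined even for bit_width == 32.
 */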
8f8e00c1
AD
3585static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3586{
3587 return (u32)((1ULL << bit_width) - 1);
3588}
3589
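/*
 * Combine the hardware and user render-backend disable masks for the
 * currently selected SE/SH and invert them to get the active RB bitmap.
 */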
3590static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
aaa36a97
AD
3591{
3592 u32 data, mask;
3593
5003f278
TSD
3594 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3595 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
aaa36a97 3596
5003f278 3597 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
aaa36a97 3598
8f8e00c1
AD
3599 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3600 adev->gfx.config.max_sh_per_se);
aaa36a97 3601
8f8e00c1 3602 return (~data) & mask;
aaa36a97
AD
3603}
3604
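/*
 * Per-ASIC base PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1 values;
 * gfx_v8_0_setup_rb() programs them directly when all RBs are active
 * and otherwise feeds them into the harvested-config path.
 */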
167ac573
HR
3605static void
3606gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3607{
3608 switch (adev->asic_type) {
3609 case CHIP_FIJI:
3610 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3611 RB_XSEL2(1) | PKR_MAP(2) |
3612 PKR_XSEL(1) | PKR_YSEL(1) |
3613 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3614 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3615 SE_PAIR_YSEL(2);
3616 break;
3617 case CHIP_TONGA:
3618 case CHIP_POLARIS10:
3619 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3620 SE_XSEL(1) | SE_YSEL(1);
3621 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3622 SE_PAIR_YSEL(2);
3623 break;
3624 case CHIP_TOPAZ:
3625 case CHIP_CARRIZO:
3626 *rconf |= RB_MAP_PKR0(2);
3627 *rconf1 |= 0x0;
3628 break;
3629 case CHIP_POLARIS11:
c4642a47 3630 case CHIP_POLARIS12:
167ac573
HR
3631 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3632 SE_XSEL(1) | SE_YSEL(1);
3633 *rconf1 |= 0x0;
3634 break;
3635 case CHIP_STONEY:
3636 *rconf |= 0x0;
3637 *rconf1 |= 0x0;
3638 break;
3639 default:
3640 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3641 break;
3642 }
3643}
3644
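/*
 * Recompute and program per-SE raster configs when some RBs are
 * harvested, steering the SE/packer/RB map fields away from the
 * disabled backends.
 */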
3645static void
3646gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3647 u32 raster_config, u32 raster_config_1,
3648 unsigned rb_mask, unsigned num_rb)
3649{
3650 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3651 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3652 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3653 unsigned rb_per_se = num_rb / num_se;
3654 unsigned se_mask[4];
3655 unsigned se;
3656
3657 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3658 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3659 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3660 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3661
3662 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3663 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3664 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3665
3666 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3667 (!se_mask[2] && !se_mask[3]))) {
3668 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3669
3670 if (!se_mask[0] && !se_mask[1]) {
3671 raster_config_1 |=
3672 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3673 } else {
3674 raster_config_1 |=
3675 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3676 }
3677 }
3678
3679 for (se = 0; se < num_se; se++) {
3680 unsigned raster_config_se = raster_config;
3681 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3682 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3683 int idx = (se / 2) * 2;
3684
3685 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3686 raster_config_se &= ~SE_MAP_MASK;
3687
3688 if (!se_mask[idx]) {
3689 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3690 } else {
3691 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3692 }
3693 }
3694
3695 pkr0_mask &= rb_mask;
3696 pkr1_mask &= rb_mask;
3697 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3698 raster_config_se &= ~PKR_MAP_MASK;
3699
3700 if (!pkr0_mask) {
3701 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3702 } else {
3703 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3704 }
3705 }
3706
3707 if (rb_per_se >= 2) {
3708 unsigned rb0_mask = 1 << (se * rb_per_se);
3709 unsigned rb1_mask = rb0_mask << 1;
3710
3711 rb0_mask &= rb_mask;
3712 rb1_mask &= rb_mask;
3713 if (!rb0_mask || !rb1_mask) {
3714 raster_config_se &= ~RB_MAP_PKR0_MASK;
3715
3716 if (!rb0_mask) {
3717 raster_config_se |=
3718 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3719 } else {
3720 raster_config_se |=
3721 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3722 }
3723 }
3724
3725 if (rb_per_se > 2) {
3726 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3727 rb1_mask = rb0_mask << 1;
3728 rb0_mask &= rb_mask;
3729 rb1_mask &= rb_mask;
3730 if (!rb0_mask || !rb1_mask) {
3731 raster_config_se &= ~RB_MAP_PKR1_MASK;
3732
3733 if (!rb0_mask) {
3734 raster_config_se |=
3735 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3736 } else {
3737 raster_config_se |=
3738 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3739 }
3740 }
3741 }
3742 }
3743
3744 /* GRBM_GFX_INDEX has a different offset on VI */
3745 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3746 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3747 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3748 }
3749
3750 /* GRBM_GFX_INDEX has a different offset on VI */
3751 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3752}
3753
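/*
 * Query the active render backends across all SEs/SHs, program the
 * raster configuration (using the harvested path when RBs are
 * disabled) and cache the per-SE/SH values for userspace queries.
 */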
8f8e00c1 3754static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
aaa36a97
AD
3755{
3756 int i, j;
aac1e3ca 3757 u32 data;
167ac573 3758 u32 raster_config = 0, raster_config_1 = 0;
8f8e00c1 3759 u32 active_rbs = 0;
6157bd7a
FC
3760 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3761 adev->gfx.config.max_sh_per_se;
167ac573 3762 unsigned num_rb_pipes;
aaa36a97
AD
3763
3764 mutex_lock(&adev->grbm_idx_mutex);
8f8e00c1
AD
3765 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3766 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
9559ef5b 3767 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
8f8e00c1
AD
3768 data = gfx_v8_0_get_rb_active_bitmap(adev);
3769 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
6157bd7a 3770 rb_bitmap_width_per_sh);
aaa36a97
AD
3771 }
3772 }
9559ef5b 3773 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
aaa36a97 3774
8f8e00c1 3775 adev->gfx.config.backend_enable_mask = active_rbs;
aac1e3ca 3776 adev->gfx.config.num_rbs = hweight32(active_rbs);
167ac573
HR
3777
3778 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3779 adev->gfx.config.max_shader_engines, 16);
3780
3781 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3782
3783 if (!adev->gfx.config.backend_enable_mask ||
3784 adev->gfx.config.num_rbs >= num_rb_pipes) {
3785 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3786 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3787 } else {
3788 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3789 adev->gfx.config.backend_enable_mask,
3790 num_rb_pipes);
3791 }
3792
392f0c77
AD
3793 /* cache the values for userspace */
3794 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3795 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3796 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3797 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3798 RREG32(mmCC_RB_BACKEND_DISABLE);
3799 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3800 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3801 adev->gfx.config.rb_config[i][j].raster_config =
3802 RREG32(mmPA_SC_RASTER_CONFIG);
3803 adev->gfx.config.rb_config[i][j].raster_config_1 =
3804 RREG32(mmPA_SC_RASTER_CONFIG_1);
3805 }
3806 }
3807 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
167ac573 3808 mutex_unlock(&adev->grbm_idx_mutex);
aaa36a97
AD
3809}
3810
cd06bf68 3811/**
35c7a952 3812 * gfx_v8_0_init_compute_vmid - initialize the compute VMID SH_MEM registers
cd06bf68
BG
3813 *
3814 * @adev: amdgpu_device pointer
3815 *
3816 * Initialize the SH_MEM config and aperture registers for the compute VMIDs
3817 *
3818 */
3819#define DEFAULT_SH_MEM_BASES (0x6000)
3820#define FIRST_COMPUTE_VMID (8)
3821#define LAST_COMPUTE_VMID (16)
35c7a952 3822static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
cd06bf68
BG
3823{
3824 int i;
3825 uint32_t sh_mem_config;
3826 uint32_t sh_mem_bases;
3827
3828 /*
3829 * Configure apertures:
3830 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3831 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3832 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3833 */
3834 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3835
3836 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3837 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3838 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3839 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3840 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3841 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3842
3843 mutex_lock(&adev->srbm_mutex);
3844 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3845 vi_srbm_select(adev, 0, 0, 0, i);
3846 /* CP and shaders */
3847 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3848 WREG32(mmSH_MEM_APE1_BASE, 1);
3849 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3850 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3851 }
3852 vi_srbm_select(adev, 0, 0, 0, 0);
3853 mutex_unlock(&adev->srbm_mutex);
3854}
3855
df6e2c4a
JZ
3856static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3857{
3858 switch (adev->asic_type) {
3859 default:
3860 adev->gfx.config.double_offchip_lds_buf = 1;
3861 break;
3862 case CHIP_CARRIZO:
3863 case CHIP_STONEY:
3864 adev->gfx.config.double_offchip_lds_buf = 0;
3865 break;
3866 }
3867}
3868
aaa36a97
AD
3869static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3870{
8fe73328 3871 u32 tmp, sh_static_mem_cfg;
aaa36a97
AD
3872 int i;
3873
61cb8cef 3874 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
0bde3a95
AD
3875 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3876 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3877 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
aaa36a97
AD
3878
3879 gfx_v8_0_tiling_mode_table_init(adev);
8f8e00c1 3880 gfx_v8_0_setup_rb(adev);
7dae69a2 3881 gfx_v8_0_get_cu_info(adev);
df6e2c4a 3882 gfx_v8_0_config_init(adev);
aaa36a97
AD
3883
3884 /* XXX SH_MEM regs */
3885 /* where to put LDS, scratch, GPUVM in FSA64 space */
8fe73328
JZ
3886 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3887 SWIZZLE_ENABLE, 1);
3888 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3889 ELEMENT_SIZE, 1);
3890 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3891 INDEX_STRIDE, 3);
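	/*
	 * VMID 0 is owned by the kernel driver and uses an uncached (UC)
	 * default MTYPE with SH_MEM_BASES of 0; the remaining VMIDs
	 * default to NC and get the shared aperture base instead.
	 */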
aaa36a97 3892 mutex_lock(&adev->srbm_mutex);
8fe73328 3893 for (i = 0; i < adev->vm_manager.num_ids; i++) {
aaa36a97
AD
3894 vi_srbm_select(adev, 0, 0, 0, i);
3895 /* CP and shaders */
3896 if (i == 0) {
3897 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3898 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
0bde3a95 3899 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
74a5d165 3900 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
aaa36a97 3901 WREG32(mmSH_MEM_CONFIG, tmp);
8fe73328 3902 WREG32(mmSH_MEM_BASES, 0);
aaa36a97
AD
3903 } else {
3904 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
8fe73328 3905 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
0bde3a95 3906 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
74a5d165 3907 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
aaa36a97 3908 WREG32(mmSH_MEM_CONFIG, tmp);
8fe73328
JZ
3909 tmp = adev->mc.shared_aperture_start >> 48;
3910 WREG32(mmSH_MEM_BASES, tmp);
aaa36a97
AD
3911 }
3912
3913 WREG32(mmSH_MEM_APE1_BASE, 1);
3914 WREG32(mmSH_MEM_APE1_LIMIT, 0);
8fe73328 3915 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
aaa36a97
AD
3916 }
3917 vi_srbm_select(adev, 0, 0, 0, 0);
3918 mutex_unlock(&adev->srbm_mutex);
3919
35c7a952 3920 gfx_v8_0_init_compute_vmid(adev);
cd06bf68 3921
aaa36a97
AD
3922 mutex_lock(&adev->grbm_idx_mutex);
3923 /*
3924 * make sure that the following register writes are broadcast
3925 * to all the shaders
3926 */
9559ef5b 3927 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
aaa36a97
AD
3928
3929 WREG32(mmPA_SC_FIFO_SIZE,
3930 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3931 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3932 (adev->gfx.config.sc_prim_fifo_size_backend <<
3933 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3934 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3935 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3936 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3937 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
d2383267 3938
3939 tmp = RREG32(mmSPI_ARB_PRIORITY);
3940 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3941 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3942 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3943 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3944 WREG32(mmSPI_ARB_PRIORITY, tmp);
3945
aaa36a97
AD
3946 mutex_unlock(&adev->grbm_idx_mutex);
3947
3948}
3949
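/*
 * Poll until the RLC SERDES CU masters on every SE/SH report idle,
 * then wait for the non-CU (SE/GC/TC) masters to go idle as well.
 */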
3950static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3951{
3952 u32 i, j, k;
3953 u32 mask;
3954
3955 mutex_lock(&adev->grbm_idx_mutex);
3956 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3957 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
9559ef5b 3958 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
aaa36a97
AD
3959 for (k = 0; k < adev->usec_timeout; k++) {
3960 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3961 break;
3962 udelay(1);
3963 }
3964 }
3965 }
9559ef5b 3966 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
aaa36a97
AD
3967 mutex_unlock(&adev->grbm_idx_mutex);
3968
3969 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3970 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3971 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3972 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3973 for (k = 0; k < adev->usec_timeout; k++) {
3974 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3975 break;
3976 udelay(1);
3977 }
3978}
3979
3980static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3981 bool enable)
3982{
3983 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3984
0d07db7e
TSD
3985 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3986 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3987 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3988 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3989
aaa36a97
AD
3990 WREG32(mmCP_INT_CNTL_RING0, tmp);
3991}
3992
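/* Point the RLC at the clear-state indirect buffer (CSIB). */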
2b6cd977
EH
3993static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3994{
3995 /* csib */
3996 WREG32(mmRLC_CSIB_ADDR_HI,
3997 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3998 WREG32(mmRLC_CSIB_ADDR_LO,
3999 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
4000 WREG32(mmRLC_CSIB_LENGTH,
4001 adev->gfx.rlc.clear_state_size);
4002}
4003
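/*
 * Walk the RLC indirect register list: record the start offset of each
 * 0xFFFFFFFF-terminated block and replace every register's index value
 * with its position in the deduplicated unique_indices table.
 */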
4004static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
4005 int ind_offset,
4006 int list_size,
4007 int *unique_indices,
4008 int *indices_count,
4009 int max_indices,
4010 int *ind_start_offsets,
4011 int *offset_count,
4012 int max_offset)
4013{
4014 int indices;
4015 bool new_entry = true;
4016
4017 for (; ind_offset < list_size; ind_offset++) {
4018
4019 if (new_entry) {
4020 new_entry = false;
4021 ind_start_offsets[*offset_count] = ind_offset;
4022 *offset_count = *offset_count + 1;
4023 BUG_ON(*offset_count >= max_offset);
4024 }
4025
4026 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
4027 new_entry = true;
4028 continue;
4029 }
4030
4031 ind_offset += 2;
4032
4033 /* look for the matching index */
4034 for (indices = 0;
4035 indices < *indices_count;
4036 indices++) {
4037 if (unique_indices[indices] ==
4038 register_list_format[ind_offset])
4039 break;
4040 }
4041
4042 if (indices >= *indices_count) {
4043 unique_indices[*indices_count] =
4044 register_list_format[ind_offset];
4045 indices = *indices_count;
4046 *indices_count = *indices_count + 1;
4047 BUG_ON(*indices_count >= max_indices);
4048 }
4049
4050 register_list_format[ind_offset] = indices;
4051 }
4052}
4053
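/*
 * Upload the RLC save/restore lists: the direct register restore list
 * goes into ARAM, the re-indexed indirect list plus its start offsets
 * into GPM scratch, and the unique indices into the SRM index control
 * registers.
 */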
4054static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4055{
4056 int i, temp, data;
4057 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4058 int indices_count = 0;
4059 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4060 int offset_count = 0;
4061
4062 int list_size;
4063 unsigned int *register_list_format =
4064 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3f12325a 4065 if (!register_list_format)
2b6cd977
EH
4066 return -ENOMEM;
4067 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4068 adev->gfx.rlc.reg_list_format_size_bytes);
4069
4070 gfx_v8_0_parse_ind_reg_list(register_list_format,
4071 RLC_FormatDirectRegListLength,
4072 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4073 unique_indices,
4074 &indices_count,
4075 sizeof(unique_indices) / sizeof(int),
4076 indirect_start_offsets,
4077 &offset_count,
4078 sizeof(indirect_start_offsets) / sizeof(int));
4079
4080 /* save and restore list */
61cb8cef 4081 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
2b6cd977
EH
4082
4083 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4084 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4085 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4086
4087 /* indirect list */
4088 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4089 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4090 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4091
4092 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4093 list_size = list_size >> 1;
4094 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4095 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4096
4097 /* starting offsets of the indirect register list blocks */
4098 WREG32(mmRLC_GPM_SCRATCH_ADDR,
4099 adev->gfx.rlc.starting_offsets_start);
4100 for (i = 0; i < sizeof(indirect_start_offsets) / sizeof(int); i++)
4101 WREG32(mmRLC_GPM_SCRATCH_DATA,
4102 indirect_start_offsets[i]);
4103
4104 /* unique indices */
4105 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4106 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4107 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
202e0b22 4108 if (unique_indices[i] != 0) {
b85c9d2a
ML
4109 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4110 WREG32(data + i, unique_indices[i] >> 20);
202e0b22 4111 }
2b6cd977
EH
4112 }
4113 kfree(register_list_format);
4114
4115 return 0;
4116}
4117
4118static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4119{
61cb8cef 4120 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
2b6cd977
EH
4121}
4122
fb16007b 4123static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
f4bfffdd
EH
4124{
4125 uint32_t data;
4126
c4d17b81
RZ
4127 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4128
4129 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4130 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4131 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4132 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4133 WREG32(mmRLC_PG_DELAY, data);
4134
4135 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4136 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4137
f4bfffdd
EH
4138}
4139
2c547165
AD
4140static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4141 bool enable)
4142{
61cb8cef 4143 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
2c547165
AD
4144}
4145
4146static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4147 bool enable)
4148{
61cb8cef 4149 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
2c547165
AD
4150}
4151
4152static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4153{
eb584241 4154 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
2c547165
AD
4155}
4156
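/*
 * Carrizo/Stoney and Polaris11/12 share the CSB and save/restore list
 * setup; only the APUs additionally program the RLC jump table and the
 * always-on CU mask.
 */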
2b6cd977
EH
4157static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4158{
c4d17b81
RZ
4159 if ((adev->asic_type == CHIP_CARRIZO) ||
4160 (adev->asic_type == CHIP_STONEY)) {
2b6cd977
EH
4161 gfx_v8_0_init_csb(adev);
4162 gfx_v8_0_init_save_restore_list(adev);
4163 gfx_v8_0_enable_save_restore_machine(adev);
c4d17b81
RZ
4164 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4165 gfx_v8_0_init_power_gating(adev);
4166 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
c4642a47
JZ
4167 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4168 (adev->asic_type == CHIP_POLARIS12)) {
c4d17b81
RZ
4169 gfx_v8_0_init_csb(adev);
4170 gfx_v8_0_init_save_restore_list(adev);
4171 gfx_v8_0_enable_save_restore_machine(adev);
4172 gfx_v8_0_init_power_gating(adev);
2b6cd977 4173 }
c4d17b81 4174
2b6cd977
EH
4175}
4176
761c2e82 4177static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
aaa36a97 4178{
61cb8cef 4179 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
aaa36a97
AD
4180
4181 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
aaa36a97
AD
4182 gfx_v8_0_wait_for_rlc_serdes(adev);
4183}
4184
4185static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4186{
61cb8cef 4187 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
aaa36a97 4188 udelay(50);
61cb8cef
TSD
4189
4190 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
aaa36a97
AD
4191 udelay(50);
4192}
4193
4194static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4195{
61cb8cef 4196 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
aaa36a97
AD
4197
4198 /* APUs such as Carrizo enable the CP interrupt only after the CP is initialized */
e3c7656c 4199 if (!(adev->flags & AMD_IS_APU))
4200 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4201
4202 udelay(50);
4203}
4204
4205static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4206{
4207 const struct rlc_firmware_header_v2_0 *hdr;
4208 const __le32 *fw_data;
4209 unsigned i, fw_size;
4210
4211 if (!adev->gfx.rlc_fw)
4212 return -EINVAL;
4213
4214 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4215 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4216
4217 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4218 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4219 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4220
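 /* RLC_GPM_UCODE_DATA auto-increments the address, so the loop below
  * points the address register at 0 once, streams the whole image, then
  * writes the firmware version back to the address register, matching
  * the CP ucode loaders further down */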
4221 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4222 for (i = 0; i < fw_size; i++)
4223 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4224 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4225
4226 return 0;
4227}
4228
4229static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4230{
4231 int r;
6ae81452 4232 u32 tmp;
4233
4234 gfx_v8_0_rlc_stop(adev);
4235
4236 /* disable CG */
4237 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4238 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4239 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4240 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
2cc0c0b5 4241 if (adev->asic_type == CHIP_POLARIS11 ||
4242 adev->asic_type == CHIP_POLARIS10 ||
4243 adev->asic_type == CHIP_POLARIS12) {
4244 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4245 tmp &= ~0x3;
4246 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4247 }
4248
4249 /* disable PG */
4250 WREG32(mmRLC_PG_CNTL, 0);
4251
4252 gfx_v8_0_rlc_reset(adev);
4253 gfx_v8_0_init_pg(adev);
4254
e61710c5 4255 if (!adev->pp_enabled) {
e635ee07 4256 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4257 /* legacy rlc firmware loading */
4258 r = gfx_v8_0_rlc_load_microcode(adev);
4259 if (r)
4260 return r;
4261 } else {
4262 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4263 AMDGPU_UCODE_ID_RLC_G);
4264 if (r)
4265 return -EINVAL;
4266 }
4267 }
4268
4269 gfx_v8_0_rlc_start(adev);
4270
4271 return 0;
4272}
4273
4274static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4275{
4276 int i;
4277 u32 tmp = RREG32(mmCP_ME_CNTL);
4278
4279 if (enable) {
4280 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4281 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4282 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4283 } else {
4284 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4285 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4286 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4287 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4288 adev->gfx.gfx_ring[i].ready = false;
4289 }
4290 WREG32(mmCP_ME_CNTL, tmp);
4291 udelay(50);
4292}
4293
4294static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4295{
4296 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4297 const struct gfx_firmware_header_v1_0 *ce_hdr;
4298 const struct gfx_firmware_header_v1_0 *me_hdr;
4299 const __le32 *fw_data;
4300 unsigned i, fw_size;
4301
4302 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4303 return -EINVAL;
4304
4305 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4306 adev->gfx.pfp_fw->data;
4307 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4308 adev->gfx.ce_fw->data;
4309 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4310 adev->gfx.me_fw->data;
4311
4312 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4313 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4314 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4315
4316 gfx_v8_0_cp_gfx_enable(adev, false);
4317
4318 /* PFP */
4319 fw_data = (const __le32 *)
4320 (adev->gfx.pfp_fw->data +
4321 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4322 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4323 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4324 for (i = 0; i < fw_size; i++)
4325 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4326 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4327
4328 /* CE */
4329 fw_data = (const __le32 *)
4330 (adev->gfx.ce_fw->data +
4331 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4332 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4333 WREG32(mmCP_CE_UCODE_ADDR, 0);
4334 for (i = 0; i < fw_size; i++)
4335 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4336 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4337
4338 /* ME */
4339 fw_data = (const __le32 *)
4340 (adev->gfx.me_fw->data +
4341 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4342 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4343 WREG32(mmCP_ME_RAM_WADDR, 0);
4344 for (i = 0; i < fw_size; i++)
4345 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4346 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4347
4348 return 0;
4349}
4350
4351static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4352{
4353 u32 count = 0;
4354 const struct cs_section_def *sect = NULL;
4355 const struct cs_extent_def *ext = NULL;
4356
4357 /* begin clear state */
4358 count += 2;
4359 /* context control state */
4360 count += 3;
4361
4362 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4363 for (ext = sect->section; ext->extent != NULL; ++ext) {
4364 if (sect->id == SECT_CONTEXT)
4365 count += 2 + ext->reg_count;
4366 else
4367 return 0;
4368 }
4369 }
4370 /* pa_sc_raster_config/pa_sc_raster_config1 */
4371 count += 4;
4372 /* end clear state */
4373 count += 2;
4374 /* clear state */
4375 count += 2;
4376
4377 return count;
4378}
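/* The count above mirrors the packets emitted by gfx_v8_0_cp_gfx_start():
 * 2 dwords for PREAMBLE begin, 3 for CONTEXT_CONTROL, 2 + reg_count per
 * SET_CONTEXT_REG extent (header plus start offset), 4 for the two
 * raster-config registers, 2 for PREAMBLE end and 2 for CLEAR_STATE;
 * the caller adds 4 more for the SET_BASE packet. */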
4379
4380static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4381{
4382 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4383 const struct cs_section_def *sect = NULL;
4384 const struct cs_extent_def *ext = NULL;
4385 int r, i;
4386
4387 /* init the CP */
4388 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4389 WREG32(mmCP_ENDIAN_SWAP, 0);
4390 WREG32(mmCP_DEVICE_ID, 1);
4391
4392 gfx_v8_0_cp_gfx_enable(adev, true);
4393
a27de35c 4394 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4395 if (r) {
4396 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4397 return r;
4398 }
4399
4400 /* clear state buffer */
4401 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4402 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4403
4404 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4405 amdgpu_ring_write(ring, 0x80000000);
4406 amdgpu_ring_write(ring, 0x80000000);
4407
4408 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4409 for (ext = sect->section; ext->extent != NULL; ++ext) {
4410 if (sect->id == SECT_CONTEXT) {
4411 amdgpu_ring_write(ring,
4412 PACKET3(PACKET3_SET_CONTEXT_REG,
4413 ext->reg_count));
4414 amdgpu_ring_write(ring,
4415 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4416 for (i = 0; i < ext->reg_count; i++)
4417 amdgpu_ring_write(ring, ext->extent[i]);
4418 }
4419 }
4420 }
4421
4422 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4423 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
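 /* per-ASIC golden PA_SC_RASTER_CONFIG/_1 values; they encode how raster
  * work is spread across the render backends, which is why parts with
  * different RB counts get different packings below */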
4424 switch (adev->asic_type) {
4425 case CHIP_TONGA:
2cc0c0b5 4426 case CHIP_POLARIS10:
4427 amdgpu_ring_write(ring, 0x16000012);
4428 amdgpu_ring_write(ring, 0x0000002A);
4429 break;
2cc0c0b5 4430 case CHIP_POLARIS11:
c4642a47 4431 case CHIP_POLARIS12:
4432 amdgpu_ring_write(ring, 0x16000012);
4433 amdgpu_ring_write(ring, 0x00000000);
4434 break;
4435 case CHIP_FIJI:
4436 amdgpu_ring_write(ring, 0x3a00161a);
4437 amdgpu_ring_write(ring, 0x0000002e);
4438 break;
4439 case CHIP_CARRIZO:
4440 amdgpu_ring_write(ring, 0x00000002);
4441 amdgpu_ring_write(ring, 0x00000000);
4442 break;
4443 case CHIP_TOPAZ:
4444 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4445 0x00000000 : 0x00000002);
4446 amdgpu_ring_write(ring, 0x00000000);
4447 break;
4448 case CHIP_STONEY:
4449 amdgpu_ring_write(ring, 0x00000000);
4450 amdgpu_ring_write(ring, 0x00000000);
4451 break;
4452 default:
4453 BUG();
4454 }
4455
4456 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4457 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4458
4459 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4460 amdgpu_ring_write(ring, 0);
4461
4462 /* init the CE partitions */
4463 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4464 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4465 amdgpu_ring_write(ring, 0x8000);
4466 amdgpu_ring_write(ring, 0x8000);
4467
a27de35c 4468 amdgpu_ring_commit(ring);
4469
4470 return 0;
4471}
4472
4473static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4474{
4475 struct amdgpu_ring *ring;
4476 u32 tmp;
4477 u32 rb_bufsz;
42e8cb50 4478 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4479 int r;
4480
4481 /* Set the write pointer delay */
4482 WREG32(mmCP_RB_WPTR_DELAY, 0);
4483
4484 /* set the RB to use vmid 0 */
4485 WREG32(mmCP_RB_VMID, 0);
4486
4487 /* Set ring buffer size */
4488 ring = &adev->gfx.gfx_ring[0];
4489 rb_bufsz = order_base_2(ring->ring_size / 8);
4490 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4491 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4492 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4493 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4494#ifdef __BIG_ENDIAN
4495 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4496#endif
4497 WREG32(mmCP_RB0_CNTL, tmp);
4498
4499 /* Initialize the ring buffer's read and write pointers */
4500 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4501 ring->wptr = 0;
536fbf94 4502 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4503
4504 /* set the wb address whether it's enabled or not */
4505 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4506 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4507 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4508
4509 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4510 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4511 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4512 mdelay(1);
4513 WREG32(mmCP_RB0_CNTL, tmp);
4514
4515 rb_addr = ring->gpu_addr >> 8;
4516 WREG32(mmCP_RB0_BASE, rb_addr);
4517 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4518
4519 /* no gfx doorbells on iceland */
4520 if (adev->asic_type != CHIP_TOPAZ) {
4521 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4522 if (ring->use_doorbell) {
4523 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4524 DOORBELL_OFFSET, ring->doorbell_index);
4525 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4526 DOORBELL_HIT, 0);
4527 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4528 DOORBELL_EN, 1);
4529 } else {
4530 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4531 DOORBELL_EN, 0);
4532 }
4533 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4534
4535 if (adev->asic_type == CHIP_TONGA) {
4536 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4537 DOORBELL_RANGE_LOWER,
4538 AMDGPU_DOORBELL_GFX_RING0);
4539 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4540
4541 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4542 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4543 }
4544
4545 }
4546
4547 /* start the ring */
f6bd7942 4548 amdgpu_ring_clear_ring(ring);
4549 gfx_v8_0_cp_gfx_start(adev);
4550 ring->ready = true;
4551 r = amdgpu_ring_test_ring(ring);
5003f278 4552 if (r)
aaa36a97 4553 ring->ready = false;
aaa36a97 4554
5003f278 4555 return r;
4556}
4557
4558static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4559{
4560 int i;
4561
4562 if (enable) {
4563 WREG32(mmCP_MEC_CNTL, 0);
4564 } else {
4565 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4566 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4567 adev->gfx.compute_ring[i].ready = false;
fcf17a43 4568 adev->gfx.kiq.ring.ready = false;
4569 }
4570 udelay(50);
4571}
4572
4573static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4574{
4575 const struct gfx_firmware_header_v1_0 *mec_hdr;
4576 const __le32 *fw_data;
4577 unsigned i, fw_size;
4578
4579 if (!adev->gfx.mec_fw)
4580 return -EINVAL;
4581
4582 gfx_v8_0_cp_compute_enable(adev, false);
4583
4584 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4585 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4586
4587 fw_data = (const __le32 *)
4588 (adev->gfx.mec_fw->data +
4589 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4590 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4591
4592 /* MEC1 */
4593 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4594 for (i = 0; i < fw_size; i++)
4595 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4596 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4597
4598 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4599 if (adev->gfx.mec2_fw) {
4600 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4601
4602 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4603 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4604
4605 fw_data = (const __le32 *)
4606 (adev->gfx.mec2_fw->data +
4607 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4608 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4609
4610 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4611 for (i = 0; i < fw_size; i++)
4612 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4613 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4614 }
4615
4616 return 0;
4617}
4618
4619static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4620{
4621 int i, r;
4622
4623 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4624 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4625
4626 if (ring->mqd_obj) {
4627 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4628 if (unlikely(r != 0))
4629 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4630
4631 amdgpu_bo_unpin(ring->mqd_obj);
4632 amdgpu_bo_unreserve(ring->mqd_obj);
4633
4634 amdgpu_bo_unref(&ring->mqd_obj);
4635 ring->mqd_obj = NULL;
4636 ring->mqd_ptr = NULL;
4637 ring->mqd_gpu_addr = 0;
4638 }
4639 }
4640}
4641
4642/* KIQ functions */
4643static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4644{
4645 uint32_t tmp;
4646 struct amdgpu_device *adev = ring->adev;
4647
4648 /* tell RLC which is KIQ queue */
4649 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4650 tmp &= 0xffffff00;
4651 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4652 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4653 tmp |= 0x80;
4654 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4655}
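/* RLC_CP_SCHEDULERS carries the me/pipe/queue of the KIQ in its low byte;
 * the second write above additionally sets bit 7 (0x80), which appears to
 * latch/activate the new selection. */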
4656
4657static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
4658{
4659 amdgpu_ring_alloc(ring, 8);
4660 /* set resources */
4661 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4662 amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4663 amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */
4664 amdgpu_ring_write(ring, 0); /* queue mask hi */
4665 amdgpu_ring_write(ring, 0); /* gws mask lo */
4666 amdgpu_ring_write(ring, 0); /* gws mask hi */
4667 amdgpu_ring_write(ring, 0); /* oac mask */
4668 amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */
4669 amdgpu_ring_commit(ring);
4670 udelay(50);
4671}
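/* The SET_RESOURCES packet hands queues over to the KIQ scheduler (mask
 * 0x000000FF, presumably one bit per queue); the GWS/OAC/GDS fields stay
 * zero since only queue mapping is needed here. */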
4672
4673static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
4674 struct amdgpu_ring *ring)
4675{
4676 struct amdgpu_device *adev = kiq_ring->adev;
4677 uint64_t mqd_addr, wptr_addr;
4678
4679 mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4680 wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4681 amdgpu_ring_alloc(kiq_ring, 8);
4682
4683 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4684 /* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
4685 amdgpu_ring_write(kiq_ring, 0x21010000);
4686 amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
4687 (ring->queue << 26) |
4688 (ring->pipe << 29) |
4689 ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
4690 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4691 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4692 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4693 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4694 amdgpu_ring_commit(kiq_ring);
4695 udelay(50);
4696}
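/* MAP_QUEUES gives the CP the ring's MQD address and write-pointer poll
 * address; once the KIQ retires the packet, the compute ring is backed by
 * a hardware queue slot without any direct HQD register writes from the
 * driver. */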
4697
a2140e00 4698static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4e638ae9 4699{
015c2360 4700 struct amdgpu_device *adev = ring->adev;
a2140e00 4701 struct vi_mqd *mqd = ring->mqd_ptr;
4702 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4703 uint32_t tmp;
4704
4705 mqd->header = 0xC0310800;
4706 mqd->compute_pipelinestat_enable = 0x00000001;
4707 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4708 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4709 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4710 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4711 mqd->compute_misc_reserved = 0x00000003;
4712
34534610 4713 eop_base_addr = ring->eop_gpu_addr >> 8;
4714 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4715 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4716
4717 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4718 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4719 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4720 (order_base_2(MEC_HPD_SIZE / 4) - 1));
4721
4722 mqd->cp_hqd_eop_control = tmp;
4723
4724 /* enable doorbell? */
4725 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4726 CP_HQD_PQ_DOORBELL_CONTROL,
4727 DOORBELL_EN,
4728 ring->use_doorbell ? 1 : 0);
4729
4730 mqd->cp_hqd_pq_doorbell_control = tmp;
4731
4732 /* disable the queue if it's active */
4733 mqd->cp_hqd_dequeue_request = 0;
4734 mqd->cp_hqd_pq_rptr = 0;
4735 mqd->cp_hqd_pq_wptr = 0;
4736
4737 /* set the pointer to the MQD */
4738 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4739 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4740
4741 /* set MQD vmid to 0 */
4742 tmp = RREG32(mmCP_MQD_CONTROL);
4743 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4744 mqd->cp_mqd_control = tmp;
4745
4746 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4747 hqd_gpu_addr = ring->gpu_addr >> 8;
4748 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4749 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4750
4751 /* set up the HQD, this is similar to CP_RB0_CNTL */
4752 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4753 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4754 (order_base_2(ring->ring_size / 4) - 1));
4755 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4756 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4757#ifdef __BIG_ENDIAN
4758 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4759#endif
4760 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4761 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4762 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4763 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4764 mqd->cp_hqd_pq_control = tmp;
4765
4766 /* set the wb address whether it's enabled or not */
4767 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4768 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4769 mqd->cp_hqd_pq_rptr_report_addr_hi =
4770 upper_32_bits(wb_gpu_addr) & 0xffff;
4771
4772 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4773 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4774 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4775 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4776
4777 tmp = 0;
4778 /* enable the doorbell if requested */
4779 if (ring->use_doorbell) {
4780 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4781 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4782 DOORBELL_OFFSET, ring->doorbell_index);
4783
4784 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4785 DOORBELL_EN, 1);
4786 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4787 DOORBELL_SOURCE, 0);
4788 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4789 DOORBELL_HIT, 0);
4790 }
4791
4792 mqd->cp_hqd_pq_doorbell_control = tmp;
4793
4794 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4795 ring->wptr = 0;
4796 mqd->cp_hqd_pq_wptr = ring->wptr;
4797 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4798
4799 /* set the vmid for the queue */
4800 mqd->cp_hqd_vmid = 0;
4801
4802 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4803 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4804 mqd->cp_hqd_persistent_state = tmp;
4805
4806 /* activate the queue */
4807 mqd->cp_hqd_active = 1;
4808
4809 return 0;
4810}
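/* At this point the MQD mirrors every HQD register of interest; it is
 * either written out to the hardware by gfx_v8_0_kiq_init_register()
 * below or consumed by the KIQ via a MAP_QUEUES packet. */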
4811
a2140e00 4812static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
4e638ae9 4813{
015c2360 4814 struct amdgpu_device *adev = ring->adev;
a2140e00 4815 struct vi_mqd *mqd = ring->mqd_ptr;
4816 int j;
4817
4818 /* disable wptr polling */
0ac642c5 4819 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4820
4821 WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
4822 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
4823
4824 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4825 WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
4826
4827 /* enable doorbell? */
4828 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
4829
4830 /* disable the queue if it's active */
699d12b7 4831 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4832 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4833 for (j = 0; j < adev->usec_timeout; j++) {
699d12b7 4834 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4835 break;
4836 udelay(1);
4837 }
4838 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4839 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4840 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4841 }
4842
4843 /* set the pointer to the MQD */
4844 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4845 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4846
4847 /* set MQD vmid to 0 */
4848 WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
4849
4850 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4851 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4852 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4853
4854 /* set up the HQD, this is similar to CP_RB0_CNTL */
4855 WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
4856
4857 /* set the wb address whether it's enabled or not */
4858 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4859 mqd->cp_hqd_pq_rptr_report_addr_lo);
4860 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4861 mqd->cp_hqd_pq_rptr_report_addr_hi);
4862
4863 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4864 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
4865 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
4866
4867 /* enable the doorbell if requested */
4868 if (ring->use_doorbell) {
4869 if ((adev->asic_type == CHIP_CARRIZO) ||
4870 (adev->asic_type == CHIP_FIJI) ||
4871 (adev->asic_type == CHIP_STONEY)) {
4872 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4873 AMDGPU_DOORBELL_KIQ << 2);
4874 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4875 AMDGPU_DOORBELL_MEC_RING7 << 2);
4876 }
4877 }
4878 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
4879
4880 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4881 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4882
4883 /* set the vmid for the queue */
4884 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4885
4886 WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
4887
4888 /* activate the queue */
4889 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4890
4891 if (ring->use_doorbell)
4892 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4893
4894 return 0;
4895}
4896
a2140e00 4897static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4898{
4899 struct amdgpu_device *adev = ring->adev;
4900 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
a2140e00 4901 struct vi_mqd *mqd = ring->mqd_ptr;
2da4da3c 4902 bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
1fb37a3d 4903 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4904
4905 if (is_kiq) {
4e638ae9 4906 gfx_v8_0_kiq_setting(&kiq->ring);
1fb37a3d 4907 } else {
4908 mqd_idx = ring - &adev->gfx.compute_ring[0];
4909 }
4e638ae9 4910
4911 if (!adev->gfx.in_reset) {
4912 memset((void *)mqd, 0, sizeof(*mqd));
4913 mutex_lock(&adev->srbm_mutex);
4914 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
a2140e00 4915 gfx_v8_0_mqd_init(ring);
1fb37a3d 4916 if (is_kiq)
a2140e00 4917 gfx_v8_0_kiq_init_register(ring);
4918 vi_srbm_select(adev, 0, 0, 0, 0);
4919 mutex_unlock(&adev->srbm_mutex);
4920
4921 if (adev->gfx.mec.mqd_backup[mqd_idx])
4922 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4923 } else { /* for GPU_RESET case */
4924 /* reset MQD to a clean status */
4925 if (adev->gfx.mec.mqd_backup[mqd_idx])
4926 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4927
4928 /* reset ring buffer */
4929 ring->wptr = 0;
4930 amdgpu_ring_clear_ring(ring);
4931
4932 if (is_kiq) {
4933 mutex_lock(&adev->srbm_mutex);
4934 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
a2140e00 4935 gfx_v8_0_kiq_init_register(ring);
4936 vi_srbm_select(adev, 0, 0, 0, 0);
4937 mutex_unlock(&adev->srbm_mutex);
4938 }
4939 }
4940
4941 if (is_kiq)
4942 gfx_v8_0_kiq_enable(ring);
4943 else
4944 gfx_v8_0_map_queue_enable(&kiq->ring, ring);
4945
4946 return 0;
4947}
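/* On first init the freshly built MQD is saved to a backup; after a GPU
 * reset the backup is restored instead and only the ring buffer is
 * cleared, so queue state does not have to be recomputed. */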
4948
596c67d0 4949static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4950{
4951 struct amdgpu_ring *ring = NULL;
596c67d0 4952 int r = 0, i;
4e638ae9 4953
596c67d0 4954 gfx_v8_0_cp_compute_enable(adev, true);
4955
4956 ring = &adev->gfx.kiq.ring;
4957
4958 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4959 if (unlikely(r != 0))
4960 goto done;
4961
4962 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4963 if (!r) {
a2140e00 4964 r = gfx_v8_0_kiq_init_queue(ring);
596c67d0 4965 amdgpu_bo_kunmap(ring->mqd_obj);
1fb37a3d 4966 ring->mqd_ptr = NULL;
4e638ae9 4967 }
4968 amdgpu_bo_unreserve(ring->mqd_obj);
4969 if (r)
4970 goto done;
4e638ae9 4971
4972 ring->ready = true;
4973 r = amdgpu_ring_test_ring(ring);
4974 if (r) {
4975 ring->ready = false;
4976 goto done;
4977 }
4978
4979 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4980 ring = &adev->gfx.compute_ring[i];
4981
4982 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4983 if (unlikely(r != 0))
4984 goto done;
4985 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4986 if (!r) {
a2140e00 4987 r = gfx_v8_0_kiq_init_queue(ring);
596c67d0 4988 amdgpu_bo_kunmap(ring->mqd_obj);
1fb37a3d 4989 ring->mqd_ptr = NULL;
596c67d0 4990 }
4991 amdgpu_bo_unreserve(ring->mqd_obj);
4992 if (r)
4993 goto done;
4994
4995 ring->ready = true;
4996 r = amdgpu_ring_test_ring(ring);
4997 if (r)
4998 ring->ready = false;
4999 }
5000
5001done:
5002 return r;
5003}
5004
5005static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
5006{
5007 int r, i, j;
5008 u32 tmp;
5009 bool use_doorbell = true;
5010 u64 hqd_gpu_addr;
5011 u64 mqd_gpu_addr;
5012 u64 eop_gpu_addr;
5013 u64 wb_gpu_addr;
5014 u32 *buf;
5015 struct vi_mqd *mqd;
5016
ad3b9614 5017 /* init the queues. */
5018 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5019 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5020
5021 if (ring->mqd_obj == NULL) {
5022 r = amdgpu_bo_create(adev,
5023 sizeof(struct vi_mqd),
5024 PAGE_SIZE, true,
5025 AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
72d7668b 5026 NULL, &ring->mqd_obj);
5027 if (r) {
5028 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
5029 return r;
5030 }
5031 }
5032
5033 r = amdgpu_bo_reserve(ring->mqd_obj, false);
5034 if (unlikely(r != 0)) {
5035 gfx_v8_0_cp_compute_fini(adev);
5036 return r;
5037 }
5038 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
5039 &mqd_gpu_addr);
5040 if (r) {
5041 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
5042 gfx_v8_0_cp_compute_fini(adev);
5043 return r;
5044 }
5045 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
5046 if (r) {
5047 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
5048 gfx_v8_0_cp_compute_fini(adev);
5049 return r;
5050 }
5051
5052 /* init the mqd struct */
5053 memset(buf, 0, sizeof(struct vi_mqd));
5054
5055 mqd = (struct vi_mqd *)buf;
5056 mqd->header = 0xC0310800;
5057 mqd->compute_pipelinestat_enable = 0x00000001;
5058 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
5059 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
5060 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
5061 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
5062 mqd->compute_misc_reserved = 0x00000003;
5063
5064 mutex_lock(&adev->srbm_mutex);
5065 vi_srbm_select(adev, ring->me,
5066 ring->pipe,
5067 ring->queue, 0);
5068
5069 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
5070 eop_gpu_addr >>= 8;
5071
5072 /* write the EOP addr */
5073 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
5074 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
5075
5076 /* set the VMID assigned */
5077 WREG32(mmCP_HQD_VMID, 0);
5078
5079 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
5080 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
5081 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
5082 (order_base_2(MEC_HPD_SIZE / 4) - 1));
5083 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
5084
5085 /* disable wptr polling */
5086 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
5087 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
5088 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
5089
5090 mqd->cp_hqd_eop_base_addr_lo =
5091 RREG32(mmCP_HQD_EOP_BASE_ADDR);
5092 mqd->cp_hqd_eop_base_addr_hi =
5093 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
5094
5095 /* enable doorbell? */
5096 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
5097 if (use_doorbell) {
5098 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
5099 } else {
5100 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
5101 }
5102 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
5103 mqd->cp_hqd_pq_doorbell_control = tmp;
5104
5105 /* disable the queue if it's active */
5106 mqd->cp_hqd_dequeue_request = 0;
5107 mqd->cp_hqd_pq_rptr = 0;
5108 mqd->cp_hqd_pq_wptr = 0;
5109 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
5110 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
5111 for (j = 0; j < adev->usec_timeout; j++) {
5112 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
5113 break;
5114 udelay(1);
5115 }
5116 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
5117 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
5118 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
5119 }
5120
5121 /* set the pointer to the MQD */
5122 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
5123 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5124 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
5125 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
5126
5127 /* set MQD vmid to 0 */
5128 tmp = RREG32(mmCP_MQD_CONTROL);
5129 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
5130 WREG32(mmCP_MQD_CONTROL, tmp);
5131 mqd->cp_mqd_control = tmp;
5132
5133 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
5134 hqd_gpu_addr = ring->gpu_addr >> 8;
5135 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
5136 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5137 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
5138 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
5139
5140 /* set up the HQD, this is similar to CP_RB0_CNTL */
5141 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
5142 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
5143 (order_base_2(ring->ring_size / 4) - 1));
5144 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
5145 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
5146#ifdef __BIG_ENDIAN
5147 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
5148#endif
5149 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
5150 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
5151 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
5152 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
5153 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
5154 mqd->cp_hqd_pq_control = tmp;
5155
5156 /* set the wb address whether it's enabled or not */
5157 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
5158 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
5159 mqd->cp_hqd_pq_rptr_report_addr_hi =
5160 upper_32_bits(wb_gpu_addr) & 0xffff;
5161 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
5162 mqd->cp_hqd_pq_rptr_report_addr_lo);
5163 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5164 mqd->cp_hqd_pq_rptr_report_addr_hi);
5165
5166 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
5167 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
aeab2032 5168 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
aaa36a97 5169 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
aeab2032 5170 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
5171 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
5172 mqd->cp_hqd_pq_wptr_poll_addr_hi);
5173
5174 /* enable the doorbell if requested */
5175 if (use_doorbell) {
bddf8026 5176 if ((adev->asic_type == CHIP_CARRIZO) ||
e3c7656c 5177 (adev->asic_type == CHIP_FIJI) ||
68182d90 5178 (adev->asic_type == CHIP_STONEY) ||
2cc0c0b5 5179 (adev->asic_type == CHIP_POLARIS11) ||
5180 (adev->asic_type == CHIP_POLARIS10) ||
5181 (adev->asic_type == CHIP_POLARIS12)) {
5182 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
5183 AMDGPU_DOORBELL_KIQ << 2);
5184 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
b8826b0c 5185 AMDGPU_DOORBELL_MEC_RING7 << 2);
5186 }
5187 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
5188 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
5189 DOORBELL_OFFSET, ring->doorbell_index);
5190 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
5191 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
5192 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
5193 mqd->cp_hqd_pq_doorbell_control = tmp;
5194
5195 } else {
5196 mqd->cp_hqd_pq_doorbell_control = 0;
5197 }
5198 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
5199 mqd->cp_hqd_pq_doorbell_control);
5200
5201 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5202 ring->wptr = 0;
536fbf94 5203 mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
5204 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
5205 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
5206
5207 /* set the vmid for the queue */
5208 mqd->cp_hqd_vmid = 0;
5209 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
5210
5211 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
5212 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
5213 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
5214 mqd->cp_hqd_persistent_state = tmp;
68182d90 5215 if (adev->asic_type == CHIP_STONEY ||
2cc0c0b5 5216 adev->asic_type == CHIP_POLARIS11 ||
5217 adev->asic_type == CHIP_POLARIS10 ||
5218 adev->asic_type == CHIP_POLARIS12) {
5219 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
5220 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
5221 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
5222 }
5223
5224 /* activate the queue */
5225 mqd->cp_hqd_active = 1;
5226 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
5227
5228 vi_srbm_select(adev, 0, 0, 0, 0);
5229 mutex_unlock(&adev->srbm_mutex);
5230
5231 amdgpu_bo_kunmap(ring->mqd_obj);
5232 amdgpu_bo_unreserve(ring->mqd_obj);
5233 }
5234
5235 if (use_doorbell) {
5236 tmp = RREG32(mmCP_PQ_STATUS);
5237 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
5238 WREG32(mmCP_PQ_STATUS, tmp);
5239 }
5240
6e9821b2 5241 gfx_v8_0_cp_compute_enable(adev, true);
5242
5243 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5244 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5245
5246 ring->ready = true;
5247 r = amdgpu_ring_test_ring(ring);
5248 if (r)
5249 ring->ready = false;
5250 }
5251
5252 return 0;
5253}
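/* This legacy path programs the HQD registers directly and is used on
 * bare metal; under SR-IOV, gfx_v8_0_kiq_resume() brings up the same
 * rings through KIQ packets instead (see gfx_v8_0_cp_resume() below). */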
5254
5255static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5256{
5257 int r;
5258
e3c7656c 5259 if (!(adev->flags & AMD_IS_APU))
5260 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5261
e61710c5 5262 if (!adev->pp_enabled) {
e635ee07 5263 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
5264 /* legacy firmware loading */
5265 r = gfx_v8_0_cp_gfx_load_microcode(adev);
5266 if (r)
5267 return r;
aaa36a97 5268
5269 r = gfx_v8_0_cp_compute_load_microcode(adev);
5270 if (r)
5271 return r;
5272 } else {
5273 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5274 AMDGPU_UCODE_ID_CP_CE);
5275 if (r)
5276 return -EINVAL;
5277
5278 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5279 AMDGPU_UCODE_ID_CP_PFP);
5280 if (r)
5281 return -EINVAL;
5282
5283 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5284 AMDGPU_UCODE_ID_CP_ME);
5285 if (r)
5286 return -EINVAL;
5287
5288 if (adev->asic_type == CHIP_TOPAZ) {
5289 r = gfx_v8_0_cp_compute_load_microcode(adev);
5290 if (r)
5291 return r;
5292 } else {
5293 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5294 AMDGPU_UCODE_ID_CP_MEC1);
5295 if (r)
5296 return -EINVAL;
5297 }
ba5c2a87 5298 }
5299 }
5300
5301 r = gfx_v8_0_cp_gfx_resume(adev);
5302 if (r)
5303 return r;
5304
5305 if (amdgpu_sriov_vf(adev))
5306 r = gfx_v8_0_kiq_resume(adev);
5307 else
5308 r = gfx_v8_0_cp_compute_resume(adev);
5309 if (r)
5310 return r;
5311
5312 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5313
5314 return 0;
5315}
5316
5317static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5318{
5319 gfx_v8_0_cp_gfx_enable(adev, enable);
5320 gfx_v8_0_cp_compute_enable(adev, enable);
5321}
5322
5fc3aeeb 5323static int gfx_v8_0_hw_init(void *handle)
5324{
5325 int r;
5fc3aeeb 5326 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5327
5328 gfx_v8_0_init_golden_registers(adev);
5329 gfx_v8_0_gpu_init(adev);
5330
5331 r = gfx_v8_0_rlc_resume(adev);
5332 if (r)
5333 return r;
5334
5335 r = gfx_v8_0_cp_resume(adev);
5336
5337 return r;
5338}
5339
5fc3aeeb 5340static int gfx_v8_0_hw_fini(void *handle)
aaa36a97 5341{
5fc3aeeb 5342 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5343
5344 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5345 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5346 if (amdgpu_sriov_vf(adev)) {
5347 pr_debug("For SRIOV client, nothing to do here\n");
5348 return 0;
5349 }
5350 gfx_v8_0_cp_enable(adev, false);
5351 gfx_v8_0_rlc_stop(adev);
5352 gfx_v8_0_cp_compute_fini(adev);
5353
5354 amdgpu_set_powergating_state(adev,
5355 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5356
5357 return 0;
5358}
5359
5fc3aeeb 5360static int gfx_v8_0_suspend(void *handle)
aaa36a97 5361{
5fc3aeeb 5362 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5363
5364 return gfx_v8_0_hw_fini(adev);
5365}
5366
5fc3aeeb 5367static int gfx_v8_0_resume(void *handle)
aaa36a97 5368{
5fc3aeeb 5369 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5370
5371 return gfx_v8_0_hw_init(adev);
5372}
5373
5fc3aeeb 5374static bool gfx_v8_0_is_idle(void *handle)
aaa36a97 5375{
5fc3aeeb 5376 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5377
5378 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5379 return false;
5380 else
5381 return true;
5382}
5383
5fc3aeeb 5384static int gfx_v8_0_wait_for_idle(void *handle)
5385{
5386 unsigned i;
5fc3aeeb 5387 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5388
5389 for (i = 0; i < adev->usec_timeout; i++) {
5003f278 5390 if (gfx_v8_0_is_idle(handle))
aaa36a97 5391 return 0;
5003f278 5392
5393 udelay(1);
5394 }
5395 return -ETIMEDOUT;
5396}
5397
da146d3b 5398static bool gfx_v8_0_check_soft_reset(void *handle)
aaa36a97 5399{
3d7c6384 5400 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5401 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5402 u32 tmp;
5403
5404 /* GRBM_STATUS */
5405 tmp = RREG32(mmGRBM_STATUS);
5406 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5407 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5408 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5409 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5410 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5411 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5412 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5413 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5414 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5415 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5416 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5417 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5418 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5419 }
5420
5421 /* GRBM_STATUS2 */
5422 tmp = RREG32(mmGRBM_STATUS2);
5423 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5424 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5425 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5426
5427 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5428 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5429 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5430 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5431 SOFT_RESET_CPF, 1);
5432 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5433 SOFT_RESET_CPC, 1);
5434 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5435 SOFT_RESET_CPG, 1);
5436 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5437 SOFT_RESET_GRBM, 1);
5438 }
5439
5440 /* SRBM_STATUS */
5441 tmp = RREG32(mmSRBM_STATUS);
5442 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5443 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5444 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5445 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5446 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5447 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5448
5449 if (grbm_soft_reset || srbm_soft_reset) {
5450 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5451 adev->gfx.srbm_soft_reset = srbm_soft_reset;
da146d3b 5452 return true;
3d7c6384 5453 } else {
5454 adev->gfx.grbm_soft_reset = 0;
5455 adev->gfx.srbm_soft_reset = 0;
da146d3b 5456 return false;
3d7c6384 5457 }
3d7c6384 5458}
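/* check_soft_reset only records which GRBM/SRBM reset bits are required;
 * pre_soft_reset then quiesces the affected blocks, soft_reset pulses the
 * bits, and post_soft_reset brings the rings back up. */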
aaa36a97 5459
5460static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
5461 struct amdgpu_ring *ring)
5462{
5463 int i;
5464
d1a5b250 5465 mutex_lock(&adev->srbm_mutex);
5466 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5467 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
35e259d5 5468 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, 2);
5469 for (i = 0; i < adev->usec_timeout; i++) {
5470 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
5471 break;
5472 udelay(1);
5473 }
5474 }
5475 vi_srbm_select(adev, 0, 0, 0, 0);
5476 mutex_unlock(&adev->srbm_mutex);
5477}
5478
5479static int gfx_v8_0_pre_soft_reset(void *handle)
5480{
5481 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5482 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5483
5484 if ((!adev->gfx.grbm_soft_reset) &&
5485 (!adev->gfx.srbm_soft_reset))
5486 return 0;
5487
5488 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5489 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5490
5491 /* stop the rlc */
5492 gfx_v8_0_rlc_stop(adev);
5493
5494 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5495 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5496 /* Disable GFX parsing/prefetching */
5497 gfx_v8_0_cp_gfx_enable(adev, false);
5498
5499 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5500 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5501 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5502 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5503 int i;
5504
5505 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5506 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5507
5508 gfx_v8_0_inactive_hqd(adev, ring);
5509 }
aaa36a97 5510 /* Disable MEC parsing/prefetching */
7776a693 5511 gfx_v8_0_cp_compute_enable(adev, false);
1057f20c 5512 }
7776a693 5513
5514 return 0;
5515}
7776a693 5516
5517static int gfx_v8_0_soft_reset(void *handle)
5518{
5519 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5520 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5521 u32 tmp;
aaa36a97 5522
5523 if ((!adev->gfx.grbm_soft_reset) &&
5524 (!adev->gfx.srbm_soft_reset))
3d7c6384 5525 return 0;
aaa36a97 5526
5527 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5528 srbm_soft_reset = adev->gfx.srbm_soft_reset;
aaa36a97 5529
5530 if (grbm_soft_reset || srbm_soft_reset) {
5531 tmp = RREG32(mmGMCON_DEBUG);
5532 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5533 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5534 WREG32(mmGMCON_DEBUG, tmp);
5535 udelay(50);
5536 }
aaa36a97 5537
5538 if (grbm_soft_reset) {
5539 tmp = RREG32(mmGRBM_SOFT_RESET);
5540 tmp |= grbm_soft_reset;
5541 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5542 WREG32(mmGRBM_SOFT_RESET, tmp);
5543 tmp = RREG32(mmGRBM_SOFT_RESET);
aaa36a97 5544
3d7c6384 5545 udelay(50);
aaa36a97 5546
5547 tmp &= ~grbm_soft_reset;
5548 WREG32(mmGRBM_SOFT_RESET, tmp);
5549 tmp = RREG32(mmGRBM_SOFT_RESET);
5550 }
7776a693 5551
5552 if (srbm_soft_reset) {
5553 tmp = RREG32(mmSRBM_SOFT_RESET);
5554 tmp |= srbm_soft_reset;
5555 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5556 WREG32(mmSRBM_SOFT_RESET, tmp);
5557 tmp = RREG32(mmSRBM_SOFT_RESET);
7776a693 5558
aaa36a97 5559 udelay(50);
7776a693 5560
5561 tmp &= ~srbm_soft_reset;
5562 WREG32(mmSRBM_SOFT_RESET, tmp);
5563 tmp = RREG32(mmSRBM_SOFT_RESET);
aaa36a97 5564 }
7776a693 5565
5566 if (grbm_soft_reset || srbm_soft_reset) {
5567 tmp = RREG32(mmGMCON_DEBUG);
5568 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5569 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5570 WREG32(mmGMCON_DEBUG, tmp);
aaa36a97 5571 }
5572
5573 /* Wait a little for things to settle down */
5574 udelay(50);
5575
5576 return 0;
5577}
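/* The GMCON_DEBUG GFX_STALL/GFX_CLEAR writes bracket the reset; as far as
 * this sequence shows, they stall and clear GFX-side memory traffic while
 * the reset bits are toggled. */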
5578
5579static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
5580 struct amdgpu_ring *ring)
5581{
d1a5b250 5582 mutex_lock(&adev->srbm_mutex);
5583 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5584 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5585 WREG32(mmCP_HQD_PQ_RPTR, 0);
5586 WREG32(mmCP_HQD_PQ_WPTR, 0);
5587 vi_srbm_select(adev, 0, 0, 0, 0);
d1a5b250 5588 mutex_unlock(&adev->srbm_mutex);
5589}
5590
5591static int gfx_v8_0_post_soft_reset(void *handle)
5592{
5593 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5594 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5595
5596 if ((!adev->gfx.grbm_soft_reset) &&
5597 (!adev->gfx.srbm_soft_reset))
5598 return 0;
5599
5600 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5601 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5602
5603 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5604 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5605 gfx_v8_0_cp_gfx_resume(adev);
5606
5607 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5608 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5609 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5610 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5611 int i;
5612
5613 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5614 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5615
5616 gfx_v8_0_init_hqd(adev, ring);
5617 }
5618 gfx_v8_0_cp_compute_resume(adev);
5619 }
5620 gfx_v8_0_rlc_start(adev);
5621
5622 return 0;
5623}
5624
5625/**
5626 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5627 *
5628 * @adev: amdgpu_device pointer
5629 *
5630 * Fetches a GPU clock counter snapshot.
5631 * Returns the 64 bit clock counter snapshot.
5632 */
b95e31fd 5633static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5634{
5635 uint64_t clock;
5636
5637 mutex_lock(&adev->gfx.gpu_clock_mutex);
5638 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5639 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5640 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5641 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5642 return clock;
5643}
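/* Writing RLC_CAPTURE_GPU_CLOCK_COUNT latches the free-running counter
 * into the LSB/MSB pair so both halves are sampled consistently; the
 * mutex keeps concurrent readers from interleaving capture and read. */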
5644
5645static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5646 uint32_t vmid,
5647 uint32_t gds_base, uint32_t gds_size,
5648 uint32_t gws_base, uint32_t gws_size,
5649 uint32_t oa_base, uint32_t oa_size)
5650{
5651 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5652 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5653
5654 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5655 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5656
5657 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5658 oa_size = oa_size >> AMDGPU_OA_SHIFT;
5659
5660 /* GDS Base */
5661 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5662 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5663 WRITE_DATA_DST_SEL(0)));
5664 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5665 amdgpu_ring_write(ring, 0);
5666 amdgpu_ring_write(ring, gds_base);
5667
5668 /* GDS Size */
5669 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5670 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5671 WRITE_DATA_DST_SEL(0)));
5672 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5673 amdgpu_ring_write(ring, 0);
5674 amdgpu_ring_write(ring, gds_size);
5675
5676 /* GWS */
5677 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5678 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5679 WRITE_DATA_DST_SEL(0)));
5680 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5681 amdgpu_ring_write(ring, 0);
5682 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5683
5684 /* OA */
5685 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5686 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5687 WRITE_DATA_DST_SEL(0)));
5688 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5689 amdgpu_ring_write(ring, 0);
5690 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
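 /* (1 << (oa_base + oa_size)) - (1 << oa_base) builds a contiguous mask
  * of oa_size OA bits starting at bit oa_base */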
5691}
5692
5693static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5694{
5695 WREG32(mmSQ_IND_INDEX,
5696 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5697 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5698 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5699 (SQ_IND_INDEX__FORCE_READ_MASK));
5700 return RREG32(mmSQ_IND_DATA);
5701}
5702
5703static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5704 uint32_t wave, uint32_t thread,
5705 uint32_t regno, uint32_t num, uint32_t *out)
5706{
5707 WREG32(mmSQ_IND_INDEX,
5708 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5709 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5710 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5711 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5712 (SQ_IND_INDEX__FORCE_READ_MASK) |
5713 (SQ_IND_INDEX__AUTO_INCR_MASK));
5714 while (num--)
5715 *(out++) = RREG32(mmSQ_IND_DATA);
5716}
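/* SQ_IND_INDEX/SQ_IND_DATA form an indirect window into per-wave state;
 * with AUTO_INCR set, consecutive reads of SQ_IND_DATA walk consecutive
 * registers, which is what lets wave_read_regs() stream a whole range. */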
5717
5718static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5719{
5720 /* type 0 wave data */
5721 dst[(*no_fields)++] = 0;
5722 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5723 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5724 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5725 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5726 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5727 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5728 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5729 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5730 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5731 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5732 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5733 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5734 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5735 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5736 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5737 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5738 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5739 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5740}
5741
5742static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5743 uint32_t wave, uint32_t start,
5744 uint32_t size, uint32_t *dst)
5745{
5746 wave_read_regs(
5747 adev, simd, wave, 0,
5748 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5749}
5750
472259f0 5751
5752static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5753 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
05fb7291 5754 .select_se_sh = &gfx_v8_0_select_se_sh,
472259f0 5755 .read_wave_data = &gfx_v8_0_read_wave_data,
c5a60ce8 5756 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5757};
5758
5fc3aeeb 5759static int gfx_v8_0_early_init(void *handle)
aaa36a97 5760{
5fc3aeeb 5761 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5762
5763 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5764 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
b95e31fd 5765 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5766 gfx_v8_0_set_ring_funcs(adev);
5767 gfx_v8_0_set_irq_funcs(adev);
5768 gfx_v8_0_set_gds_init(adev);
dbff57bc 5769 gfx_v8_0_set_rlc_funcs(adev);
5770
5771 return 0;
5772}
5773
5774static int gfx_v8_0_late_init(void *handle)
5775{
5776 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5777 int r;
5778
5779 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5780 if (r)
5781 return r;
5782
5783 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5784 if (r)
5785 return r;
5786
5787 /* requires IBs so do in late init after IB pool is initialized */
5788 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5789 if (r)
5790 return r;
5791
5792 amdgpu_set_powergating_state(adev,
5793 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5794
5795 return 0;
5796}
5797
5798static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5799 bool enable)
62a86fc2 5800{
5801 if ((adev->asic_type == CHIP_POLARIS11) ||
5802 (adev->asic_type == CHIP_POLARIS12))
5803 /* Send msg to SMU via Powerplay */
5804 amdgpu_set_powergating_state(adev,
5805 AMD_IP_BLOCK_TYPE_SMC,
5806 enable ?
5807 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
62a86fc2 5808
61cb8cef 5809 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5810}
5811
5812static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5813 bool enable)
62a86fc2 5814{
61cb8cef 5815 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5816}
5817
2cc0c0b5 5818static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5819 bool enable)
5820{
61cb8cef 5821 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
62a86fc2
EH
5822}
5823
2c547165
AD
5824static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5825 bool enable)
5826{
61cb8cef 5827 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
2c547165
AD
5828}
5829
5830static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5831 bool enable)
5832{
61cb8cef 5833 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
2c547165
AD
5834
5835 /* Read any GFX register to wake up GFX. */
5836 if (!enable)
61cb8cef 5837 RREG32(mmDB_RENDER_CONTROL);
2c547165
AD
5838}
5839
5840static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5841 bool enable)
5842{
5843 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5844 cz_enable_gfx_cg_power_gating(adev, true);
5845 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5846 cz_enable_gfx_pipeline_power_gating(adev, true);
5847 } else {
5848 cz_enable_gfx_cg_power_gating(adev, false);
5849 cz_enable_gfx_pipeline_power_gating(adev, false);
5850 }
5851}
5852
5fc3aeeb 5853static int gfx_v8_0_set_powergating_state(void *handle,
5854 enum amd_powergating_state state)
aaa36a97 5855{
62a86fc2 5856 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7e913664 5857 bool enable = (state == AMD_PG_STATE_GATE);
62a86fc2 5858
ce137c04
ML
5859 if (amdgpu_sriov_vf(adev))
5860 return 0;
5861
62a86fc2 5862 switch (adev->asic_type) {
2c547165
AD
5863 case CHIP_CARRIZO:
5864 case CHIP_STONEY:
ad1830d5 5865
5c964221
RZ
5866 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5867 cz_enable_sck_slow_down_on_power_up(adev, true);
5868 cz_enable_sck_slow_down_on_power_down(adev, true);
5869 } else {
5870 cz_enable_sck_slow_down_on_power_up(adev, false);
5871 cz_enable_sck_slow_down_on_power_down(adev, false);
5872 }
5873 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5874 cz_enable_cp_power_gating(adev, true);
5875 else
5876 cz_enable_cp_power_gating(adev, false);
5877
ad1830d5 5878 cz_update_gfx_cg_power_gating(adev, enable);
2c547165
AD
5879
5880 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5881 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5882 else
5883 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5884
5885 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5886 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5887 else
5888 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5889 break;
2cc0c0b5 5890 case CHIP_POLARIS11:
c4642a47 5891 case CHIP_POLARIS12:
7ba0eb6d
AD
5892 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5893 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5894 else
5895 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5896
5897 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5898 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5899 else
5900 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5901
5902 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5903 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
62a86fc2 5904 else
7ba0eb6d 5905 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
62a86fc2
EH
5906 break;
5907 default:
5908 break;
5909 }
5910
aaa36a97
AD
5911 return 0;
5912}
5913
ebd843d6
HR
5914static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5915{
5916 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5917 int data;
5918
ce137c04
ML
5919 if (amdgpu_sriov_vf(adev))
5920 *flags = 0;
5921
ebd843d6
HR
5922 /* AMD_CG_SUPPORT_GFX_MGCG */
5923 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5924 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5925 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5926
5927 /* AMD_CG_SUPPORT_GFX_CGCG */
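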
5928 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5929 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5930 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5931
5932 /* AMD_CG_SUPPORT_GFX_CGLS */
5933 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5934 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5935
5936 /* AMD_CG_SUPPORT_GFX_CGTS */
5937 data = RREG32(mmCGTS_SM_CTRL_REG);
5938 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5939 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5940
5941 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5942 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5943 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5944
5945 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5946 data = RREG32(mmRLC_MEM_SLP_CNTL);
5947 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5948 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5949
5950 /* AMD_CG_SUPPORT_GFX_CP_LS */
5951 data = RREG32(mmCP_MEM_SLP_CNTL);
5952 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5953 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5954}
5955
79deaaf4 5956static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
14698b6c 5957 uint32_t reg_addr, uint32_t cmd)
6e378858
EH
5958{
5959 uint32_t data;
5960
9559ef5b 5961 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6e378858
EH
5962
5963 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5964 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5965
5966 data = RREG32(mmRLC_SERDES_WR_CTRL);
146f256f 5967 if (adev->asic_type == CHIP_STONEY)
62d2ce4b
TSD
5968 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5969 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5970 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5971 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5972 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5973 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5974 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5975 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5976 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
146f256f
AD
5977 else
5978 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5979 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5980 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5981 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5982 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5983 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5984 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5985 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5986 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5987 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5988 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
6e378858 5989 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
146f256f
AD
5990 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5991 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5992 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
6e378858
EH
5993
5994 WREG32(mmRLC_SERDES_WR_CTRL, data);
5995}
5996
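/*
 * Editorial reading of gfx_v8_0_send_serdes_cmd(): with both CU master
 * masks set to 0xffffffff and BPM_ADDR programmed to 0xff, a single call
 * such as
 *
 *	gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE,
 *				 SET_BPM_SERDES_CMD);
 *
 * appears to broadcast the BPM command (packed into the BPM_DATA and
 * REG_ADDR fields of mmRLC_SERDES_WR_CTRL) to all serdes units at once.
 */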
dbff57bc
AD
5997#define MSG_ENTER_RLC_SAFE_MODE 1
5998#define MSG_EXIT_RLC_SAFE_MODE 0
61cb8cef
TSD
5999#define RLC_GPR_REG2__REQ_MASK 0x00000001
6000#define RLC_GPR_REG2__REQ__SHIFT 0
6001#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
6002#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
dbff57bc 6003
dbff57bc
AD
6004static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
6005{
6006 u32 data;
6007 unsigned i;
6008
6009 data = RREG32(mmRLC_CNTL);
6010 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
6011 return;
6012
6013 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
6014 data |= RLC_SAFE_MODE__CMD_MASK;
6015 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
6016 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
6017 WREG32(mmRLC_SAFE_MODE, data);
6018
6019 for (i = 0; i < adev->usec_timeout; i++) {
6020 if ((RREG32(mmRLC_GPM_STAT) &
6021 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
6022 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
6023 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
6024 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
6025 break;
6026 udelay(1);
6027 }
6028
6029 for (i = 0; i < adev->usec_timeout; i++) {
61cb8cef 6030 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
dbff57bc
AD
6031 break;
6032 udelay(1);
6033 }
6034 adev->gfx.rlc.in_safe_mode = true;
6035 }
6036}
6037
6038static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
6039{
6040 u32 data = 0;
6041 unsigned i;
6042
6043 data = RREG32(mmRLC_CNTL);
6044 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
6045 return;
6046
6047 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
6048 if (adev->gfx.rlc.in_safe_mode) {
6049 data |= RLC_SAFE_MODE__CMD_MASK;
6050 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
6051 WREG32(mmRLC_SAFE_MODE, data);
6052 adev->gfx.rlc.in_safe_mode = false;
6053 }
6054 }
6055
6056 for (i = 0; i < adev->usec_timeout; i++) {
61cb8cef 6057 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
dbff57bc
AD
6058 break;
6059 udelay(1);
6060 }
6061}
6062
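/*
 * Editorial note: the enter/exit pair above is a simple handshake with the
 * RLC firmware.  The driver sets RLC_SAFE_MODE.CMD together with a MESSAGE
 * value (1 = enter, 0 = exit) and then polls until the firmware clears the
 * CMD bit, i.e. until the request has been acknowledged.
 */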
dbff57bc
AD
6063static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
6064 .enter_safe_mode = iceland_enter_rlc_safe_mode,
6065 .exit_safe_mode = iceland_exit_rlc_safe_mode
6066};
6067
dbff57bc
AD
6068static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
6069 bool enable)
6e378858
EH
6070{
6071 uint32_t temp, data;
6072
dbff57bc
AD
6073 adev->gfx.rlc.funcs->enter_safe_mode(adev);
6074
6e378858 6075 /* It is disabled by HW by default */
14698b6c
AD
6076 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
6077 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
61cb8cef 6078 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
14698b6c 6079 /* 1 - RLC memory Light sleep */
61cb8cef 6080 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
6e378858 6081
61cb8cef
TSD
6082 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
6083 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
14698b6c 6084 }
6e378858
EH
6085
6086 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
6087 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
dbff57bc
AD
6088 if (adev->flags & AMD_IS_APU)
6089 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6090 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6091 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
6092 else
6093 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6094 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6095 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
6096 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
6e378858
EH
6097
6098 if (temp != data)
6099 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
6100
6101 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6102 gfx_v8_0_wait_for_rlc_serdes(adev);
6103
6104 /* 5 - clear mgcg override */
79deaaf4 6105 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6e378858 6106
14698b6c
AD
6107 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
6108 /* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */
6109 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
6110 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
6111 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
6112 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
6113 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
6114 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
6115 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
6116 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
6117 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
6118 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
6119 if (temp != data)
6120 WREG32(mmCGTS_SM_CTRL_REG, data);
6121 }
6e378858
EH
6122 udelay(50);
6123
6124 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6125 gfx_v8_0_wait_for_rlc_serdes(adev);
6126 } else {
6127 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
6128 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6129 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6130 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6131 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
6132 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
6133 if (temp != data)
6134 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
6135
6136 /* 2 - disable MGLS in RLC */
6137 data = RREG32(mmRLC_MEM_SLP_CNTL);
6138 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
6139 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
6140 WREG32(mmRLC_MEM_SLP_CNTL, data);
6141 }
6142
6143 /* 3 - disable MGLS in CP */
6144 data = RREG32(mmCP_MEM_SLP_CNTL);
6145 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
6146 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
6147 WREG32(mmCP_MEM_SLP_CNTL, data);
6148 }
6149
6150 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
6151 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
6152 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
6153 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
6154 if (temp != data)
6155 WREG32(mmCGTS_SM_CTRL_REG, data);
6156
6157 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6158 gfx_v8_0_wait_for_rlc_serdes(adev);
6159
6160 /* 6 - set mgcg override */
79deaaf4 6161 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6e378858
EH
6162
6163 udelay(50);
6164
6165 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6166 gfx_v8_0_wait_for_rlc_serdes(adev);
6167 }
dbff57bc
AD
6168
6169 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6e378858
EH
6170}
6171
dbff57bc
AD
6172static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
6173 bool enable)
6e378858
EH
6174{
6175 uint32_t temp, temp1, data, data1;
6176
6177 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
6178
dbff57bc
AD
6179 adev->gfx.rlc.funcs->enter_safe_mode(adev);
6180
14698b6c 6181 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
6e378858
EH
6182 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6183 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
6184 if (temp1 != data1)
6185 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6186
dd31ae9a 6187 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6e378858
EH
6188 gfx_v8_0_wait_for_rlc_serdes(adev);
6189
dd31ae9a 6190 /* 2 - clear cgcg override */
79deaaf4 6191 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6e378858
EH
6192
6193 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6194 gfx_v8_0_wait_for_rlc_serdes(adev);
6195
dd31ae9a 6196 /* 3 - write cmd to set CGLS */
79deaaf4 6197 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
6e378858 6198
dd31ae9a 6199 /* 4 - enable cgcg */
6e378858
EH
6200 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
6201
14698b6c
AD
6202 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6203 /* enable cgls */
6204 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6e378858 6205
14698b6c
AD
6206 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6207 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
6e378858 6208
14698b6c
AD
6209 if (temp1 != data1)
6210 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6211 } else {
6212 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6213 }
6e378858
EH
6214
6215 if (temp != data)
6216 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
dd31ae9a
AN
6217
6218 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
6219 * Cmp_busy/GFX_Idle interrupts
6220 */
6221 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6e378858
EH
6222 } else {
6223 /* disable cntx_empty_int_enable & GFX Idle interrupt */
6224 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
6225
6226 /* TEST CGCG */
6227 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6228 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
6229 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
6230 if (temp1 != data1)
6231 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6232
6233 /* read gfx register to wake up cgcg */
6234 RREG32(mmCB_CGTT_SCLK_CTRL);
6235 RREG32(mmCB_CGTT_SCLK_CTRL);
6236 RREG32(mmCB_CGTT_SCLK_CTRL);
6237 RREG32(mmCB_CGTT_SCLK_CTRL);
6238
6239 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6240 gfx_v8_0_wait_for_rlc_serdes(adev);
6241
6242 /* write cmd to Set CGCG Override */
79deaaf4 6243 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6e378858
EH
6244
6245 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6246 gfx_v8_0_wait_for_rlc_serdes(adev);
6247
6248 /* write cmd to Clear CGLS */
79deaaf4 6249 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6e378858
EH
6250
6251 /* disable cgcg, cgls should be disabled too. */
6252 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
14698b6c 6253 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6e378858
EH
6254 if (temp != data)
6255 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6256 }
dbff57bc 6257
7894745a
TSD
6258 gfx_v8_0_wait_for_rlc_serdes(adev);
6259
dbff57bc 6260 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6e378858 6261}
dbff57bc
AD
6262static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6263 bool enable)
6e378858
EH
6264{
6265 if (enable) {
6266 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6267 * === MGCG + MGLS + TS(CG/LS) ===
6268 */
dbff57bc
AD
6269 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6270 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6e378858
EH
6271 } else {
6272 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6273 * === CGCG + CGLS ===
6274 */
dbff57bc
AD
6275 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6276 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6e378858
EH
6277 }
6278 return 0;
6279}
6280
a8ca3413
RZ
6281static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6282 enum amd_clockgating_state state)
6283{
8a19e7fa
RZ
6284 uint32_t msg_id, pp_state = 0;
6285 uint32_t pp_support_state = 0;
a8ca3413
RZ
6286 void *pp_handle = adev->powerplay.pp_handle;
6287
8a19e7fa
RZ
6288 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6289 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6290 pp_support_state = PP_STATE_SUPPORT_LS;
6291 pp_state = PP_STATE_LS;
6292 }
6293 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6294 pp_support_state |= PP_STATE_SUPPORT_CG;
6295 pp_state |= PP_STATE_CG;
6296 }
6297 if (state == AMD_CG_STATE_UNGATE)
6298 pp_state = 0;
6299
6300 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6301 PP_BLOCK_GFX_CG,
6302 pp_support_state,
6303 pp_state);
6304 amd_set_clockgating_by_smu(pp_handle, msg_id);
6305 }
a8ca3413 6306
8a19e7fa
RZ
6307 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6308 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6309 pp_support_state = PP_STATE_SUPPORT_LS;
6310 pp_state = PP_STATE_LS;
6311 }
a8ca3413 6312
8a19e7fa
RZ
6313 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6314 pp_support_state |= PP_STATE_SUPPORT_CG;
6315 pp_state |= PP_STATE_CG;
6316 }
6317
6318 if (state == AMD_CG_STATE_UNGATE)
6319 pp_state = 0;
6320
6321 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6322 PP_BLOCK_GFX_MG,
6323 pp_support_state,
6324 pp_state);
6325 amd_set_clockgating_by_smu(pp_handle, msg_id);
6326 }
a8ca3413
RZ
6327
6328 return 0;
6329}
6330
6331static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6332 enum amd_clockgating_state state)
6333{
8a19e7fa
RZ
6334
6335 uint32_t msg_id, pp_state = 0;
6336 uint32_t pp_support_state = 0;
a8ca3413
RZ
6337 void *pp_handle = adev->powerplay.pp_handle;
6338
8a19e7fa
RZ
6339 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6340 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6341 pp_support_state = PP_STATE_SUPPORT_LS;
6342 pp_state = PP_STATE_LS;
6343 }
6344 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6345 pp_support_state |= PP_STATE_SUPPORT_CG;
6346 pp_state |= PP_STATE_CG;
6347 }
6348 if (state == AMD_CG_STATE_UNGATE)
6349 pp_state = 0;
6350
6351 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6352 PP_BLOCK_GFX_CG,
6353 pp_support_state,
6354 pp_state);
6355 amd_set_clockgating_by_smu(pp_handle, msg_id);
6356 }
a8ca3413 6357
8a19e7fa
RZ
6358 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6359 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6360 pp_support_state = PP_STATE_SUPPORT_LS;
6361 pp_state = PP_STATE_LS;
6362 }
6363 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6364 pp_support_state |= PP_STATE_SUPPORT_CG;
6365 pp_state |= PP_STATE_CG;
6366 }
6367 if (state == AMD_CG_STATE_UNGATE)
6368 pp_state = 0;
6369
6370 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6371 PP_BLOCK_GFX_3D,
6372 pp_support_state,
6373 pp_state);
6374 amd_set_clockgating_by_smu(pp_handle, msg_id);
6375 }
a8ca3413 6376
8a19e7fa
RZ
6377 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6378 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6379 pp_support_state = PP_STATE_SUPPORT_LS;
6380 pp_state = PP_STATE_LS;
6381 }
a8ca3413 6382
8a19e7fa
RZ
6383 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6384 pp_support_state |= PP_STATE_SUPPORT_CG;
6385 pp_state |= PP_STATE_CG;
6386 }
a8ca3413 6387
8a19e7fa
RZ
6388 if (state == AMD_CG_STATE_UNGATE)
6389 pp_state = 0;
a8ca3413 6390
8a19e7fa
RZ
6391 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6392 PP_BLOCK_GFX_MG,
6393 pp_support_state,
6394 pp_state);
6395 amd_set_clockgating_by_smu(pp_handle, msg_id);
6396 }
6397
6398 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6399 pp_support_state = PP_STATE_SUPPORT_LS;
6400
6401 if (state == AMD_CG_STATE_UNGATE)
6402 pp_state = 0;
6403 else
6404 pp_state = PP_STATE_LS;
6405
6406 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6407 PP_BLOCK_GFX_RLC,
6408 pp_support_state,
6409 pp_state);
6410 amd_set_clockgating_by_smu(pp_handle, msg_id);
6411 }
6412
6413 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6414 pp_support_state = PP_STATE_SUPPORT_LS;
6415
6416 if (state == AMD_CG_STATE_UNGATE)
6417 pp_state = 0;
6418 else
6419 pp_state = PP_STATE_LS;
6420 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
a8ca3413 6421 PP_BLOCK_GFX_CP,
8a19e7fa 6422 pp_support_state,
a8ca3413 6423 pp_state);
8a19e7fa
RZ
6424 amd_set_clockgating_by_smu(pp_handle, msg_id);
6425 }
a8ca3413
RZ
6426
6427 return 0;
6428}
6429
5fc3aeeb 6430static int gfx_v8_0_set_clockgating_state(void *handle,
6431 enum amd_clockgating_state state)
aaa36a97 6432{
6e378858
EH
6433 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6434
ce137c04
ML
6435 if (amdgpu_sriov_vf(adev))
6436 return 0;
6437
6e378858
EH
6438 switch (adev->asic_type) {
6439 case CHIP_FIJI:
dbff57bc
AD
6440 case CHIP_CARRIZO:
6441 case CHIP_STONEY:
6442 gfx_v8_0_update_gfx_clock_gating(adev,
7e913664 6443 state == AMD_CG_STATE_GATE);
6e378858 6444 break;
a8ca3413
RZ
6445 case CHIP_TONGA:
6446 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6447 break;
6448 case CHIP_POLARIS10:
6449 case CHIP_POLARIS11:
739e9fff 6450 case CHIP_POLARIS12:
a8ca3413
RZ
6451 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6452 break;
6e378858
EH
6453 default:
6454 break;
6455 }
aaa36a97
AD
6456 return 0;
6457}
6458
536fbf94 6459static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
aaa36a97 6460{
5003f278 6461 return ring->adev->wb.wb[ring->rptr_offs];
aaa36a97
AD
6462}
6463
536fbf94 6464static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
aaa36a97
AD
6465{
6466 struct amdgpu_device *adev = ring->adev;
aaa36a97
AD
6467
6468 if (ring->use_doorbell)
6469 /* XXX check if swapping is necessary on BE */
5003f278 6470 return ring->adev->wb.wb[ring->wptr_offs];
aaa36a97 6471 else
5003f278 6472 return RREG32(mmCP_RB0_WPTR);
aaa36a97
AD
6473}
6474
6475static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6476{
6477 struct amdgpu_device *adev = ring->adev;
6478
6479 if (ring->use_doorbell) {
6480 /* XXX check if swapping is necessary on BE */
536fbf94
KW
6481 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6482 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
aaa36a97 6483 } else {
536fbf94 6484 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
aaa36a97
AD
6485 (void)RREG32(mmCP_RB0_WPTR);
6486 }
6487}
6488
d2edb07b 6489static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
aaa36a97
AD
6490{
6491 u32 ref_and_mask, reg_mem_engine;
6492
4e638ae9
XY
6493 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6494 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
aaa36a97
AD
6495 switch (ring->me) {
6496 case 1:
6497 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6498 break;
6499 case 2:
6500 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6501 break;
6502 default:
6503 return;
6504 }
6505 reg_mem_engine = 0;
6506 } else {
6507 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6508 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6509 }
6510
6511 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6512 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6513 WAIT_REG_MEM_FUNCTION(3) | /* == */
6514 reg_mem_engine));
6515 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6516 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6517 amdgpu_ring_write(ring, ref_and_mask);
6518 amdgpu_ring_write(ring, ref_and_mask);
6519 amdgpu_ring_write(ring, 0x20); /* poll interval */
6520}
6521
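/*
 * Editorial summary: WAIT_REG_MEM with OPERATION(1) is the "write, wait,
 * write" form, so the CP writes ref_and_mask to mmGPU_HDP_FLUSH_REQ and
 * then polls mmGPU_HDP_FLUSH_DONE until the masked value matches the
 * reference.  For example, a compute ring with me == 1 and pipe == 2 waits
 * on GPU_HDP_FLUSH_DONE__CP2_MASK << 2.
 */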
45682886
ML
6522static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6523{
6524 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6525 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6526 EVENT_INDEX(4));
6527
6528 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6529 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6530 EVENT_INDEX(0));
6531}
6532
6533
d35db561
CZ
6534static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6535{
6536 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6537 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6538 WRITE_DATA_DST_SEL(0) |
6539 WR_CONFIRM));
6540 amdgpu_ring_write(ring, mmHDP_DEBUG0);
6541 amdgpu_ring_write(ring, 0);
6542 amdgpu_ring_write(ring, 1);
6543
6544}
6545
93323131 6546static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
d88bf583
CK
6547 struct amdgpu_ib *ib,
6548 unsigned vm_id, bool ctx_switch)
aaa36a97
AD
6549{
6550 u32 header, control = 0;
aaa36a97 6551
de807f81 6552 if (ib->flags & AMDGPU_IB_FLAG_CE)
aaa36a97
AD
6553 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6554 else
6555 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6556
d88bf583 6557 control |= ib->length_dw | (vm_id << 24);
aaa36a97 6558
2e2e3c7f
ML
6559 if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT)
6560 control |= INDIRECT_BUFFER_PRE_ENB(1);
6561
aaa36a97
AD
6562 amdgpu_ring_write(ring, header);
6563 amdgpu_ring_write(ring,
6564#ifdef __BIG_ENDIAN
6565 (2 << 0) |
6566#endif
6567 (ib->gpu_addr & 0xFFFFFFFC));
6568 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6569 amdgpu_ring_write(ring, control);
6570}
6571
93323131 6572static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
d88bf583
CK
6573 struct amdgpu_ib *ib,
6574 unsigned vm_id, bool ctx_switch)
93323131 6575{
33b7ed01 6576 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
93323131 6577
33b7ed01 6578 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
93323131 6579 amdgpu_ring_write(ring,
6580#ifdef __BIG_ENDIAN
62d2ce4b 6581 (2 << 0) |
93323131 6582#endif
62d2ce4b 6583 (ib->gpu_addr & 0xFFFFFFFC));
93323131 6584 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6585 amdgpu_ring_write(ring, control);
6586}
6587
aaa36a97 6588static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
890ee23f 6589 u64 seq, unsigned flags)
aaa36a97 6590{
890ee23f
CZ
6591 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6592 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6593
aaa36a97
AD
6594 /* EVENT_WRITE_EOP - flush caches, send int */
6595 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6596 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6597 EOP_TC_ACTION_EN |
f84e63f2 6598 EOP_TC_WB_ACTION_EN |
aaa36a97
AD
6599 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6600 EVENT_INDEX(5)));
6601 amdgpu_ring_write(ring, addr & 0xfffffffc);
90bea0ab 6602 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
890ee23f 6603 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
aaa36a97
AD
6604 amdgpu_ring_write(ring, lower_32_bits(seq));
6605 amdgpu_ring_write(ring, upper_32_bits(seq));
22c01cc4 6606
aaa36a97
AD
6607}
6608
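/*
 * Editorial note on the EOP packet above: DATA_SEL selects the fence
 * payload (1 = 32-bit, 2 = 64-bit seq value) and INT_SEL(2) raises the
 * interrupt only once the data write is confirmed, so the CPU cannot see
 * the interrupt before the new fence value is visible in memory.
 */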
b8c7b39e 6609static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
aaa36a97 6610{
21cd942e 6611 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5907a0d8 6612 uint32_t seq = ring->fence_drv.sync_seq;
22c01cc4
AA
6613 uint64_t addr = ring->fence_drv.gpu_addr;
6614
6615 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6616 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
9cac5373
CZ
6617 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6618 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
22c01cc4
AA
6619 amdgpu_ring_write(ring, addr & 0xfffffffc);
6620 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6621 amdgpu_ring_write(ring, seq);
6622 amdgpu_ring_write(ring, 0xffffffff);
6623 amdgpu_ring_write(ring, 4); /* poll interval */
b8c7b39e
CK
6624}
6625
6626static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6627 unsigned vm_id, uint64_t pd_addr)
6628{
21cd942e 6629 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5c3422b0 6630
aaa36a97
AD
6631 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6632 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
20a85ff8
CK
6633 WRITE_DATA_DST_SEL(0)) |
6634 WR_CONFIRM);
aaa36a97
AD
6635 if (vm_id < 8) {
6636 amdgpu_ring_write(ring,
6637 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6638 } else {
6639 amdgpu_ring_write(ring,
6640 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6641 }
6642 amdgpu_ring_write(ring, 0);
6643 amdgpu_ring_write(ring, pd_addr >> 12);
6644
aaa36a97
AD
6645 /* bits 0-15 are the VM contexts0-15 */
6646 /* invalidate the cache */
6647 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6648 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6649 WRITE_DATA_DST_SEL(0)));
6650 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6651 amdgpu_ring_write(ring, 0);
6652 amdgpu_ring_write(ring, 1 << vm_id);
6653
6654 /* wait for the invalidate to complete */
6655 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6656 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6657 WAIT_REG_MEM_FUNCTION(0) | /* always */
6658 WAIT_REG_MEM_ENGINE(0))); /* me */
6659 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6660 amdgpu_ring_write(ring, 0);
6661 amdgpu_ring_write(ring, 0); /* ref */
6662 amdgpu_ring_write(ring, 0); /* mask */
6663 amdgpu_ring_write(ring, 0x20); /* poll interval */
6664
6665 /* compute doesn't have PFP */
6666 if (usepfp) {
6667 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6668 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6669 amdgpu_ring_write(ring, 0x0);
aaa36a97
AD
6670 }
6671}
6672
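/*
 * Editorial note: the page-directory base is programmed in 4 KiB units,
 * hence pd_addr >> 12.  VM contexts 0-7 and 8-15 sit in two separate
 * register banks, so e.g. vm_id 10 lands in
 * mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + 2.
 */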
536fbf94 6673static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
aaa36a97
AD
6674{
6675 return ring->adev->wb.wb[ring->wptr_offs];
6676}
6677
6678static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6679{
6680 struct amdgpu_device *adev = ring->adev;
6681
6682 /* XXX check if swapping is necessary on BE */
536fbf94
KW
6683 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6684 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
aaa36a97
AD
6685}
6686
6687static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6688 u64 addr, u64 seq,
890ee23f 6689 unsigned flags)
aaa36a97 6690{
890ee23f
CZ
6691 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6692 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6693
aaa36a97
AD
6694 /* RELEASE_MEM - flush caches, send int */
6695 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6696 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6697 EOP_TC_ACTION_EN |
a3d5aaa8 6698 EOP_TC_WB_ACTION_EN |
aaa36a97
AD
6699 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6700 EVENT_INDEX(5)));
890ee23f 6701 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
aaa36a97
AD
6702 amdgpu_ring_write(ring, addr & 0xfffffffc);
6703 amdgpu_ring_write(ring, upper_32_bits(addr));
6704 amdgpu_ring_write(ring, lower_32_bits(seq));
6705 amdgpu_ring_write(ring, upper_32_bits(seq));
6706}
6707
4e638ae9
XY
6708static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6709 u64 seq, unsigned int flags)
6710{
6711 /* we only allocate 32bit for each seq wb address */
f10b478d 6712 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4e638ae9
XY
6713
6714 /* write fence seq to the "addr" */
6715 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6716 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6717 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6718 amdgpu_ring_write(ring, lower_32_bits(addr));
6719 amdgpu_ring_write(ring, upper_32_bits(addr));
6720 amdgpu_ring_write(ring, lower_32_bits(seq));
6721
6722 if (flags & AMDGPU_FENCE_FLAG_INT) {
6723 /* set register to trigger INT */
6724 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6725 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6726 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6727 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6728 amdgpu_ring_write(ring, 0);
6729 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6730 }
6731}
6732
c2167a65
ML
6733static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6734{
6735 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6736 amdgpu_ring_write(ring, 0);
6737}
6738
753ad49c
ML
6739static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6740{
6741 uint32_t dw2 = 0;
6742
c2ce92fc
ML
6743 if (amdgpu_sriov_vf(ring->adev))
6744 gfx_v8_0_ring_emit_ce_meta_init(ring,
6745 (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
6746
753ad49c
ML
6747 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6748 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
45682886 6749 gfx_v8_0_ring_emit_vgt_flush(ring);
753ad49c
ML
6750 /* set load_global_config & load_global_uconfig */
6751 dw2 |= 0x8001;
6752 /* set load_cs_sh_regs */
6753 dw2 |= 0x01000000;
6754 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6755 dw2 |= 0x10002;
6756
6757 /* set load_ce_ram if preamble presented */
6758 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6759 dw2 |= 0x10000000;
6760 } else {
6761 /* still load_ce_ram if this is the first time a preamble is presented,
6762 * even though no context switch happens.
6763 */
6764 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6765 dw2 |= 0x10000000;
6766 }
6767
6768 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6769 amdgpu_ring_write(ring, dw2);
6770 amdgpu_ring_write(ring, 0);
c2ce92fc
ML
6771
6772 if (amdgpu_sriov_vf(ring->adev))
6773 gfx_v8_0_ring_emit_de_meta_init(ring,
6774 (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
753ad49c
ML
6775}
6776
806ba2d4
ML
6777static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6778{
6779 unsigned ret;
6780
6781 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6782 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6783 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6784 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6785 ret = ring->wptr & ring->buf_mask;
6786 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6787 return ret;
6788}
6789
6790static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6791{
6792 unsigned cur;
6793
6794 BUG_ON(offset > ring->buf_mask);
6795 BUG_ON(ring->ring[offset] != 0x55aa55aa);
6796
6797 cur = (ring->wptr & ring->buf_mask) - 1;
6798 if (likely(cur > offset))
6799 ring->ring[offset] = cur - offset;
6800 else
6801 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6802}
6803
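/*
 * Editorial note, with a worked example: init_cond_exec() emits a
 * COND_EXEC whose DW count is a 0x55aa55aa placeholder.  If that
 * placeholder sits at ring offset 100 and the current write offset is 110,
 * patch_cond_exec() stores 10, the number of DWs the CP skips when
 * *cond_exe_gpu_addr == 0; the else branch covers the case where the
 * write pointer has wrapped around the ring since the placeholder was
 * emitted.
 */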
6804
880e87e3
XY
6805static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6806{
6807 struct amdgpu_device *adev = ring->adev;
6808
6809 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6810 amdgpu_ring_write(ring, 0 | /* src: register*/
6811 (5 << 8) | /* dst: memory */
6812 (1 << 20)); /* write confirm */
6813 amdgpu_ring_write(ring, reg);
6814 amdgpu_ring_write(ring, 0);
6815 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6816 adev->virt.reg_val_offs * 4));
6817 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6818 adev->virt.reg_val_offs * 4));
6819}
6820
6821static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6822 uint32_t val)
6823{
6824 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6825 amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
6826 amdgpu_ring_write(ring, reg);
6827 amdgpu_ring_write(ring, 0);
6828 amdgpu_ring_write(ring, val);
6829}
6830
aaa36a97
AD
6831static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6832 enum amdgpu_interrupt_state state)
6833{
61cb8cef
TSD
6834 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6835 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6836}
6837
6838static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6839 int me, int pipe,
6840 enum amdgpu_interrupt_state state)
6841{
aaa36a97
AD
6842 /*
6843 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6844 * handles the setting of interrupts for this specific pipe. All other
6845 * pipes' interrupts are set by amdkfd.
6846 */
6847
6848 if (me == 1) {
6849 switch (pipe) {
6850 case 0:
aaa36a97
AD
6851 break;
6852 default:
6853 DRM_DEBUG("invalid pipe %d\n", pipe);
6854 return;
6855 }
6856 } else {
6857 DRM_DEBUG("invalid me %d\n", me);
6858 return;
6859 }
6860
61cb8cef
TSD
6861 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6862 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6863}
6864
6865static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6866 struct amdgpu_irq_src *source,
6867 unsigned type,
6868 enum amdgpu_interrupt_state state)
6869{
61cb8cef
TSD
6870 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6871 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6872
6873 return 0;
6874}
6875
6876static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6877 struct amdgpu_irq_src *source,
6878 unsigned type,
6879 enum amdgpu_interrupt_state state)
6880{
61cb8cef
TSD
6881 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6882 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6883
6884 return 0;
6885}
6886
6887static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6888 struct amdgpu_irq_src *src,
6889 unsigned type,
6890 enum amdgpu_interrupt_state state)
6891{
6892 switch (type) {
6893 case AMDGPU_CP_IRQ_GFX_EOP:
6894 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6895 break;
6896 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6897 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6898 break;
6899 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6900 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6901 break;
6902 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6903 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6904 break;
6905 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6906 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6907 break;
6908 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6909 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6910 break;
6911 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6912 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6913 break;
6914 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6915 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6916 break;
6917 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6918 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6919 break;
6920 default:
6921 break;
6922 }
6923 return 0;
6924}
6925
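/*
 * Editorial note: in gfx_v8_0_eop_irq() below, entry->ring_id packs the
 * interrupt source as pipe in bits [1:0], ME in bits [3:2] and queue in
 * bits [6:4]; e.g. ring_id 0x25 decodes to me 1, pipe 1, queue 2.
 */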
6926static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6927 struct amdgpu_irq_src *source,
6928 struct amdgpu_iv_entry *entry)
6929{
6930 int i;
6931 u8 me_id, pipe_id, queue_id;
6932 struct amdgpu_ring *ring;
6933
6934 DRM_DEBUG("IH: CP EOP\n");
6935 me_id = (entry->ring_id & 0x0c) >> 2;
6936 pipe_id = (entry->ring_id & 0x03) >> 0;
6937 queue_id = (entry->ring_id & 0x70) >> 4;
6938
6939 switch (me_id) {
6940 case 0:
6941 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6942 break;
6943 case 1:
6944 case 2:
6945 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6946 ring = &adev->gfx.compute_ring[i];
6947 /* Per-queue interrupt is supported for MEC starting from VI.
6948 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6949 */
6950 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6951 amdgpu_fence_process(ring);
6952 }
6953 break;
6954 }
6955 return 0;
6956}
6957
6958static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6959 struct amdgpu_irq_src *source,
6960 struct amdgpu_iv_entry *entry)
6961{
6962 DRM_ERROR("Illegal register access in command stream\n");
6963 schedule_work(&adev->reset_work);
6964 return 0;
6965}
6966
6967static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6968 struct amdgpu_irq_src *source,
6969 struct amdgpu_iv_entry *entry)
6970{
6971 DRM_ERROR("Illegal instruction in command stream\n");
6972 schedule_work(&adev->reset_work);
6973 return 0;
6974}
6975
4e638ae9
XY
6976static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6977 struct amdgpu_irq_src *src,
6978 unsigned int type,
6979 enum amdgpu_interrupt_state state)
6980{
07c397f9 6981 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
4e638ae9 6982
07c397f9 6983 BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
4e638ae9 6984
4e638ae9
XY
6985 switch (type) {
6986 case AMDGPU_CP_KIQ_IRQ_DRIVER0:
ccaf3574
TSD
6987 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
6988 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6989 if (ring->me == 1)
6990 WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
6991 ring->pipe,
6992 GENERIC2_INT_ENABLE,
6993 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6994 else
6995 WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
6996 ring->pipe,
6997 GENERIC2_INT_ENABLE,
6998 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
4e638ae9
XY
6999 break;
7000 default:
7001 BUG(); /* kiq only supports GENERIC2_INT now */
7002 break;
7003 }
7004 return 0;
7005}
7006
7007static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7008 struct amdgpu_irq_src *source,
7009 struct amdgpu_iv_entry *entry)
7010{
7011 u8 me_id, pipe_id, queue_id;
07c397f9 7012 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
4e638ae9 7013
07c397f9 7014 BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
4e638ae9
XY
7015
7016 me_id = (entry->ring_id & 0x0c) >> 2;
7017 pipe_id = (entry->ring_id & 0x03) >> 0;
7018 queue_id = (entry->ring_id & 0x70) >> 4;
7019 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7020 me_id, pipe_id, queue_id);
7021
7022 amdgpu_fence_process(ring);
7023 return 0;
7024}
7025
a1255107 7026static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
88a907d6 7027 .name = "gfx_v8_0",
aaa36a97 7028 .early_init = gfx_v8_0_early_init,
ccba7691 7029 .late_init = gfx_v8_0_late_init,
aaa36a97
AD
7030 .sw_init = gfx_v8_0_sw_init,
7031 .sw_fini = gfx_v8_0_sw_fini,
7032 .hw_init = gfx_v8_0_hw_init,
7033 .hw_fini = gfx_v8_0_hw_fini,
7034 .suspend = gfx_v8_0_suspend,
7035 .resume = gfx_v8_0_resume,
7036 .is_idle = gfx_v8_0_is_idle,
7037 .wait_for_idle = gfx_v8_0_wait_for_idle,
3d7c6384 7038 .check_soft_reset = gfx_v8_0_check_soft_reset,
1057f20c 7039 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
aaa36a97 7040 .soft_reset = gfx_v8_0_soft_reset,
e4ae0fc3 7041 .post_soft_reset = gfx_v8_0_post_soft_reset,
aaa36a97
AD
7042 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
7043 .set_powergating_state = gfx_v8_0_set_powergating_state,
ebd843d6 7044 .get_clockgating_state = gfx_v8_0_get_clockgating_state,
aaa36a97
AD
7045};
7046
7047static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
21cd942e 7048 .type = AMDGPU_RING_TYPE_GFX,
79887142
CK
7049 .align_mask = 0xff,
7050 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
536fbf94 7051 .support_64bit_ptrs = false,
e7706b42 7052 .get_rptr = gfx_v8_0_ring_get_rptr,
aaa36a97
AD
7053 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
7054 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
e9d672b2
ML
7055 .emit_frame_size = /* maximum 215 dw, counting 16 IBs in */
7056 5 + /* COND_EXEC */
7057 7 + /* PIPELINE_SYNC */
7058 19 + /* VM_FLUSH */
7059 8 + /* FENCE for VM_FLUSH */
7060 20 + /* GDS switch */
7061 4 + /* double SWITCH_BUFFER,
7062 the first COND_EXEC jumps to the place just
7063 prior to this double SWITCH_BUFFER */
7064 5 + /* COND_EXEC */
7065 7 + /* HDP_flush */
7066 4 + /* VGT_flush */
7067 14 + /* CE_META */
7068 31 + /* DE_META */
7069 3 + /* CNTX_CTRL */
7070 5 + /* HDP_INVL */
7071 8 + 8 + /* FENCE x2 */
7072 2, /* SWITCH_BUFFER */
e12f3d7a 7073 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
93323131 7074 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
aaa36a97 7075 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
b8c7b39e 7076 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
aaa36a97
AD
7077 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7078 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
d2edb07b 7079 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
d35db561 7080 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
aaa36a97
AD
7081 .test_ring = gfx_v8_0_ring_test_ring,
7082 .test_ib = gfx_v8_0_ring_test_ib,
edff0e28 7083 .insert_nop = amdgpu_ring_insert_nop,
9e5d5309 7084 .pad_ib = amdgpu_ring_generic_pad_ib,
c2167a65 7085 .emit_switch_buffer = gfx_v8_ring_emit_sb,
753ad49c 7086 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
806ba2d4
ML
7087 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
7088 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
aaa36a97
AD
7089};
7090
7091static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
21cd942e 7092 .type = AMDGPU_RING_TYPE_COMPUTE,
79887142
CK
7093 .align_mask = 0xff,
7094 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
536fbf94 7095 .support_64bit_ptrs = false,
e7706b42 7096 .get_rptr = gfx_v8_0_ring_get_rptr,
aaa36a97
AD
7097 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7098 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
e12f3d7a
CK
7099 .emit_frame_size =
7100 20 + /* gfx_v8_0_ring_emit_gds_switch */
7101 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7102 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
7103 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7104 17 + /* gfx_v8_0_ring_emit_vm_flush */
7105 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7106 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
93323131 7107 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
aaa36a97 7108 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
b8c7b39e 7109 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
aaa36a97
AD
7110 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7111 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
35074d2d 7112 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
d35db561 7113 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
aaa36a97
AD
7114 .test_ring = gfx_v8_0_ring_test_ring,
7115 .test_ib = gfx_v8_0_ring_test_ib,
edff0e28 7116 .insert_nop = amdgpu_ring_insert_nop,
9e5d5309 7117 .pad_ib = amdgpu_ring_generic_pad_ib,
aaa36a97
AD
7118};
7119
4e638ae9
XY
7120static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7121 .type = AMDGPU_RING_TYPE_KIQ,
7122 .align_mask = 0xff,
7123 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
536fbf94 7124 .support_64bit_ptrs = false,
4e638ae9
XY
7125 .get_rptr = gfx_v8_0_ring_get_rptr,
7126 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7127 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7128 .emit_frame_size =
7129 20 + /* gfx_v8_0_ring_emit_gds_switch */
7130 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7131 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
7132 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7133 17 + /* gfx_v8_0_ring_emit_vm_flush */
7134 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7135 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7136 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7137 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
4e638ae9
XY
7138 .test_ring = gfx_v8_0_ring_test_ring,
7139 .test_ib = gfx_v8_0_ring_test_ib,
7140 .insert_nop = amdgpu_ring_insert_nop,
7141 .pad_ib = amdgpu_ring_generic_pad_ib,
880e87e3
XY
7142 .emit_rreg = gfx_v8_0_ring_emit_rreg,
7143 .emit_wreg = gfx_v8_0_ring_emit_wreg,
4e638ae9
XY
7144};
7145
aaa36a97
AD
7146static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7147{
7148 int i;
7149
4e638ae9
XY
7150 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7151
aaa36a97
AD
7152 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7153 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7154
7155 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7156 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7157}
7158
7159static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7160 .set = gfx_v8_0_set_eop_interrupt_state,
7161 .process = gfx_v8_0_eop_irq,
7162};
7163
7164static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7165 .set = gfx_v8_0_set_priv_reg_fault_state,
7166 .process = gfx_v8_0_priv_reg_irq,
7167};
7168
7169static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7170 .set = gfx_v8_0_set_priv_inst_fault_state,
7171 .process = gfx_v8_0_priv_inst_irq,
7172};
7173
4e638ae9
XY
7174static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7175 .set = gfx_v8_0_kiq_set_interrupt_state,
7176 .process = gfx_v8_0_kiq_irq,
7177};
7178
aaa36a97
AD
7179static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7180{
7181 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7182 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7183
7184 adev->gfx.priv_reg_irq.num_types = 1;
7185 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7186
7187 adev->gfx.priv_inst_irq.num_types = 1;
7188 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
4e638ae9
XY
7189
7190 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7191 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
aaa36a97
AD
7192}
7193
dbff57bc
AD
7194static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7195{
ae6a58e4 7196 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
dbff57bc
AD
7197}
7198
aaa36a97
AD
7199static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7200{
7201 /* init asic gds info */
7202 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7203 adev->gds.gws.total_size = 64;
7204 adev->gds.oa.total_size = 16;
7205
7206 if (adev->gds.mem.total_size == 64 * 1024) {
7207 adev->gds.mem.gfx_partition_size = 4096;
7208 adev->gds.mem.cs_partition_size = 4096;
7209
7210 adev->gds.gws.gfx_partition_size = 4;
7211 adev->gds.gws.cs_partition_size = 4;
7212
7213 adev->gds.oa.gfx_partition_size = 4;
7214 adev->gds.oa.cs_partition_size = 1;
7215 } else {
7216 adev->gds.mem.gfx_partition_size = 1024;
7217 adev->gds.mem.cs_partition_size = 1024;
7218
7219 adev->gds.gws.gfx_partition_size = 16;
7220 adev->gds.gws.cs_partition_size = 16;
7221
7222 adev->gds.oa.gfx_partition_size = 4;
7223 adev->gds.oa.cs_partition_size = 4;
7224 }
7225}
7226
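/*
 * Editorial note: with the common 64 KiB GDS, the 4096-byte partitions
 * give 64 * 1024 / 4096 = 16 slices (editorial reading: one per VMID);
 * the else branch provides smaller fallback sizes for parts with less GDS.
 */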
9de06de8
NH
7227static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7228 u32 bitmap)
7229{
7230 u32 data;
7231
7232 if (!bitmap)
7233 return;
7234
7235 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7236 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7237
7238 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7239}
7240
8f8e00c1 7241static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
aaa36a97 7242{
8f8e00c1 7243 u32 data, mask;
aaa36a97 7244
5003f278
TSD
7245 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7246 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
aaa36a97 7247
6157bd7a 7248 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
aaa36a97 7249
5003f278 7250 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
aaa36a97
AD
7251}
7252
7dae69a2 7253static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
aaa36a97
AD
7254{
7255 int i, j, k, counter, active_cu_number = 0;
7256 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7dae69a2 7257 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
9de06de8 7258 unsigned disable_masks[4 * 2];
aaa36a97 7259
6157bd7a
FC
7260 memset(cu_info, 0, sizeof(*cu_info));
7261
9de06de8
NH
7262 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7263
aaa36a97
AD
7264 mutex_lock(&adev->grbm_idx_mutex);
7265 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7266 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7267 mask = 1;
7268 ao_bitmap = 0;
7269 counter = 0;
9559ef5b 7270 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
9de06de8
NH
7271 if (i < 4 && j < 2)
7272 gfx_v8_0_set_user_cu_inactive_bitmap(
7273 adev, disable_masks[i * 2 + j]);
8f8e00c1 7274 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
aaa36a97
AD
7275 cu_info->bitmap[i][j] = bitmap;
7276
8f8e00c1 7277 for (k = 0; k < 16; k ++) {
aaa36a97
AD
7278 if (bitmap & mask) {
7279 if (counter < 2)
7280 ao_bitmap |= mask;
7281 counter ++;
7282 }
7283 mask <<= 1;
7284 }
7285 active_cu_number += counter;
7286 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7287 }
7288 }
9559ef5b 7289 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
8f8e00c1 7290 mutex_unlock(&adev->grbm_idx_mutex);
aaa36a97
AD
7291
7292 cu_info->number = active_cu_number;
7293 cu_info->ao_cu_mask = ao_cu_mask;
aaa36a97 7294}
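/*
 * Editorial note: the loop above marks the first two active CUs of each
 * SH as "always on" (counter < 2).  ao_cu_mask places each SH's always-on
 * bitmap at bit offset i * 16 + j * 8, i.e. two SH slots per SE.
 */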
a1255107
AD
7295
7296const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7297{
7298 .type = AMD_IP_BLOCK_TYPE_GFX,
7299 .major = 8,
7300 .minor = 0,
7301 .rev = 0,
7302 .funcs = &gfx_v8_0_ip_funcs,
7303};
7304
7305const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7306{
7307 .type = AMD_IP_BLOCK_TYPE_GFX,
7308 .major = 8,
7309 .minor = 1,
7310 .rev = 0,
7311 .funcs = &gfx_v8_0_ip_funcs,
7312};
acad2b2a
ML
7313
7314static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7315{
7316 uint64_t ce_payload_addr;
7317 int cnt_ce;
7318 static union {
49abb980
XY
7319 struct vi_ce_ib_state regular;
7320 struct vi_ce_ib_state_chained_ib chained;
e8411302 7321 } ce_payload = {};
acad2b2a
ML
7322
7323 if (ring->adev->virt.chained_ib_support) {
49abb980 7324 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
acad2b2a
ML
7325 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7326 } else {
49abb980 7327 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload);
acad2b2a
ML
7328 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7329 }
7330
7331 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7332 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7333 WRITE_DATA_DST_SEL(8) |
7334 WR_CONFIRM) |
7335 WRITE_DATA_CACHE_POLICY(0));
7336 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7337 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7338 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7339}
7340
7341static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7342{
7343 uint64_t de_payload_addr, gds_addr;
7344 int cnt_de;
7345 static union {
49abb980
XY
7346 struct vi_de_ib_state regular;
7347 struct vi_de_ib_state_chained_ib chained;
e8411302 7348 } de_payload = {};
acad2b2a
ML
7349
7350 gds_addr = csa_addr + 4096;
7351 if (ring->adev->virt.chained_ib_support) {
7352 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7353 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
49abb980 7354 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
acad2b2a
ML
7355 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7356 } else {
7357 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7358 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
49abb980 7359 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
acad2b2a
ML
7360 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7361 }
7362
7363 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7364 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7365 WRITE_DATA_DST_SEL(8) |
7366 WR_CONFIRM) |
7367 WRITE_DATA_CACHE_POLICY(0));
7368 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7369 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7370 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7371}
5ff98043
ML
7372
7373/* create MQD for each compute queue */
0875a242 7374static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev)
5ff98043
ML
7375{
7376 struct amdgpu_ring *ring = NULL;
7377 int r, i;
7378
7379 /* create MQD for KIQ */
7380 ring = &adev->gfx.kiq.ring;
7381 if (!ring->mqd_obj) {
7382 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
b0ac2a32
AD
7383 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7384 &ring->mqd_gpu_addr, &ring->mqd_ptr);
5ff98043
ML
7385 if (r) {
7386 dev_warn(adev->dev, "failed to create ring mqd obj (%d)", r);
7387 return r;
7388 }
9b49c3ab
ML
7389
7390 /* prepare MQD backup */
7391 adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7392 if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
7393 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
5ff98043
ML
7394 }
7395
7396 /* create MQD for each KCQ */
b0ac2a32 7397 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5ff98043
ML
7398 ring = &adev->gfx.compute_ring[i];
7399 if (!ring->mqd_obj) {
7400 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
b0ac2a32
AD
7401 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7402 &ring->mqd_gpu_addr, &ring->mqd_ptr);
5ff98043
ML
7403 if (r) {
7404 dev_warn(adev->dev, "failed to create ring mqd obj (%d)", r);
7405 return r;
7406 }
9b49c3ab
ML
7407
7408 /* prepare MQD backup */
7409 adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7410 if (!adev->gfx.mec.mqd_backup[i])
7411 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
5ff98043
ML
7412 }
7413 }
7414
7415 return 0;
7416}
7417
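/*
 * Editorial note: adev->gfx.mec.mqd_backup[] reserves its last slot,
 * index AMDGPU_MAX_COMPUTE_RINGS, for the KIQ ring, while slots
 * 0..num_compute_rings-1 shadow the compute queues; the backups exist so
 * MQD contents can be restored later without re-deriving them (an
 * editorial reading of the allocation above).
 */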
0875a242 7418static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
5ff98043
ML
7419{
7420 struct amdgpu_ring *ring = NULL;
7421 int i;
7422
7423 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7424 ring = &adev->gfx.compute_ring[i];
24de7515 7425 kfree(adev->gfx.mec.mqd_backup[i]);
59a82d7d
XY
7426 amdgpu_bo_free_kernel(&ring->mqd_obj,
7427 &ring->mqd_gpu_addr,
7428 &ring->mqd_ptr);
5ff98043
ML
7429 }
7430
7431 ring = &adev->gfx.kiq.ring;
24de7515 7432 kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
59a82d7d
XY
7433 amdgpu_bo_free_kernel(&ring->mqd_obj,
7434 &ring->mqd_gpu_addr,
7435 &ring->mqd_ptr);
7436}