]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drm/amdgpu: implement si_read_bios_from_rom
[mirror_ubuntu-bionic-kernel.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
CommitLineData
aaa36a97
AD
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23#include <linux/firmware.h>
248a1d6f 24#include <drm/drmP.h>
aaa36a97
AD
25#include "amdgpu.h"
26#include "amdgpu_gfx.h"
27#include "vi.h"
aeab2032 28#include "vi_structs.h"
aaa36a97
AD
29#include "vid.h"
30#include "amdgpu_ucode.h"
68182d90 31#include "amdgpu_atombios.h"
eeade25a 32#include "atombios_i2c.h"
aaa36a97
AD
33#include "clearstate_vi.h"
34
35#include "gmc/gmc_8_2_d.h"
36#include "gmc/gmc_8_2_sh_mask.h"
37
38#include "oss/oss_3_0_d.h"
39#include "oss/oss_3_0_sh_mask.h"
40
41#include "bif/bif_5_0_d.h"
42#include "bif/bif_5_0_sh_mask.h"
aaa36a97
AD
43#include "gca/gfx_8_0_d.h"
44#include "gca/gfx_8_0_enum.h"
45#include "gca/gfx_8_0_sh_mask.h"
46#include "gca/gfx_8_0_enum.h"
47
aaa36a97
AD
48#include "dce/dce_10_0_d.h"
49#include "dce/dce_10_0_sh_mask.h"
50
d9d533c1
KW
51#include "smu/smu_7_1_3_d.h"
52
/* Number of GFX rings and per-MEC HPD (hardware queue descriptor) EOP
 * buffer size used by the compute queues. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 2048

/* Per-ASIC "golden" GB_ADDR_CONFIG values programmed during init. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Helpers to place tiling-mode fields at their shift positions inside
 * GB_TILE_MODEn / GB_MACROTILE_MODEn registers. */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
6e378858
EH
/* Per-block bits of RLC_CGTT_MGCG_OVERRIDE used when toggling
 * medium-grain/coarse-grain clock gating. */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set (1) or clear (0) a BPM register via serdes. */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

/* Number of entries in the RLC direct register list format.
 * NOTE(review): value comes from RLC firmware spec — confirm. */
#define RLC_FormatDirectRegListLength        14
c65444fe
JZ
/* Microcode images (CE/PFP/ME/MEC[/MEC2]/RLC) requested at runtime for
 * each supported VI-family ASIC.  Declaring them here records the
 * module's firmware dependencies for modinfo/initramfs tooling. */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

/* Stoney has no second MEC. */
MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

/* Topaz (Iceland) has no second MEC. */
MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
aaa36a97
AD
/* Per-VMID register offsets for the GDS partitions: {base, size, GWS,
 * OA} for each of the 16 VMIDs, indexed by VMID. */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
167
/* Tonga A11 golden register settings, consumed by
 * amdgpu_program_register_sequence().  Three u32s per entry —
 * NOTE(review): presumably {register, AND mask, OR value}; confirm
 * against amdgpu_program_register_sequence(). */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
187
/* Tonga common golden settings (raster config, GB_ADDR_CONFIG, SPI CU
 * reservations); same {register, mask, value} triple layout. */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
199
/* Tonga MGCG/CGCG (medium/coarse-grain clock gating) init sequence.
 * GRBM_GFX_INDEX 0xe0000000 appears to broadcast to all SEs/SHs so the
 * CGTT/CGTS writes hit every instance — NOTE(review): confirm broadcast
 * encoding.  Per-CU CGTS_CUn_* entries follow for CU0..CU7. */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
278
/* Polaris11 A11 golden register settings; also used for Polaris12 in
 * gfx_v8_0_init_golden_registers().  {register, mask, value} triples. */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
299
/* Polaris11/12 common golden settings: GB_ADDR_CONFIG matches
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002); no raster-config
 * entries here, unlike Polaris10. */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
309
/* Polaris10 A11 golden register settings; {register, mask, value}
 * triples applied by amdgpu_program_register_sequence(). */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
330
/* Polaris10 common golden settings; GB_ADDR_CONFIG 0x22011003 matches
 * TONGA_GB_ADDR_CONFIG_GOLDEN. */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
342
af15a2d5
DZ
/* Fiji common golden settings.  GRBM_GFX_INDEX is written a second time
 * before SPI_CONFIG_CNTL_1 — NOTE(review): presumably to re-broadcast
 * after the SPI CU-reserve writes; confirm. */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
356
/* Fiji A10 golden register settings; {register, mask, value} triples. */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
371
/* Fiji MGCG/CGCG init sequence.  Same CGTT programming as Tonga but
 * without the per-CU CGTS_CUn_* entries. */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
410
aaa36a97
AD
/* Iceland (Topaz) A11 golden register settings. */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
430
/* Iceland common golden settings; GB_ADDR_CONFIG 0x22010001 matches
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN. */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
442
/* Iceland MGCG/CGCG init sequence.  Note the CP/CPC/CPF clock controls
 * use 0xc0000100 (vs 0x00000100 on Tonga) and per-CU entries only
 * cover CU0..CU5. */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
510
/* Carrizo A11 golden register settings. */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
526
/* Carrizo common golden settings; GB_ADDR_CONFIG 0x22010001 matches
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN. */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
538
/* Carrizo MGCG/CGCG init sequence; per-CU entries cover CU0..CU7.
 * Final RLC_CGCG_CGLS_CTRL value is 0x0020003f (vs 0x0020003c on
 * Tonga/Fiji/Iceland). */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
617
e3c7656c
SL
/* Stoney A11 golden register settings. */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
631
/* Stoney common golden settings; GB_ADDR_CONFIG 0x12010001 is unique
 * among the VI parts in this file. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
643
/* Stoney MGCG/CGCG init sequence — much shorter than the other ASICs:
 * just CGCG/CGLS control, memory light sleep, and CGTS SM control. */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
652
aaa36a97
AD
/* Forward declarations for helpers defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
aaa36a97
AD
661
/*
 * Program the per-ASIC "golden" register settings (clock-gating init,
 * workaround values and common config) by replaying the register/mask/value
 * tables defined earlier in this file via amdgpu_program_register_sequence().
 * Unknown ASICs fall through to the default case and get no programming.
 */
662static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
663{
664	switch (adev->asic_type) {
665	case CHIP_TOPAZ:
666		amdgpu_program_register_sequence(adev,
667						 iceland_mgcg_cgcg_init,
668						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
669		amdgpu_program_register_sequence(adev,
670						 golden_settings_iceland_a11,
671						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
672		amdgpu_program_register_sequence(adev,
673						 iceland_golden_common_all,
674						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
675		break;
af15a2d5
DZ
676	case CHIP_FIJI:
677		amdgpu_program_register_sequence(adev,
678						 fiji_mgcg_cgcg_init,
679						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
680		amdgpu_program_register_sequence(adev,
681						 golden_settings_fiji_a10,
682						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
683		amdgpu_program_register_sequence(adev,
684						 fiji_golden_common_all,
685						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
686		break;
687
aaa36a97
AD
688	case CHIP_TONGA:
689		amdgpu_program_register_sequence(adev,
690						 tonga_mgcg_cgcg_init,
691						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
692		amdgpu_program_register_sequence(adev,
693						 golden_settings_tonga_a11,
694						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
695		amdgpu_program_register_sequence(adev,
696						 tonga_golden_common_all,
697						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
698		break;
2cc0c0b5 699	case CHIP_POLARIS11:
c4642a47 700	case CHIP_POLARIS12:
	/* Polaris11 and Polaris12 share the same golden-register tables. */
68182d90 701		amdgpu_program_register_sequence(adev,
2cc0c0b5
FC
702						 golden_settings_polaris11_a11,
703						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
68182d90 704		amdgpu_program_register_sequence(adev,
2cc0c0b5
FC
705						 polaris11_golden_common_all,
706						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
68182d90 707		break;
2cc0c0b5 708	case CHIP_POLARIS10:
68182d90 709		amdgpu_program_register_sequence(adev,
2cc0c0b5
FC
710						 golden_settings_polaris10_a11,
711						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
68182d90 712		amdgpu_program_register_sequence(adev,
2cc0c0b5
FC
713						 polaris10_golden_common_all,
714						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
d9d533c1 715		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
5765a36d
RZ
	/*
	 * Board-specific quirk: for three specific Polaris10 boards
	 * (matched by PCI revision/subsystem IDs) issue two extra
	 * atombios i2c transactions.  NOTE(review): purpose of the raw
	 * i2c values (0x1E/0xDD, 0x1F/0xD0) is not visible here.
	 */
716		if (adev->pdev->revision == 0xc7 &&
717		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
718		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
719		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
eeade25a
KW
720			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
721			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
722		}
68182d90 723		break;
aaa36a97
AD
724	case CHIP_CARRIZO:
725		amdgpu_program_register_sequence(adev,
726						 cz_mgcg_cgcg_init,
727						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
728		amdgpu_program_register_sequence(adev,
729						 cz_golden_settings_a11,
730						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
731		amdgpu_program_register_sequence(adev,
732						 cz_golden_common_all,
733						 (const u32)ARRAY_SIZE(cz_golden_common_all));
734		break;
e3c7656c
SL
735	case CHIP_STONEY:
736		amdgpu_program_register_sequence(adev,
737						 stoney_mgcg_cgcg_init,
738						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
739		amdgpu_program_register_sequence(adev,
740						 stoney_golden_settings_a11,
741						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
742		amdgpu_program_register_sequence(adev,
743						 stoney_golden_common_all,
744						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
745		break;
aaa36a97
AD
746	default:
747		break;
748	}
749}
750
/*
 * Initialize the GFX scratch-register allocator: 7 registers starting at
 * mmSCRATCH_REG0, with a free-bit mask covering all of them (all free).
 */
751static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
752{
aaa36a97
AD
753	adev->gfx.scratch.num_reg = 7;
754	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
50261151 755	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
aaa36a97
AD
756}
757
/*
 * Basic ring sanity test: write 0xCAFEDEAD to a scratch register, submit a
 * SET_UCONFIG_REG packet on the ring that stores 0xDEADBEEF there, then poll
 * (up to adev->usec_timeout microseconds) until the value appears.
 * Returns 0 on success, a negative errno on allocation failure or -EINVAL
 * if the CP never wrote the magic value.
 */
758static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
759{
760	struct amdgpu_device *adev = ring->adev;
761	uint32_t scratch;
762	uint32_t tmp = 0;
763	unsigned i;
764	int r;
765
766	r = amdgpu_gfx_scratch_get(adev, &scratch);
767	if (r) {
768		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
769		return r;
770	}
	/* Seed the scratch reg so a stale 0xDEADBEEF can't fake success. */
771	WREG32(scratch, 0xCAFEDEAD);
a27de35c 772	r = amdgpu_ring_alloc(ring, 3);
aaa36a97
AD
773	if (r) {
774		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
775			  ring->idx, r);
776		amdgpu_gfx_scratch_free(adev, scratch);
777		return r;
778	}
779	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
780	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
781	amdgpu_ring_write(ring, 0xDEADBEEF);
a27de35c 782	amdgpu_ring_commit(ring);
aaa36a97
AD
783
784	for (i = 0; i < adev->usec_timeout; i++) {
785		tmp = RREG32(scratch);
786		if (tmp == 0xDEADBEEF)
787			break;
788		DRM_UDELAY(1);
789	}
790	if (i < adev->usec_timeout) {
791		DRM_INFO("ring test on %d succeeded in %d usecs\n",
792			 ring->idx, i);
793	} else {
794		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
795			  ring->idx, scratch, tmp);
796		r = -EINVAL;
797	}
798	amdgpu_gfx_scratch_free(adev, scratch);
799	return r;
800}
801
/*
 * Indirect-buffer sanity test: build a 3-dword IB that writes 0xDEADBEEF to a
 * scratch register (pre-seeded with 0xCAFEDEAD), schedule it on @ring, wait
 * up to @timeout (jiffies, dma_fence_wait_timeout semantics) for the fence,
 * then verify the scratch value.  Returns 0 on success, -ETIMEDOUT on fence
 * timeout, -EINVAL on a wrong scratch value, or a negative errno on setup
 * failure.  Cleanup uses the goto-label pattern (err2 frees the IB/fence,
 * err1 frees the scratch register).
 */
bbec97aa 802static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
aaa36a97
AD
803{
804	struct amdgpu_device *adev = ring->adev;
805	struct amdgpu_ib ib;
f54d1867 806	struct dma_fence *f = NULL;
aaa36a97
AD
807	uint32_t scratch;
808	uint32_t tmp = 0;
bbec97aa 809	long r;
aaa36a97
AD
810
811	r = amdgpu_gfx_scratch_get(adev, &scratch);
812	if (r) {
bbec97aa 813		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
aaa36a97
AD
814		return r;
815	}
816	WREG32(scratch, 0xCAFEDEAD);
b203dd95 817	memset(&ib, 0, sizeof(ib));
b07c60c0 818	r = amdgpu_ib_get(adev, NULL, 256, &ib);
aaa36a97 819	if (r) {
bbec97aa 820		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
42d13693 821		goto err1;
aaa36a97
AD
822	}
823	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
824	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
825	ib.ptr[2] = 0xDEADBEEF;
826	ib.length_dw = 3;
42d13693 827
50ddc75e 828	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
42d13693
CZ
829	if (r)
830		goto err2;
831
f54d1867 832	r = dma_fence_wait_timeout(f, false, timeout);
bbec97aa
CK
	/* dma_fence_wait_timeout: 0 = timed out, <0 = error, >0 = signaled. */
833	if (r == 0) {
834		DRM_ERROR("amdgpu: IB test timed out.\n");
835		r = -ETIMEDOUT;
836		goto err2;
837	} else if (r < 0) {
838		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
42d13693 839		goto err2;
aaa36a97 840	}
6d44565d
CK
841	tmp = RREG32(scratch);
842	if (tmp == 0xDEADBEEF) {
843		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
bbec97aa 844		r = 0;
aaa36a97
AD
845	} else {
846		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
847			  scratch, tmp);
848		r = -EINVAL;
849	}
42d13693 850err2:
cc55c45d 851	amdgpu_ib_free(adev, &ib, NULL);
f54d1867 852	dma_fence_put(f);
42d13693
CZ
853err1:
854	amdgpu_gfx_scratch_free(adev, scratch);
aaa36a97
AD
855	return r;
856}
857
13331ac3 858
d6b20c87
AD
/*
 * Release every GFX firmware image requested by gfx_v8_0_init_microcode()
 * and NULL the pointers.  MEC2 firmware is only released on ASICs that have
 * it (Stoney and Topaz never load mec2), and the RLC register-list buffer
 * allocated during microcode init is freed as well.
 */
859static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
860{
13331ac3
ML
861	release_firmware(adev->gfx.pfp_fw);
862	adev->gfx.pfp_fw = NULL;
863	release_firmware(adev->gfx.me_fw);
864	adev->gfx.me_fw = NULL;
865	release_firmware(adev->gfx.ce_fw);
866	adev->gfx.ce_fw = NULL;
867	release_firmware(adev->gfx.rlc_fw);
868	adev->gfx.rlc_fw = NULL;
869	release_firmware(adev->gfx.mec_fw);
870	adev->gfx.mec_fw = NULL;
871	if ((adev->asic_type != CHIP_STONEY) &&
872	    (adev->asic_type != CHIP_TOPAZ))
873		release_firmware(adev->gfx.mec2_fw);
	/* Pointer is cleared unconditionally; harmless when already NULL. */
874	adev->gfx.mec2_fw = NULL;
875
876	kfree(adev->gfx.rlc.register_list_format);
877}
878
aaa36a97
AD
/*
 * Request and validate all GFX8 firmware images (PFP, ME, CE, RLC, MEC and,
 * where the ASIC has one, MEC2) for the detected chip, parse their headers
 * into adev->gfx.* version/feature fields, copy the RLC register-list tables
 * out of the RLC image, and — when firmware is loaded via the SMU — register
 * each image in adev->firmware.ucode[] and account its size.  On any failure
 * all requested firmware is released and the error is returned.
 */
879static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
880{
881	const char *chip_name;
882	char fw_name[30];
883	int err;
884	struct amdgpu_firmware_info *info = NULL;
885	const struct common_firmware_header *header = NULL;
595fd013 886	const struct gfx_firmware_header_v1_0 *cp_hdr;
2b6cd977
EH
887	const struct rlc_firmware_header_v2_0 *rlc_hdr;
888	unsigned int *tmp = NULL, i;
aaa36a97
AD
889
890	DRM_DEBUG("\n");
891
	/* Map the ASIC type to the firmware filename prefix. */
892	switch (adev->asic_type) {
893	case CHIP_TOPAZ:
894		chip_name = "topaz";
895		break;
896	case CHIP_TONGA:
897		chip_name = "tonga";
898		break;
899	case CHIP_CARRIZO:
900		chip_name = "carrizo";
901		break;
af15a2d5
DZ
902	case CHIP_FIJI:
903		chip_name = "fiji";
904		break;
2cc0c0b5
FC
905	case CHIP_POLARIS11:
906		chip_name = "polaris11";
68182d90 907		break;
2cc0c0b5
FC
908	case CHIP_POLARIS10:
909		chip_name = "polaris10";
68182d90 910		break;
c4642a47
JZ
911	case CHIP_POLARIS12:
912		chip_name = "polaris12";
913		break;
e3c7656c
SL
914	case CHIP_STONEY:
915		chip_name = "stoney";
916		break;
aaa36a97
AD
917	default:
918		BUG();
919	}
920
c65444fe 921	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
aaa36a97
AD
922	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
923	if (err)
924		goto out;
925	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
926	if (err)
927		goto out;
595fd013
JZ
928	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
929	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
930	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 931
c65444fe 932	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
aaa36a97
AD
933	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
934	if (err)
935		goto out;
936	err = amdgpu_ucode_validate(adev->gfx.me_fw);
937	if (err)
938		goto out;
595fd013
JZ
939	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
940	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
ae65a26d 941
595fd013 942	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 943
c65444fe 944	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
aaa36a97
AD
945	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
946	if (err)
947		goto out;
948	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
949	if (err)
950		goto out;
595fd013
JZ
951	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
952	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
953	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 954
63a7c748
TH
955	/*
956	 * Support for MCBP/Virtualization in combination with chained IBs is
957	 * formal released on feature version #46
958	 */
959	if (adev->gfx.ce_feature_version >= 46 &&
960	    adev->gfx.pfp_feature_version >= 46) {
961		adev->virt.chained_ib_support = true;
962		DRM_INFO("Chained IB support enabled!\n");
963	} else
964		adev->virt.chained_ib_support = false;
965
c65444fe 966	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
aaa36a97
AD
967	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
968	if (err)
969		goto out;
	/* NOTE(review): unlike the other images, this validate result is not
	 * checked before the header is dereferenced — confirm intent. */
970	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
2b6cd977
EH
971	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
972	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
973	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
974
	/* Pull the RLC save/restore and register-list layout out of the
	 * little-endian firmware header. */
975	adev->gfx.rlc.save_and_restore_offset =
976			le32_to_cpu(rlc_hdr->save_and_restore_offset);
977	adev->gfx.rlc.clear_state_descriptor_offset =
978			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
979	adev->gfx.rlc.avail_scratch_ram_locations =
980			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
981	adev->gfx.rlc.reg_restore_list_size =
982			le32_to_cpu(rlc_hdr->reg_restore_list_size);
983	adev->gfx.rlc.reg_list_format_start =
984			le32_to_cpu(rlc_hdr->reg_list_format_start);
985	adev->gfx.rlc.reg_list_format_separate_start =
986			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
987	adev->gfx.rlc.starting_offsets_start =
988			le32_to_cpu(rlc_hdr->starting_offsets_start);
989	adev->gfx.rlc.reg_list_format_size_bytes =
990			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
991	adev->gfx.rlc.reg_list_size_bytes =
992			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
993
	/* One buffer holds both the format list and the restore list;
	 * register_restore points into it right after the format entries. */
994	adev->gfx.rlc.register_list_format =
995			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
996					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
997
998	if (!adev->gfx.rlc.register_list_format) {
999		err = -ENOMEM;
1000		goto out;
1001	}
1002
ae17c999 1003	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
2b6cd977
EH
1004			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1005	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1006		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1007
1008	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1009
ae17c999 1010	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
2b6cd977
EH
1011			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1012	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1013		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
aaa36a97 1014
c65444fe 1015	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
aaa36a97
AD
1016	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1017	if (err)
1018		goto out;
1019	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1020	if (err)
1021		goto out;
595fd013
JZ
1022	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1023	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1024	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 1025
97dde76a
AD
	/* MEC2 exists on every GFX8 ASIC except Stoney and Topaz; a missing
	 * mec2 image is tolerated (err reset to 0 below). */
1026	if ((adev->asic_type != CHIP_STONEY) &&
1027	    (adev->asic_type != CHIP_TOPAZ)) {
e3c7656c
SL
1028		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1029		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1030		if (!err) {
1031			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1032			if (err)
1033				goto out;
1034			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1035				adev->gfx.mec2_fw->data;
1036			adev->gfx.mec2_fw_version =
1037				le32_to_cpu(cp_hdr->header.ucode_version);
1038			adev->gfx.mec2_feature_version =
1039				le32_to_cpu(cp_hdr->ucode_feature_version);
1040		} else {
1041			err = 0;
1042			adev->gfx.mec2_fw = NULL;
1043		}
aaa36a97
AD
1044	}
1045
	/* When the SMU loads firmware, register each image and account its
	 * page-aligned size toward the total firmware footprint. */
e635ee07 1046	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
aaa36a97
AD
1047		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1048		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1049		info->fw = adev->gfx.pfp_fw;
1050		header = (const struct common_firmware_header *)info->fw->data;
1051		adev->firmware.fw_size +=
1052			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1053
1054		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1055		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1056		info->fw = adev->gfx.me_fw;
1057		header = (const struct common_firmware_header *)info->fw->data;
1058		adev->firmware.fw_size +=
1059			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1060
1061		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1062		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1063		info->fw = adev->gfx.ce_fw;
1064		header = (const struct common_firmware_header *)info->fw->data;
1065		adev->firmware.fw_size +=
1066			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1067
1068		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1069		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1070		info->fw = adev->gfx.rlc_fw;
1071		header = (const struct common_firmware_header *)info->fw->data;
1072		adev->firmware.fw_size +=
1073			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1074
1075		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1076		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1077		info->fw = adev->gfx.mec_fw;
1078		header = (const struct common_firmware_header *)info->fw->data;
1079		adev->firmware.fw_size +=
1080			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1081
4c2b2453
ML
1082		/* we need account JT in */
1083		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1084		adev->firmware.fw_size +=
1085			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1086
bed5712e
ML
1087		if (amdgpu_sriov_vf(adev)) {
1088			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1089			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1090			info->fw = adev->gfx.mec_fw;
			/* NOTE(review): le32_to_cpu() applied to the host
			 * constant 64 * PAGE_SIZE looks unintended (the value
			 * is not firmware data) — verify against upstream. */
1091			adev->firmware.fw_size +=
1092				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1093		}
1094
aaa36a97
AD
1095		if (adev->gfx.mec2_fw) {
1096			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1097			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1098			info->fw = adev->gfx.mec2_fw;
1099			header = (const struct common_firmware_header *)info->fw->data;
1100			adev->firmware.fw_size +=
1101				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1102		}
1103
1104	}
1105
1106out:
1107	if (err) {
1108		dev_err(adev->dev,
1109			"gfx8: Failed to load firmware \"%s\"\n",
1110			fw_name);
1111		release_firmware(adev->gfx.pfp_fw);
1112		adev->gfx.pfp_fw = NULL;
1113		release_firmware(adev->gfx.me_fw);
1114		adev->gfx.me_fw = NULL;
1115		release_firmware(adev->gfx.ce_fw);
1116		adev->gfx.ce_fw = NULL;
1117		release_firmware(adev->gfx.rlc_fw);
1118		adev->gfx.rlc_fw = NULL;
1119		release_firmware(adev->gfx.mec_fw);
1120		adev->gfx.mec_fw = NULL;
1121		release_firmware(adev->gfx.mec2_fw);
1122		adev->gfx.mec2_fw = NULL;
1123	}
1124	return err;
1125}
1126
2b6cd977
EH
/*
 * Serialize the RLC clear-state buffer (CSB) into @buffer as little-endian
 * PM4 packets: PREAMBLE begin, CONTEXT_CONTROL, one SET_CONTEXT_REG run per
 * SECT_CONTEXT extent from adev->gfx.rlc.cs_data, the raster config pair,
 * PREAMBLE end, and a final CLEAR_STATE.  Bails out silently if cs_data or
 * @buffer is NULL, or on the first non-context section.
 */
1127static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1128				    volatile u32 *buffer)
1129{
1130	u32 count = 0, i;
1131	const struct cs_section_def *sect = NULL;
1132	const struct cs_extent_def *ext = NULL;
1133
1134	if (adev->gfx.rlc.cs_data == NULL)
1135		return;
1136	if (buffer == NULL)
1137		return;
1138
1139	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1140	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1141
1142	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1143	buffer[count++] = cpu_to_le32(0x80000000);
1144	buffer[count++] = cpu_to_le32(0x80000000);
1145
1146	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1147		for (ext = sect->section; ext->extent != NULL; ++ext) {
1148			if (sect->id == SECT_CONTEXT) {
1149				buffer[count++] =
1150					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1151				buffer[count++] = cpu_to_le32(ext->reg_index -
1152						PACKET3_SET_CONTEXT_REG_START);
1153				for (i = 0; i < ext->reg_count; i++)
1154					buffer[count++] = cpu_to_le32(ext->extent[i]);
1155			} else {
				/* Only context-register sections are expected
				 * in the CSB; stop on anything else. */
1156				return;
1157			}
1158		}
1159	}
1160
1161	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1162	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1163			PACKET3_SET_CONTEXT_REG_START);
34817db6
AD
1164	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1165	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
2b6cd977
EH
1166
1167	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1168	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1169
1170	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1171	buffer[count++] = cpu_to_le32(0);
1172}
1173
fb16007b
AD
/*
 * Copy the CP jump tables (JT) of each microengine's firmware image into the
 * RLC cp_table buffer, back to back.  Engine order: CE, PFP, ME, MEC, and on
 * Carrizo also MEC2 (max_me 5 vs 4 elsewhere).  Each firmware header supplies
 * the jump-table offset/size in dwords; values are byte-swapped to LE on the
 * way into the table.
 */
1174static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1175{
1176	const __le32 *fw_data;
1177	volatile u32 *dst_ptr;
1178	int me, i, max_me = 4;
1179	u32 bo_offset = 0;
1180	u32 table_offset, table_size;
1181
1182	if (adev->asic_type == CHIP_CARRIZO)
1183		max_me = 5;
1184
1185	/* write the cp table buffer */
1186	dst_ptr = adev->gfx.rlc.cp_table_ptr;
1187	for (me = 0; me < max_me; me++) {
1188		if (me == 0) {
1189			const struct gfx_firmware_header_v1_0 *hdr =
1190				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1191			fw_data = (const __le32 *)
1192				(adev->gfx.ce_fw->data +
1193				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1194			table_offset = le32_to_cpu(hdr->jt_offset);
1195			table_size = le32_to_cpu(hdr->jt_size);
1196		} else if (me == 1) {
1197			const struct gfx_firmware_header_v1_0 *hdr =
1198				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1199			fw_data = (const __le32 *)
1200				(adev->gfx.pfp_fw->data +
1201				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1202			table_offset = le32_to_cpu(hdr->jt_offset);
1203			table_size = le32_to_cpu(hdr->jt_size);
1204		} else if (me == 2) {
1205			const struct gfx_firmware_header_v1_0 *hdr =
1206				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1207			fw_data = (const __le32 *)
1208				(adev->gfx.me_fw->data +
1209				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1210			table_offset = le32_to_cpu(hdr->jt_offset);
1211			table_size = le32_to_cpu(hdr->jt_size);
1212		} else if (me == 3) {
1213			const struct gfx_firmware_header_v1_0 *hdr =
1214				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1215			fw_data = (const __le32 *)
1216				(adev->gfx.mec_fw->data +
1217				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1218			table_offset = le32_to_cpu(hdr->jt_offset);
1219			table_size = le32_to_cpu(hdr->jt_size);
1220		} else  if (me == 4) {
			/* me == 4 only happens on Carrizo (max_me == 5),
			 * which is expected to have mec2 firmware loaded. */
1221			const struct gfx_firmware_header_v1_0 *hdr =
1222				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1223			fw_data = (const __le32 *)
1224				(adev->gfx.mec2_fw->data +
1225				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1226			table_offset = le32_to_cpu(hdr->jt_offset);
1227			table_size = le32_to_cpu(hdr->jt_size);
1228		}
1229
1230		for (i = 0; i < table_size; i ++) {
1231			dst_ptr[bo_offset + i] =
1232				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1233		}
1234
1235		bo_offset += table_size;
1236	}
1237}
1238
2b6cd977
EH
/*
 * Tear down the RLC buffer objects: the clear-state block and the CP
 * jump-table block.  Each BO is reserved (failure only warned, teardown
 * proceeds), unpinned, unreserved, unreferenced, and its pointer cleared.
 */
1239static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1240{
1241	int r;
1242
1243	/* clear state block */
1244	if (adev->gfx.rlc.clear_state_obj) {
c81a1a74 1245		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
2b6cd977 1246		if (unlikely(r != 0))
62d2ce4b 1247			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
2b6cd977
EH
1248		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1249		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
2b6cd977
EH
1250		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1251		adev->gfx.rlc.clear_state_obj = NULL;
1252	}
fb16007b
AD
1253
1254	/* jump table block */
1255	if (adev->gfx.rlc.cp_table_obj) {
c81a1a74 1256		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
fb16007b
AD
1257		if (unlikely(r != 0))
1258			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1259		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1260		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
fb16007b
AD
1261		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1262		adev->gfx.rlc.cp_table_obj = NULL;
1263	}
2b6cd977
EH
1264}
1265
/*
 * Set up the RLC buffers:
 *  - allocate/pin/map a VRAM BO for the clear-state buffer, fill it via
 *    gfx_v8_0_get_csb_buffer(), then unmap/unreserve;
 *  - on Carrizo/Stoney additionally allocate/pin/map a VRAM BO for the CP
 *    jump table (+GDS area), fill it via cz_init_cp_jump_table().
 * Clear-state failures unwind through gfx_v8_0_rlc_fini(); returns 0 on
 * success or the first negative errno.
 */
1266static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1267{
1268	volatile u32 *dst_ptr;
1269	u32 dws;
1270	const struct cs_section_def *cs_data;
1271	int r;
1272
1273	adev->gfx.rlc.cs_data = vi_cs_data;
1274
1275	cs_data = adev->gfx.rlc.cs_data;
1276
1277	if (cs_data) {
1278		/* clear state block */
1279		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1280
1281		if (adev->gfx.rlc.clear_state_obj == NULL) {
1282			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1283					     AMDGPU_GEM_DOMAIN_VRAM,
03f48dd5
CK
1284					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1285					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
2b6cd977
EH
1286					     NULL, NULL,
1287					     &adev->gfx.rlc.clear_state_obj);
1288			if (r) {
1289				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1290				gfx_v8_0_rlc_fini(adev);
1291				return r;
1292			}
1293		}
1294		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1295		if (unlikely(r != 0)) {
1296			gfx_v8_0_rlc_fini(adev);
1297			return r;
1298		}
1299		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1300				  &adev->gfx.rlc.clear_state_gpu_addr);
1301		if (r) {
1302			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
62d2ce4b 1303			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
2b6cd977
EH
1304			gfx_v8_0_rlc_fini(adev);
1305			return r;
1306		}
1307
1308		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1309		if (r) {
62d2ce4b 1310			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
2b6cd977
EH
1311			gfx_v8_0_rlc_fini(adev);
1312			return r;
1313		}
1314		/* set up the cs buffer */
1315		dst_ptr = adev->gfx.rlc.cs_ptr;
1316		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1317		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1318		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1319	}
1320
fb16007b
AD
	/* Only Carrizo/Stoney use an RLC-resident CP jump table. */
1321	if ((adev->asic_type == CHIP_CARRIZO) ||
1322	    (adev->asic_type == CHIP_STONEY)) {
07cf1a0b 1323		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
fb16007b
AD
1324		if (adev->gfx.rlc.cp_table_obj == NULL) {
1325			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1326					     AMDGPU_GEM_DOMAIN_VRAM,
03f48dd5
CK
1327					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1328					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
fb16007b
AD
1329					     NULL, NULL,
1330					     &adev->gfx.rlc.cp_table_obj);
1331			if (r) {
1332				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1333				return r;
1334			}
1335		}
1336
1337		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1338		if (unlikely(r != 0)) {
1339			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1340			return r;
1341		}
1342		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1343				  &adev->gfx.rlc.cp_table_gpu_addr);
1344		if (r) {
1345			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
62d2ce4b 1346			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
fb16007b
AD
1347			return r;
1348		}
1349		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1350		if (r) {
1351			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1352			return r;
1353		}
1354
1355		cz_init_cp_jump_table(adev);
1356
1357		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1358		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
fb16007b
AD
1359	}
1360
2b6cd977
EH
1361	return 0;
1362}
1363
aaa36a97
AD
/*
 * Free the MEC HPD (hardware pipe descriptor) EOP buffer object if present:
 * reserve (failure only warned), unpin, unreserve, drop the reference, and
 * clear the pointer.
 */
1364static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1365{
1366	int r;
1367
1368	if (adev->gfx.mec.hpd_eop_obj) {
c81a1a74 1369		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
aaa36a97
AD
1370		if (unlikely(r != 0))
1371			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1372		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1373		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
aaa36a97
AD
1374		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1375		adev->gfx.mec.hpd_eop_obj = NULL;
1376	}
1377}
1378
aaa36a97
AD
/*
 * Allocate and initialize the MEC HPD EOP buffer: claim the compute queues,
 * size the buffer as one GFX8_MEC_HPD_SIZE slot per compute ring, create a
 * GTT BO for it, pin/map it, zero it, then unmap/unreserve.  Failures after
 * BO creation unwind through gfx_v8_0_mec_fini().  Returns 0 or a negative
 * errno.
 */
1379static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1380{
1381	int r;
1382	u32 *hpd;
42794b27 1383	size_t mec_hpd_size;
aaa36a97 1384
78c16834
AR
1385	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1386
78c16834 1387	/* take ownership of the relevant compute queues */
41f6a99a 1388	amdgpu_gfx_compute_queue_acquire(adev);
78c16834
AR
1389
1390	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
aaa36a97
AD
1391
1392	if (adev->gfx.mec.hpd_eop_obj == NULL) {
1393		r = amdgpu_bo_create(adev,
42794b27 1394				     mec_hpd_size,
aaa36a97 1395				     PAGE_SIZE, true,
72d7668b 1396				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
aaa36a97
AD
1397				     &adev->gfx.mec.hpd_eop_obj);
1398		if (r) {
1399			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1400			return r;
1401		}
1402	}
1403
1404	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1405	if (unlikely(r != 0)) {
1406		gfx_v8_0_mec_fini(adev);
1407		return r;
1408	}
1409	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1410			  &adev->gfx.mec.hpd_eop_gpu_addr);
1411	if (r) {
1412		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1413		gfx_v8_0_mec_fini(adev);
1414		return r;
1415	}
1416	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1417	if (r) {
1418		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1419		gfx_v8_0_mec_fini(adev);
1420		return r;
1421	}
1422
	/* Start every HPD slot zeroed. */
42794b27 1423	memset(hpd, 0, mec_hpd_size);
aaa36a97
AD
1424
1425	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1426	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1427
1428	return 0;
1429}
1430
ccba7691
AD
/*
 * Precompiled GCN compute shader (raw machine words) used by the EDC GPR
 * workaround below to initialize VGPRs; ends with s_barrier / s_endpgm
 * style terminator words (0xbf8a0000, 0xbf810000).
 * NOTE(review): instruction-level meaning is not derivable from this file.
 */
1431static const u32 vgpr_init_compute_shader[] =
1432{
1433	0x7e000209, 0x7e020208,
1434	0x7e040207, 0x7e060206,
1435	0x7e080205, 0x7e0a0204,
1436	0x7e0c0203, 0x7e0e0202,
1437	0x7e100201, 0x7e120200,
1438	0x7e140209, 0x7e160208,
1439	0x7e180207, 0x7e1a0206,
1440	0x7e1c0205, 0x7e1e0204,
1441	0x7e200203, 0x7e220202,
1442	0x7e240201, 0x7e260200,
1443	0x7e280209, 0x7e2a0208,
1444	0x7e2c0207, 0x7e2e0206,
1445	0x7e300205, 0x7e320204,
1446	0x7e340203, 0x7e360202,
1447	0x7e380201, 0x7e3a0200,
1448	0x7e3c0209, 0x7e3e0208,
1449	0x7e400207, 0x7e420206,
1450	0x7e440205, 0x7e460204,
1451	0x7e480203, 0x7e4a0202,
1452	0x7e4c0201, 0x7e4e0200,
1453	0x7e500209, 0x7e520208,
1454	0x7e540207, 0x7e560206,
1455	0x7e580205, 0x7e5a0204,
1456	0x7e5c0203, 0x7e5e0202,
1457	0x7e600201, 0x7e620200,
1458	0x7e640209, 0x7e660208,
1459	0x7e680207, 0x7e6a0206,
1460	0x7e6c0205, 0x7e6e0204,
1461	0x7e700203, 0x7e720202,
1462	0x7e740201, 0x7e760200,
1463	0x7e780209, 0x7e7a0208,
1464	0x7e7c0207, 0x7e7e0206,
1465	0xbf8a0000, 0xbf810000,
1466};
1467
/*
 * Precompiled GCN compute shader (raw machine words) used by the EDC GPR
 * workaround to initialize SGPRs; companion to vgpr_init_compute_shader.
 */
1468static const u32 sgpr_init_compute_shader[] =
1469{
1470	0xbe8a0100, 0xbe8c0102,
1471	0xbe8e0104, 0xbe900106,
1472	0xbe920108, 0xbe940100,
1473	0xbe960102, 0xbe980104,
1474	0xbe9a0106, 0xbe9c0108,
1475	0xbe9e0100, 0xbea00102,
1476	0xbea20104, 0xbea40106,
1477	0xbea60108, 0xbea80100,
1478	0xbeaa0102, 0xbeac0104,
1479	0xbeae0106, 0xbeb00108,
1480	0xbeb20100, 0xbeb40102,
1481	0xbeb60104, 0xbeb80106,
1482	0xbeba0108, 0xbebc0100,
1483	0xbebe0102, 0xbec00104,
1484	0xbec20106, 0xbec40108,
1485	0xbec60100, 0xbec80102,
1486	0xbee60004, 0xbee70005,
1487	0xbeea0006, 0xbeeb0007,
1488	0xbee80008, 0xbee90009,
1489	0xbefc0000, 0xbf8a0000,
1490	0xbf810000, 0x00000000,
1491};
1492
/*
 * Register/value pairs programmed before dispatching the VGPR-init shader:
 * thread-group shape (256*4 x 1 x 1), resource limits, PGM_RSRC2, and
 * marker values in COMPUTE_USER_DATA_0..9.  Consumed pairwise by
 * gfx_v8_0_do_edc_gpr_workarounds().
 */
1493static const u32 vgpr_init_regs[] =
1494{
1495	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1496	mmCOMPUTE_RESOURCE_LIMITS, 0,
1497	mmCOMPUTE_NUM_THREAD_X, 256*4,
1498	mmCOMPUTE_NUM_THREAD_Y, 1,
1499	mmCOMPUTE_NUM_THREAD_Z, 1,
1500	mmCOMPUTE_PGM_RSRC2, 20,
1501	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1502	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1503	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1504	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1505	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1506	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1507	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1508	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1509	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1510	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1511};
1512
/*
 * Register/value pairs for the first SGPR-init dispatch; differs from
 * vgpr_init_regs in the SE0 thread-management mask (0x0f: lower CU half)
 * and the thread-group X dimension (256*5).
 */
1513static const u32 sgpr1_init_regs[] =
1514{
1515	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1516	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1517	mmCOMPUTE_NUM_THREAD_X, 256*5,
1518	mmCOMPUTE_NUM_THREAD_Y, 1,
1519	mmCOMPUTE_NUM_THREAD_Z, 1,
1520	mmCOMPUTE_PGM_RSRC2, 20,
1521	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1522	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1523	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1524	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1525	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1526	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1527	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1528	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1529	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1530	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1531};
1532
/*
 * Register/value pairs for the second SGPR-init dispatch; identical to
 * sgpr1_init_regs except the SE0 mask targets the other CU half (0xf0).
 */
1533static const u32 sgpr2_init_regs[] =
1534{
1535	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1536	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1537	mmCOMPUTE_NUM_THREAD_X, 256*5,
1538	mmCOMPUTE_NUM_THREAD_Y, 1,
1539	mmCOMPUTE_NUM_THREAD_Z, 1,
1540	mmCOMPUTE_PGM_RSRC2, 20,
1541	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1542	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1543	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1544	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1545	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1546	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1547	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1548	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1549	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1550	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1551};
1552
/*
 * EDC (error detection and correction) SEC/DED counter registers across the
 * CP (CPC/CPF/CPG), DC, GDS, SPI, SQ/SQC, TCC, TCP and TD blocks; read as a
 * group by the EDC workaround path.
 */
1553static const u32 sec_ded_counter_registers[] =
1554{
1555	mmCPC_EDC_ATC_CNT,
1556	mmCPC_EDC_SCRATCH_CNT,
1557	mmCPC_EDC_UCODE_CNT,
1558	mmCPF_EDC_ATC_CNT,
1559	mmCPF_EDC_ROQ_CNT,
1560	mmCPF_EDC_TAG_CNT,
1561	mmCPG_EDC_ATC_CNT,
1562	mmCPG_EDC_DMA_CNT,
1563	mmCPG_EDC_TAG_CNT,
1564	mmDC_EDC_CSINVOC_CNT,
1565	mmDC_EDC_RESTORE_CNT,
1566	mmDC_EDC_STATE_CNT,
1567	mmGDS_EDC_CNT,
1568	mmGDS_EDC_GRBM_CNT,
1569	mmGDS_EDC_OA_DED,
1570	mmSPI_EDC_CNT,
1571	mmSQC_ATC_EDC_GATCL1_CNT,
1572	mmSQC_EDC_CNT,
1573	mmSQ_EDC_DED_CNT,
1574	mmSQ_EDC_INFO,
1575	mmSQ_EDC_SEC_CNT,
1576	mmTCC_EDC_CNT,
1577	mmTCP_ATC_EDC_GATCL1_CNT,
1578	mmTCP_EDC_CNT,
1579	mmTD_EDC_CNT
1580};
1581
/*
 * gfx_v8_0_do_edc_gpr_workarounds - initialize GPRs so EDC counters start clean
 *
 * Carrizo-only workaround: builds a single indirect buffer that dispatches
 * three compute shaders (one VGPR-init pass and two SGPR-init passes) to
 * write known values into every GPR, then re-enables EDC and reads back the
 * SEC/DED counter registers to clear them.
 *
 * Returns 0 on success (or when the workaround does not apply), negative
 * error code on IB allocation/submission or fence-wait failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* disable EDC while the init shaders run; original mode is restored
	 * (with DED_MODE/PROP_FED set) after the fence completes below */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords per register write pair, 4 dwords for the
	 * PGM_LO/HI packet, 5 dwords for DISPATCH_DIRECT, 2 dwords for the
	 * CS partial flush; x4 converts dwords to bytes */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders into the tail of the same IB */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 - both SGPR passes reuse the same shader at sgpr_offset,
	 * only the register state (sgpr2_init_regs) differs */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with double-error detection and fault propagation */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1744
68182d90 1745static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
0bde3a95
AD
1746{
1747 u32 gb_addr_config;
1748 u32 mc_shared_chmap, mc_arb_ramcfg;
1749 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1750 u32 tmp;
68182d90 1751 int ret;
0bde3a95
AD
1752
1753 switch (adev->asic_type) {
1754 case CHIP_TOPAZ:
1755 adev->gfx.config.max_shader_engines = 1;
1756 adev->gfx.config.max_tile_pipes = 2;
1757 adev->gfx.config.max_cu_per_sh = 6;
1758 adev->gfx.config.max_sh_per_se = 1;
1759 adev->gfx.config.max_backends_per_se = 2;
1760 adev->gfx.config.max_texture_channel_caches = 2;
1761 adev->gfx.config.max_gprs = 256;
1762 adev->gfx.config.max_gs_threads = 32;
1763 adev->gfx.config.max_hw_contexts = 8;
1764
1765 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1766 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1767 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1768 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1769 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1770 break;
1771 case CHIP_FIJI:
1772 adev->gfx.config.max_shader_engines = 4;
1773 adev->gfx.config.max_tile_pipes = 16;
1774 adev->gfx.config.max_cu_per_sh = 16;
1775 adev->gfx.config.max_sh_per_se = 1;
1776 adev->gfx.config.max_backends_per_se = 4;
5f2e816b 1777 adev->gfx.config.max_texture_channel_caches = 16;
0bde3a95
AD
1778 adev->gfx.config.max_gprs = 256;
1779 adev->gfx.config.max_gs_threads = 32;
1780 adev->gfx.config.max_hw_contexts = 8;
1781
68182d90
FC
1782 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1783 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1784 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1785 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1786 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1787 break;
2cc0c0b5 1788 case CHIP_POLARIS11:
c4642a47 1789 case CHIP_POLARIS12:
68182d90
FC
1790 ret = amdgpu_atombios_get_gfx_info(adev);
1791 if (ret)
1792 return ret;
1793 adev->gfx.config.max_gprs = 256;
1794 adev->gfx.config.max_gs_threads = 32;
1795 adev->gfx.config.max_hw_contexts = 8;
1796
1797 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1798 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1799 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1800 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2cc0c0b5 1801 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
68182d90 1802 break;
2cc0c0b5 1803 case CHIP_POLARIS10:
68182d90
FC
1804 ret = amdgpu_atombios_get_gfx_info(adev);
1805 if (ret)
1806 return ret;
1807 adev->gfx.config.max_gprs = 256;
1808 adev->gfx.config.max_gs_threads = 32;
1809 adev->gfx.config.max_hw_contexts = 8;
1810
0bde3a95
AD
1811 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1816 break;
1817 case CHIP_TONGA:
1818 adev->gfx.config.max_shader_engines = 4;
1819 adev->gfx.config.max_tile_pipes = 8;
1820 adev->gfx.config.max_cu_per_sh = 8;
1821 adev->gfx.config.max_sh_per_se = 1;
1822 adev->gfx.config.max_backends_per_se = 2;
1823 adev->gfx.config.max_texture_channel_caches = 8;
1824 adev->gfx.config.max_gprs = 256;
1825 adev->gfx.config.max_gs_threads = 32;
1826 adev->gfx.config.max_hw_contexts = 8;
1827
1828 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1829 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1830 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1831 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1832 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1833 break;
1834 case CHIP_CARRIZO:
1835 adev->gfx.config.max_shader_engines = 1;
1836 adev->gfx.config.max_tile_pipes = 2;
1837 adev->gfx.config.max_sh_per_se = 1;
1838 adev->gfx.config.max_backends_per_se = 2;
943c05bd 1839 adev->gfx.config.max_cu_per_sh = 8;
0bde3a95
AD
1840 adev->gfx.config.max_texture_channel_caches = 2;
1841 adev->gfx.config.max_gprs = 256;
1842 adev->gfx.config.max_gs_threads = 32;
1843 adev->gfx.config.max_hw_contexts = 8;
1844
e3c7656c
SL
1845 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1846 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1847 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1848 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1849 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1850 break;
1851 case CHIP_STONEY:
1852 adev->gfx.config.max_shader_engines = 1;
1853 adev->gfx.config.max_tile_pipes = 2;
1854 adev->gfx.config.max_sh_per_se = 1;
1855 adev->gfx.config.max_backends_per_se = 1;
943c05bd 1856 adev->gfx.config.max_cu_per_sh = 3;
e3c7656c
SL
1857 adev->gfx.config.max_texture_channel_caches = 2;
1858 adev->gfx.config.max_gprs = 256;
1859 adev->gfx.config.max_gs_threads = 16;
1860 adev->gfx.config.max_hw_contexts = 8;
1861
0bde3a95
AD
1862 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1863 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1864 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1865 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1866 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1867 break;
1868 default:
1869 adev->gfx.config.max_shader_engines = 2;
1870 adev->gfx.config.max_tile_pipes = 4;
1871 adev->gfx.config.max_cu_per_sh = 2;
1872 adev->gfx.config.max_sh_per_se = 1;
1873 adev->gfx.config.max_backends_per_se = 2;
1874 adev->gfx.config.max_texture_channel_caches = 4;
1875 adev->gfx.config.max_gprs = 256;
1876 adev->gfx.config.max_gs_threads = 32;
1877 adev->gfx.config.max_hw_contexts = 8;
1878
1879 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1880 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1881 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1882 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1883 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1884 break;
1885 }
1886
1887 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1888 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1889 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1890
1891 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1892 adev->gfx.config.mem_max_burst_length_bytes = 256;
1893 if (adev->flags & AMD_IS_APU) {
1894 /* Get memory bank mapping mode. */
1895 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1896 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1897 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1898
1899 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1900 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1901 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1902
1903 /* Validate settings in case only one DIMM installed. */
1904 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1905 dimm00_addr_map = 0;
1906 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1907 dimm01_addr_map = 0;
1908 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1909 dimm10_addr_map = 0;
1910 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1911 dimm11_addr_map = 0;
1912
1913 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1914 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1915 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1916 adev->gfx.config.mem_row_size_in_kb = 2;
1917 else
1918 adev->gfx.config.mem_row_size_in_kb = 1;
1919 } else {
1920 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1921 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1922 if (adev->gfx.config.mem_row_size_in_kb > 4)
1923 adev->gfx.config.mem_row_size_in_kb = 4;
1924 }
1925
1926 adev->gfx.config.shader_engine_tile_size = 32;
1927 adev->gfx.config.num_gpus = 1;
1928 adev->gfx.config.multi_gpu_tile_size = 64;
1929
1930 /* fix up row size */
1931 switch (adev->gfx.config.mem_row_size_in_kb) {
1932 case 1:
1933 default:
1934 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1935 break;
1936 case 2:
1937 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1938 break;
1939 case 4:
1940 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1941 break;
1942 }
1943 adev->gfx.config.gb_addr_config = gb_addr_config;
68182d90
FC
1944
1945 return 0;
0bde3a95
AD
1946}
1947
e33fec48
AR
1948static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1949 int mec, int pipe, int queue)
1950{
1951 int r;
1952 unsigned irq_type;
1953 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1954
1955 ring = &adev->gfx.compute_ring[ring_id];
1956
1957 /* mec0 is me1 */
1958 ring->me = mec + 1;
1959 ring->pipe = pipe;
1960 ring->queue = queue;
1961
1962 ring->ring_obj = NULL;
1963 ring->use_doorbell = true;
1964 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1965 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1966 + (ring_id * GFX8_MEC_HPD_SIZE);
1967 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1968
1969 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1970 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1971 + ring->pipe;
1972
1973 /* type-2 packets are deprecated on MEC, use type-3 instead */
1974 r = amdgpu_ring_init(adev, ring, 1024,
1975 &adev->gfx.eop_irq, irq_type);
1976 if (r)
1977 return r;
1978
1979
1980 return 0;
1981}
1982
5fc3aeeb 1983static int gfx_v8_0_sw_init(void *handle)
aaa36a97 1984{
e33fec48 1985 int i, j, k, r, ring_id;
aaa36a97 1986 struct amdgpu_ring *ring;
4e638ae9 1987 struct amdgpu_kiq *kiq;
5fc3aeeb 1988 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97 1989
4853bbb6
AD
1990 switch (adev->asic_type) {
1991 case CHIP_FIJI:
1992 case CHIP_TONGA:
1993 case CHIP_POLARIS11:
1994 case CHIP_POLARIS12:
1995 case CHIP_POLARIS10:
1996 case CHIP_CARRIZO:
1997 adev->gfx.mec.num_mec = 2;
1998 break;
1999 case CHIP_TOPAZ:
2000 case CHIP_STONEY:
2001 default:
2002 adev->gfx.mec.num_mec = 1;
2003 break;
2004 }
2005
2006 adev->gfx.mec.num_pipe_per_mec = 4;
2007 adev->gfx.mec.num_queue_per_pipe = 8;
2008
4e638ae9 2009 /* KIQ event */
d766e6a3 2010 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
4e638ae9
XY
2011 if (r)
2012 return r;
2013
aaa36a97 2014 /* EOP Event */
d766e6a3 2015 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
aaa36a97
AD
2016 if (r)
2017 return r;
2018
2019 /* Privileged reg */
d766e6a3
AD
2020 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
2021 &adev->gfx.priv_reg_irq);
aaa36a97
AD
2022 if (r)
2023 return r;
2024
2025 /* Privileged inst */
d766e6a3
AD
2026 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
2027 &adev->gfx.priv_inst_irq);
aaa36a97
AD
2028 if (r)
2029 return r;
2030
2031 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2032
2033 gfx_v8_0_scratch_init(adev);
2034
2035 r = gfx_v8_0_init_microcode(adev);
2036 if (r) {
2037 DRM_ERROR("Failed to load gfx firmware!\n");
2038 return r;
2039 }
2040
2b6cd977
EH
2041 r = gfx_v8_0_rlc_init(adev);
2042 if (r) {
2043 DRM_ERROR("Failed to init rlc BOs!\n");
2044 return r;
2045 }
2046
aaa36a97
AD
2047 r = gfx_v8_0_mec_init(adev);
2048 if (r) {
2049 DRM_ERROR("Failed to init MEC BOs!\n");
2050 return r;
2051 }
2052
aaa36a97
AD
2053 /* set up the gfx ring */
2054 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2055 ring = &adev->gfx.gfx_ring[i];
2056 ring->ring_obj = NULL;
2057 sprintf(ring->name, "gfx");
2058 /* no gfx doorbells on iceland */
2059 if (adev->asic_type != CHIP_TOPAZ) {
2060 ring->use_doorbell = true;
2061 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2062 }
2063
79887142
CK
2064 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2065 AMDGPU_CP_IRQ_GFX_EOP);
aaa36a97
AD
2066 if (r)
2067 return r;
2068 }
2069
aaa36a97 2070
e33fec48
AR
2071 /* set up the compute queues - allocate horizontally across pipes */
2072 ring_id = 0;
2073 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2074 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2075 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2db0cdbe 2076 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
e33fec48 2077 continue;
78c16834 2078
e33fec48
AR
2079 r = gfx_v8_0_compute_ring_init(adev,
2080 ring_id,
2081 i, k, j);
2082 if (r)
2083 return r;
78c16834 2084
e33fec48
AR
2085 ring_id++;
2086 }
aaa36a97 2087 }
aaa36a97
AD
2088 }
2089
71c37505 2090 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
b4e40676
DP
2091 if (r) {
2092 DRM_ERROR("Failed to init KIQ BOs!\n");
2093 return r;
2094 }
596c67d0 2095
b4e40676 2096 kiq = &adev->gfx.kiq;
71c37505 2097 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
b4e40676
DP
2098 if (r)
2099 return r;
596c67d0 2100
b4e40676 2101 /* create MQD for all compute queues as well as KIQ for SRIOV case */
6b0fa871 2102 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
b4e40676
DP
2103 if (r)
2104 return r;
596c67d0 2105
aaa36a97 2106 /* reserve GDS, GWS and OA resource for gfx */
78bbbd9c
CK
2107 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2108 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2109 &adev->gds.gds_gfx_bo, NULL, NULL);
aaa36a97
AD
2110 if (r)
2111 return r;
2112
78bbbd9c
CK
2113 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2114 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2115 &adev->gds.gws_gfx_bo, NULL, NULL);
aaa36a97
AD
2116 if (r)
2117 return r;
2118
78bbbd9c
CK
2119 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2120 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2121 &adev->gds.oa_gfx_bo, NULL, NULL);
aaa36a97
AD
2122 if (r)
2123 return r;
2124
a101a899
KW
2125 adev->gfx.ce_ram_size = 0x8000;
2126
68182d90
FC
2127 r = gfx_v8_0_gpu_early_init(adev);
2128 if (r)
2129 return r;
0bde3a95 2130
aaa36a97
AD
2131 return 0;
2132}
2133
5fc3aeeb 2134static int gfx_v8_0_sw_fini(void *handle)
aaa36a97
AD
2135{
2136 int i;
5fc3aeeb 2137 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97 2138
8640faed
JZ
2139 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2140 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2141 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
aaa36a97
AD
2142
2143 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2144 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2145 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2146 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2147
b9683c21 2148 amdgpu_gfx_compute_mqd_sw_fini(adev);
71c37505
AD
2149 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2150 amdgpu_gfx_kiq_fini(adev);
596c67d0 2151
aaa36a97 2152 gfx_v8_0_mec_fini(adev);
2b6cd977 2153 gfx_v8_0_rlc_fini(adev);
13331ac3 2154 gfx_v8_0_free_microcode(adev);
2b6cd977 2155
aaa36a97
AD
2156 return 0;
2157}
2158
2159static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2160{
90bea0ab 2161 uint32_t *modearray, *mod2array;
eb64526f
TSD
2162 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2163 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
90bea0ab 2164 u32 reg_offset;
aaa36a97 2165
90bea0ab
TSD
2166 modearray = adev->gfx.config.tile_mode_array;
2167 mod2array = adev->gfx.config.macrotile_mode_array;
2168
2169 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2170 modearray[reg_offset] = 0;
2171
2172 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2173 mod2array[reg_offset] = 0;
aaa36a97
AD
2174
2175 switch (adev->asic_type) {
2176 case CHIP_TOPAZ:
90bea0ab
TSD
2177 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2178 PIPE_CONFIG(ADDR_SURF_P2) |
2179 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2180 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2181 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2182 PIPE_CONFIG(ADDR_SURF_P2) |
2183 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2184 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2185 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2186 PIPE_CONFIG(ADDR_SURF_P2) |
2187 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2188 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2189 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2190 PIPE_CONFIG(ADDR_SURF_P2) |
2191 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2192 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2193 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2194 PIPE_CONFIG(ADDR_SURF_P2) |
2195 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2196 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2197 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2198 PIPE_CONFIG(ADDR_SURF_P2) |
2199 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2200 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2201 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2202 PIPE_CONFIG(ADDR_SURF_P2) |
2203 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2204 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2205 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2206 PIPE_CONFIG(ADDR_SURF_P2));
2207 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2208 PIPE_CONFIG(ADDR_SURF_P2) |
2209 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2210 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2211 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2212 PIPE_CONFIG(ADDR_SURF_P2) |
2213 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2214 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2215 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2216 PIPE_CONFIG(ADDR_SURF_P2) |
2217 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2219 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2220 PIPE_CONFIG(ADDR_SURF_P2) |
2221 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2222 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2223 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224 PIPE_CONFIG(ADDR_SURF_P2) |
2225 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2226 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2227 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2228 PIPE_CONFIG(ADDR_SURF_P2) |
2229 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2231 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2232 PIPE_CONFIG(ADDR_SURF_P2) |
2233 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2234 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2235 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2236 PIPE_CONFIG(ADDR_SURF_P2) |
2237 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2238 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2239 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2240 PIPE_CONFIG(ADDR_SURF_P2) |
2241 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2242 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2243 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2244 PIPE_CONFIG(ADDR_SURF_P2) |
2245 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2247 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2248 PIPE_CONFIG(ADDR_SURF_P2) |
2249 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2251 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2252 PIPE_CONFIG(ADDR_SURF_P2) |
2253 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2254 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2255 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2256 PIPE_CONFIG(ADDR_SURF_P2) |
2257 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2259 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2260 PIPE_CONFIG(ADDR_SURF_P2) |
2261 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2263 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2264 PIPE_CONFIG(ADDR_SURF_P2) |
2265 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2267 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2268 PIPE_CONFIG(ADDR_SURF_P2) |
2269 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2271 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2272 PIPE_CONFIG(ADDR_SURF_P2) |
2273 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2274 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2275 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2276 PIPE_CONFIG(ADDR_SURF_P2) |
2277 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2278 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2279
2280 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2281 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2282 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2283 NUM_BANKS(ADDR_SURF_8_BANK));
2284 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2285 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2286 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2287 NUM_BANKS(ADDR_SURF_8_BANK));
2288 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2289 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2290 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2291 NUM_BANKS(ADDR_SURF_8_BANK));
2292 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2293 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2294 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2295 NUM_BANKS(ADDR_SURF_8_BANK));
2296 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2297 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2298 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2299 NUM_BANKS(ADDR_SURF_8_BANK));
2300 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2301 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2302 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2303 NUM_BANKS(ADDR_SURF_8_BANK));
2304 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2306 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2307 NUM_BANKS(ADDR_SURF_8_BANK));
2308 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2309 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2310 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2311 NUM_BANKS(ADDR_SURF_16_BANK));
2312 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2313 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2314 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2315 NUM_BANKS(ADDR_SURF_16_BANK));
2316 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2317 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2318 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2319 NUM_BANKS(ADDR_SURF_16_BANK));
2320 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2321 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2322 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2323 NUM_BANKS(ADDR_SURF_16_BANK));
2324 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2325 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2326 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2327 NUM_BANKS(ADDR_SURF_16_BANK));
2328 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2329 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2330 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2331 NUM_BANKS(ADDR_SURF_16_BANK));
2332 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2333 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2334 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2335 NUM_BANKS(ADDR_SURF_8_BANK));
2336
2337 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2338 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2339 reg_offset != 23)
2340 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2341
2342 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2343 if (reg_offset != 7)
2344 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2345
8cdacf44 2346 break;
af15a2d5 2347 case CHIP_FIJI:
90bea0ab
TSD
2348 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2351 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2352 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2353 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2355 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2356 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2357 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2359 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2360 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2361 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2363 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2364 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2365 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2367 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2368 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2369 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2371 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2372 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2373 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2375 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2376 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2377 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2378 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2379 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2380 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2381 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2382 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2383 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2386 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2389 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2390 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2391 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2393 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2394 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2395 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2396 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2397 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2398 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2399 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2401 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2402 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2405 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2407 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2409 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2413 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2414 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2415 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2416 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2417 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2418 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2419 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2421 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2422 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2423 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2425 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2426 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2427 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2429 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2430 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2431 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2433 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2434 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2435 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2436 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2438 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2439 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2440 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2442 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2443 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2444 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2446 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2447 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2449 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2450 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2451 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2452 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2453 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2454 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2455 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2457 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2459 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2460 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2461 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2462 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2463 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2464 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2465 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2466 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2467 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2468 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2470
2471 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2472 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2473 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2474 NUM_BANKS(ADDR_SURF_8_BANK));
2475 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2478 NUM_BANKS(ADDR_SURF_8_BANK));
2479 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2481 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2482 NUM_BANKS(ADDR_SURF_8_BANK));
2483 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2484 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2485 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2486 NUM_BANKS(ADDR_SURF_8_BANK));
2487 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2489 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2490 NUM_BANKS(ADDR_SURF_8_BANK));
2491 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2493 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2494 NUM_BANKS(ADDR_SURF_8_BANK));
2495 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2496 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2497 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2498 NUM_BANKS(ADDR_SURF_8_BANK));
2499 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2501 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2502 NUM_BANKS(ADDR_SURF_8_BANK));
2503 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2506 NUM_BANKS(ADDR_SURF_8_BANK));
2507 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2509 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2510 NUM_BANKS(ADDR_SURF_8_BANK));
2511 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2514 NUM_BANKS(ADDR_SURF_8_BANK));
2515 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2517 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2518 NUM_BANKS(ADDR_SURF_8_BANK));
2519 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522 NUM_BANKS(ADDR_SURF_8_BANK));
2523 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2526 NUM_BANKS(ADDR_SURF_4_BANK));
2527
2528 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2529 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2530
2531 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2532 if (reg_offset != 7)
2533 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2534
5f2e816b 2535 break;
aaa36a97 2536 case CHIP_TONGA:
90bea0ab
TSD
2537 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2540 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2541 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2544 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2545 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2548 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2549 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2552 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2553 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2556 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2557 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2560 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2561 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2562 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2564 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2565 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2566 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2567 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2568 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2569 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2570 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2571 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2572 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2573 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2574 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2575 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2578 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2579 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2580 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2582 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2583 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2585 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2586 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2587 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2588 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2590 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2592 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2594 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2595 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2596 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2598 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2599 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2600 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2602 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2603 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2604 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2605 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2606 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2607 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2608 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2610 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2611 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2612 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2613 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2614 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2615 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2616 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2617 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2618 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2619 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2620 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2622 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2623 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2624 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2626 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2627 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2628 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2629 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2630 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2631 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2632 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2633 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2634 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2635 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2636 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2637 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2638 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2639 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2640 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2642 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2643 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2644 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2645 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2646 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2647 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2648 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2649 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2650 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2651 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2652 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2653 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2654 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2655 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2656 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2657 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2658 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2659
2660 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2662 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2663 NUM_BANKS(ADDR_SURF_16_BANK));
2664 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2666 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2667 NUM_BANKS(ADDR_SURF_16_BANK));
2668 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2670 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2671 NUM_BANKS(ADDR_SURF_16_BANK));
2672 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2673 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2674 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2675 NUM_BANKS(ADDR_SURF_16_BANK));
2676 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2678 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2679 NUM_BANKS(ADDR_SURF_16_BANK));
2680 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2682 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2683 NUM_BANKS(ADDR_SURF_16_BANK));
2684 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2685 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2686 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2687 NUM_BANKS(ADDR_SURF_16_BANK));
2688 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2689 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2690 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2691 NUM_BANKS(ADDR_SURF_16_BANK));
2692 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2693 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2694 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2695 NUM_BANKS(ADDR_SURF_16_BANK));
2696 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2698 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2699 NUM_BANKS(ADDR_SURF_16_BANK));
2700 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2702 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2703 NUM_BANKS(ADDR_SURF_16_BANK));
2704 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2705 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2706 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2707 NUM_BANKS(ADDR_SURF_8_BANK));
2708 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2711 NUM_BANKS(ADDR_SURF_4_BANK));
2712 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2713 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2714 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2715 NUM_BANKS(ADDR_SURF_4_BANK));
2716
2717 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2718 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2719
2720 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2721 if (reg_offset != 7)
2722 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2723
68182d90 2724 break;
2cc0c0b5 2725 case CHIP_POLARIS11:
c4642a47 2726 case CHIP_POLARIS12:
68182d90
FC
2727 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2728 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2730 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2731 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2734 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2735 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2736 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2738 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2739 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2742 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2743 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2744 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2746 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2747 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2748 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2750 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2751 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2754 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2755 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2756 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2758 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2759 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2760 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2761 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2762 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2764 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2765 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2767 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2768 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2769 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2770 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2772 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2773 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2774 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2776 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2777 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2778 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2779 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2780 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2782 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2784 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2785 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2786 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2788 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2789 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2790 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2791 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2792 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2793 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2794 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2796 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2797 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2798 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2800 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2801 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2802 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2803 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2804 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2805 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2806 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2808 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2809 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2810 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2812 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2813 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2814 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2816 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2817 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2818 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2819 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2820 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2821 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2822 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2824 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2825 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2826 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2827 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2828 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2829 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2830 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2831 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2832 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2833 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2834 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2835 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2836 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2837 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2840 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2841 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2842 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2843 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2844 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2845 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2846 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2847 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2848 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2849
2850 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2852 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2853 NUM_BANKS(ADDR_SURF_16_BANK));
2854
2855 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2856 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2857 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2858 NUM_BANKS(ADDR_SURF_16_BANK));
2859
2860 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2861 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2862 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2863 NUM_BANKS(ADDR_SURF_16_BANK));
2864
2865 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2867 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2868 NUM_BANKS(ADDR_SURF_16_BANK));
2869
2870 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2872 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2873 NUM_BANKS(ADDR_SURF_16_BANK));
2874
2875 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2876 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2877 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2878 NUM_BANKS(ADDR_SURF_16_BANK));
2879
2880 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2883 NUM_BANKS(ADDR_SURF_16_BANK));
2884
2885 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2886 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2887 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2888 NUM_BANKS(ADDR_SURF_16_BANK));
2889
2890 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2891 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2892 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2893 NUM_BANKS(ADDR_SURF_16_BANK));
2894
2895 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2896 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2897 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2898 NUM_BANKS(ADDR_SURF_16_BANK));
2899
2900 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2901 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2902 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2903 NUM_BANKS(ADDR_SURF_16_BANK));
2904
2905 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2906 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2907 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2908 NUM_BANKS(ADDR_SURF_16_BANK));
2909
2910 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2911 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2912 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2913 NUM_BANKS(ADDR_SURF_8_BANK));
2914
2915 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2916 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2917 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2918 NUM_BANKS(ADDR_SURF_4_BANK));
2919
2920 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2921 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2922
2923 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2924 if (reg_offset != 7)
2925 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2926
2927 break;
2cc0c0b5 2928 case CHIP_POLARIS10:
68182d90
FC
2929 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2930 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2932 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2933 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2934 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2936 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2937 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2938 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2940 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2941 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2944 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2945 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2946 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2948 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2949 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2950 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2952 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2953 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2956 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2957 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2958 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2959 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2960 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2961 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2962 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2963 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2964 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2966 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2967 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2968 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2969 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2970 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2971 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2972 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2973 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2974 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2975 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2976 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2977 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2978 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2979 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2980 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2982 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2983 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2984 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2986 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2987 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2988 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2989 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2990 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2991 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2992 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2993 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2994 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2995 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2996 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2997 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2998 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2999 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3000 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3001 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3002 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3003 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3004 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3005 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3006 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3007 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3008 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3009 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3010 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3011 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3012 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3014 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3015 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3016 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3018 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3019 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3020 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3021 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3022 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3023 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3024 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3025 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3026 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3027 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3028 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3029 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3030 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3031 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3033 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3034 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3035 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3036 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3037 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3038 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3039 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3040 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3041 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3042 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3043 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3044 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3045 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3046 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3047 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3048 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3049 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3050 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3051
3052 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3053 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3054 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3055 NUM_BANKS(ADDR_SURF_16_BANK));
3056
3057 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3058 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3059 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3060 NUM_BANKS(ADDR_SURF_16_BANK));
3061
3062 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3063 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3064 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3065 NUM_BANKS(ADDR_SURF_16_BANK));
3066
3067 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3068 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3069 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3070 NUM_BANKS(ADDR_SURF_16_BANK));
3071
3072 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3073 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3074 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3075 NUM_BANKS(ADDR_SURF_16_BANK));
3076
3077 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3078 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3079 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3080 NUM_BANKS(ADDR_SURF_16_BANK));
3081
3082 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3083 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3084 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3085 NUM_BANKS(ADDR_SURF_16_BANK));
3086
3087 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3089 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3090 NUM_BANKS(ADDR_SURF_16_BANK));
3091
3092 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3093 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3094 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3095 NUM_BANKS(ADDR_SURF_16_BANK));
3096
3097 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3098 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3099 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3100 NUM_BANKS(ADDR_SURF_16_BANK));
3101
3102 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3103 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3104 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3105 NUM_BANKS(ADDR_SURF_16_BANK));
3106
3107 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3108 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3109 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3110 NUM_BANKS(ADDR_SURF_8_BANK));
3111
3112 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3113 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3114 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3115 NUM_BANKS(ADDR_SURF_4_BANK));
3116
3117 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3119 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3120 NUM_BANKS(ADDR_SURF_4_BANK));
3121
3122 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3123 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3124
3125 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3126 if (reg_offset != 7)
3127 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3128
aaa36a97 3129 break;
e3c7656c 3130 case CHIP_STONEY:
90bea0ab
TSD
3131 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3132 PIPE_CONFIG(ADDR_SURF_P2) |
3133 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3134 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3135 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3136 PIPE_CONFIG(ADDR_SURF_P2) |
3137 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3138 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3139 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3140 PIPE_CONFIG(ADDR_SURF_P2) |
3141 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3142 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3143 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3144 PIPE_CONFIG(ADDR_SURF_P2) |
3145 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3146 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3147 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3148 PIPE_CONFIG(ADDR_SURF_P2) |
3149 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3150 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3151 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3152 PIPE_CONFIG(ADDR_SURF_P2) |
3153 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3154 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3155 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3156 PIPE_CONFIG(ADDR_SURF_P2) |
3157 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3158 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3159 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3160 PIPE_CONFIG(ADDR_SURF_P2));
3161 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3162 PIPE_CONFIG(ADDR_SURF_P2) |
3163 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3164 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3165 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3166 PIPE_CONFIG(ADDR_SURF_P2) |
3167 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3168 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3169 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3170 PIPE_CONFIG(ADDR_SURF_P2) |
3171 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3172 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3173 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3174 PIPE_CONFIG(ADDR_SURF_P2) |
3175 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3176 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3177 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3178 PIPE_CONFIG(ADDR_SURF_P2) |
3179 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3180 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3181 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3182 PIPE_CONFIG(ADDR_SURF_P2) |
3183 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3184 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3185 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3186 PIPE_CONFIG(ADDR_SURF_P2) |
3187 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3188 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3189 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3190 PIPE_CONFIG(ADDR_SURF_P2) |
3191 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3192 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3193 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3194 PIPE_CONFIG(ADDR_SURF_P2) |
3195 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3197 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3198 PIPE_CONFIG(ADDR_SURF_P2) |
3199 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3200 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3201 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3202 PIPE_CONFIG(ADDR_SURF_P2) |
3203 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3204 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3205 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3206 PIPE_CONFIG(ADDR_SURF_P2) |
3207 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3209 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3210 PIPE_CONFIG(ADDR_SURF_P2) |
3211 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3212 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3213 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3214 PIPE_CONFIG(ADDR_SURF_P2) |
3215 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3216 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3217 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3218 PIPE_CONFIG(ADDR_SURF_P2) |
3219 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3220 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3221 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3222 PIPE_CONFIG(ADDR_SURF_P2) |
3223 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3224 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3225 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3226 PIPE_CONFIG(ADDR_SURF_P2) |
3227 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3228 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3229 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3230 PIPE_CONFIG(ADDR_SURF_P2) |
3231 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3232 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3233
3234 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3235 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3236 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3237 NUM_BANKS(ADDR_SURF_8_BANK));
3238 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3239 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3240 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241 NUM_BANKS(ADDR_SURF_8_BANK));
3242 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3245 NUM_BANKS(ADDR_SURF_8_BANK));
3246 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3247 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3248 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3249 NUM_BANKS(ADDR_SURF_8_BANK));
3250 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3251 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3252 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3253 NUM_BANKS(ADDR_SURF_8_BANK));
3254 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3257 NUM_BANKS(ADDR_SURF_8_BANK));
3258 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3259 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3260 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3261 NUM_BANKS(ADDR_SURF_8_BANK));
3262 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3263 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3264 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3265 NUM_BANKS(ADDR_SURF_16_BANK));
3266 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3267 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3268 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3269 NUM_BANKS(ADDR_SURF_16_BANK));
3270 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3271 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3272 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3273 NUM_BANKS(ADDR_SURF_16_BANK));
3274 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3275 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3276 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3277 NUM_BANKS(ADDR_SURF_16_BANK));
3278 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3279 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3280 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3281 NUM_BANKS(ADDR_SURF_16_BANK));
3282 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3283 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3284 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3285 NUM_BANKS(ADDR_SURF_16_BANK));
3286 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3287 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3288 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3289 NUM_BANKS(ADDR_SURF_8_BANK));
3290
3291 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3292 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3293 reg_offset != 23)
3294 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3295
3296 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3297 if (reg_offset != 7)
3298 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3299
e3c7656c 3300 break;
aaa36a97 3301 default:
90bea0ab
TSD
3302 dev_warn(adev->dev,
3303 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3304 adev->asic_type);
3305
3306 case CHIP_CARRIZO:
3307 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3308 PIPE_CONFIG(ADDR_SURF_P2) |
3309 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3310 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3311 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3312 PIPE_CONFIG(ADDR_SURF_P2) |
3313 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3314 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3315 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3316 PIPE_CONFIG(ADDR_SURF_P2) |
3317 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3318 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3319 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3320 PIPE_CONFIG(ADDR_SURF_P2) |
3321 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3322 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3323 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3324 PIPE_CONFIG(ADDR_SURF_P2) |
3325 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3326 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3327 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3328 PIPE_CONFIG(ADDR_SURF_P2) |
3329 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3330 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3331 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3332 PIPE_CONFIG(ADDR_SURF_P2) |
3333 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3334 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3335 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3336 PIPE_CONFIG(ADDR_SURF_P2));
3337 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3338 PIPE_CONFIG(ADDR_SURF_P2) |
3339 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3341 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3342 PIPE_CONFIG(ADDR_SURF_P2) |
3343 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3345 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3346 PIPE_CONFIG(ADDR_SURF_P2) |
3347 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3348 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3349 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3350 PIPE_CONFIG(ADDR_SURF_P2) |
3351 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3352 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3353 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3354 PIPE_CONFIG(ADDR_SURF_P2) |
3355 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3357 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3358 PIPE_CONFIG(ADDR_SURF_P2) |
3359 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3361 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3362 PIPE_CONFIG(ADDR_SURF_P2) |
3363 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3364 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3365 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3366 PIPE_CONFIG(ADDR_SURF_P2) |
3367 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3369 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3370 PIPE_CONFIG(ADDR_SURF_P2) |
3371 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3373 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3374 PIPE_CONFIG(ADDR_SURF_P2) |
3375 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3376 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3377 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3378 PIPE_CONFIG(ADDR_SURF_P2) |
3379 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3380 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3381 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3382 PIPE_CONFIG(ADDR_SURF_P2) |
3383 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3385 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3386 PIPE_CONFIG(ADDR_SURF_P2) |
3387 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3388 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3389 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3390 PIPE_CONFIG(ADDR_SURF_P2) |
3391 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3392 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3393 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3394 PIPE_CONFIG(ADDR_SURF_P2) |
3395 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3396 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3397 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3398 PIPE_CONFIG(ADDR_SURF_P2) |
3399 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3400 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3401 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3402 PIPE_CONFIG(ADDR_SURF_P2) |
3403 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3404 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3405 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3406 PIPE_CONFIG(ADDR_SURF_P2) |
3407 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3408 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3409
3410 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3413 NUM_BANKS(ADDR_SURF_8_BANK));
3414 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3417 NUM_BANKS(ADDR_SURF_8_BANK));
3418 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3419 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3420 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3421 NUM_BANKS(ADDR_SURF_8_BANK));
3422 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3423 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3424 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3425 NUM_BANKS(ADDR_SURF_8_BANK));
3426 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3427 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3428 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3429 NUM_BANKS(ADDR_SURF_8_BANK));
3430 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3431 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3432 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3433 NUM_BANKS(ADDR_SURF_8_BANK));
3434 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3435 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3436 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3437 NUM_BANKS(ADDR_SURF_8_BANK));
3438 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3439 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3440 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3441 NUM_BANKS(ADDR_SURF_16_BANK));
3442 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3443 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3444 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3445 NUM_BANKS(ADDR_SURF_16_BANK));
3446 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3447 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3448 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3449 NUM_BANKS(ADDR_SURF_16_BANK));
3450 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3451 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3452 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3453 NUM_BANKS(ADDR_SURF_16_BANK));
3454 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3455 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3456 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3457 NUM_BANKS(ADDR_SURF_16_BANK));
3458 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3459 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3460 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3461 NUM_BANKS(ADDR_SURF_16_BANK));
3462 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3463 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3464 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3465 NUM_BANKS(ADDR_SURF_8_BANK));
3466
3467 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3468 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3469 reg_offset != 23)
3470 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3471
3472 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3473 if (reg_offset != 7)
3474 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3475
3476 break;
aaa36a97
AD
3477 }
3478}
3479
05fb7291 3480static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
9559ef5b 3481 u32 se_num, u32 sh_num, u32 instance)
aaa36a97 3482{
9559ef5b
TSD
3483 u32 data;
3484
3485 if (instance == 0xffffffff)
3486 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3487 else
3488 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
aaa36a97 3489
5003f278 3490 if (se_num == 0xffffffff)
aaa36a97 3491 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
5003f278 3492 else
aaa36a97 3493 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
5003f278
TSD
3494
3495 if (sh_num == 0xffffffff)
3496 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3497 else
aaa36a97 3498 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
5003f278 3499
aaa36a97
AD
3500 WREG32(mmGRBM_GFX_INDEX, data);
3501}
3502
8f8e00c1 3503static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
aaa36a97
AD
3504{
3505 u32 data, mask;
3506
5003f278
TSD
3507 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3508 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
aaa36a97 3509
5003f278 3510 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
aaa36a97 3511
378506a7
AD
3512 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3513 adev->gfx.config.max_sh_per_se);
aaa36a97 3514
8f8e00c1 3515 return (~data) & mask;
aaa36a97
AD
3516}
3517
167ac573
HR
3518static void
3519gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3520{
3521 switch (adev->asic_type) {
3522 case CHIP_FIJI:
3523 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3524 RB_XSEL2(1) | PKR_MAP(2) |
3525 PKR_XSEL(1) | PKR_YSEL(1) |
3526 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3527 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3528 SE_PAIR_YSEL(2);
3529 break;
3530 case CHIP_TONGA:
3531 case CHIP_POLARIS10:
3532 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3533 SE_XSEL(1) | SE_YSEL(1);
3534 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3535 SE_PAIR_YSEL(2);
3536 break;
3537 case CHIP_TOPAZ:
3538 case CHIP_CARRIZO:
3539 *rconf |= RB_MAP_PKR0(2);
3540 *rconf1 |= 0x0;
3541 break;
3542 case CHIP_POLARIS11:
c4642a47 3543 case CHIP_POLARIS12:
167ac573
HR
3544 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3545 SE_XSEL(1) | SE_YSEL(1);
3546 *rconf1 |= 0x0;
3547 break;
3548 case CHIP_STONEY:
3549 *rconf |= 0x0;
3550 *rconf1 |= 0x0;
3551 break;
3552 default:
3553 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3554 break;
3555 }
3556}
3557
/*
 * gfx_v8_0_write_harvested_raster_configs - program per-SE raster configs
 * when some render backends are harvested (fused off).
 *
 * @raster_config/@raster_config_1: the default (unharvested) config values
 * @rb_mask: bitmap of RBs that are actually present
 * @num_rb:  number of enabled RBs
 *
 * For each shader engine, the default raster config is patched so that the
 * SE/PKR/RB map fields point only at surviving backends, then written with
 * GRBM_GFX_INDEX targeting that SE.  Caller holds adev->grbm_idx_mutex
 * (see gfx_v8_0_setup_rb).
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* slice rb_mask into one contiguous chunk of RB bits per SE */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* VI topologies only come in these shapes; anything else is a bug */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if a whole SE pair is harvested, redirect the SE_PAIR map */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;	/* index of this SE's pair partner base */

		/* one SE of a pair missing: point SE_MAP at the survivor */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* one packer of the SE missing: point PKR_MAP at the survivor */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* within each packer, remap the RB index if one RB is gone */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3666
/*
 * gfx_v8_0_setup_rb - discover active render backends and program the
 * raster configuration.
 *
 * Walks every SE/SH, builds the active-RB bitmap, then writes either the
 * default raster config (no harvesting, or fully populated) or a patched
 * per-SE config via gfx_v8_0_write_harvested_raster_configs().  Finally the
 * resulting per-SE/SH register values are cached in adev->gfx.config for
 * reporting to userspace.  All register traffic is done under grbm_idx_mutex
 * because GRBM_GFX_INDEX selection is global.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* collect the active-RB bitmap across all shader engines/arrays */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* no harvesting (or nothing enabled at all): broadcast the defaults;
	 * otherwise patch the config per-SE around the missing RBs */
	if (!adev->gfx.config.backend_enable_mask ||
	    adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3723
/**
 * gfx_v8_0_init_compute_vmid - initialize SH_MEM registers for compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the SH_MEM aperture/config registers for the VMIDs reserved for
 * compute (KFD) use, i.e. VMIDs [FIRST_COMPUTE_VMID, LAST_COMPUTE_VMID).
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* SRBM VMID selection is global state, so serialize it */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* restore broadcast/default selection */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3768
df6e2c4a
JZ
3769static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3770{
3771 switch (adev->asic_type) {
3772 default:
3773 adev->gfx.config.double_offchip_lds_buf = 1;
3774 break;
3775 case CHIP_CARRIZO:
3776 case CHIP_STONEY:
3777 adev->gfx.config.double_offchip_lds_buf = 0;
3778 break;
3779 }
3780}
3781
/*
 * gfx_v8_0_gpu_init - one-time GFX block bring-up.
 *
 * Programs the global address config, tiling tables, render-backend setup,
 * per-VMID SH_MEM apertures and the SC FIFO sizes / SPI arbitration.
 * The order matters: tiling and RB setup must precede the SH_MEM
 * programming and the broadcast register writes at the end.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	mutex_lock(&adev->srbm_mutex);
	/* program SH_MEM for every graphics VMID; VMID 0 (kernel) gets
	 * uncached defaults, the rest get the shared-aperture base */
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* give all four pipe-order timestamp queues equal priority */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3862
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes masters to go idle.
 *
 * Polls the per-CU master-busy register once per SE/SH combination (selected
 * via GRBM), then broadcasts the selection again and polls the non-CU
 * (SE/GC/TC0/TC1) master-busy bits.  Each poll is bounded by
 * adev->usec_timeout; on timeout the function simply returns.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	/* GRBM SE/SH selection is global state; serialize against other users */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast (all SEs/SHs) before releasing the mutex */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3892
3893static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3894 bool enable)
3895{
3896 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3897
0d07db7e
TSD
3898 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3899 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3900 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3901 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3902
aaa36a97
AD
3903 WREG32(mmCP_INT_CNTL_RING0, tmp);
3904}
3905
/*
 * gfx_v8_0_init_csb - point the RLC at the clear-state indirect buffer (CSIB).
 *
 * Programs the 64-bit GPU address (low dword forced to 4-byte alignment) and
 * size of the previously allocated clear-state buffer.
 */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
3916
3917static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3918 int ind_offset,
3919 int list_size,
3920 int *unique_indices,
3921 int *indices_count,
3922 int max_indices,
3923 int *ind_start_offsets,
3924 int *offset_count,
3925 int max_offset)
3926{
3927 int indices;
3928 bool new_entry = true;
3929
3930 for (; ind_offset < list_size; ind_offset++) {
3931
3932 if (new_entry) {
3933 new_entry = false;
3934 ind_start_offsets[*offset_count] = ind_offset;
3935 *offset_count = *offset_count + 1;
3936 BUG_ON(*offset_count >= max_offset);
3937 }
3938
3939 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3940 new_entry = true;
3941 continue;
3942 }
3943
3944 ind_offset += 2;
3945
3946 /* look for the matching indice */
3947 for (indices = 0;
3948 indices < *indices_count;
3949 indices++) {
3950 if (unique_indices[indices] ==
3951 register_list_format[ind_offset])
3952 break;
3953 }
3954
3955 if (indices >= *indices_count) {
3956 unique_indices[*indices_count] =
3957 register_list_format[ind_offset];
3958 indices = *indices_count;
3959 *indices_count = *indices_count + 1;
3960 BUG_ON(*indices_count >= max_indices);
3961 }
3962
3963 register_list_format[ind_offset] = indices;
3964 }
3965}
3966
/*
 * gfx_v8_0_init_save_restore_list - upload the RLC save/restore register
 * lists (direct list to ARAM, indirect format list and offsets to GPM
 * scratch, deduplicated indices to the SRM index control registers).
 *
 * Returns 0 on success or -ENOMEM if the temporary list copy cannot be
 * allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: parsing rewrites indices in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
		adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list: auto-increment writes into SRM ARAM */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* restore list size, in pairs of dwords */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices: split into 18-bit address and upper data bits */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4030
/* gfx_v8_0_enable_save_restore_machine - turn on the RLC save/restore machine */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4035
/*
 * gfx_v8_0_init_power_gating - program the static power-gating timing
 * parameters: WPTR idle poll count, the four RLC PG delays, the serdes
 * command delay and the GFX-idle threshold for GRBM register save.
 * The values are fixed hardware tuning constants.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4052
2c547165
AD
4053static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4054 bool enable)
4055{
61cb8cef 4056 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
2c547165
AD
4057}
4058
4059static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4060 bool enable)
4061{
61cb8cef 4062 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
2c547165
AD
4063}
4064
4065static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4066{
eb584241 4067 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
2c547165
AD
4068}
4069
/*
 * gfx_v8_0_init_pg - per-ASIC power-gating init.
 *
 * Carrizo/Stoney: full setup including the RLC jump table and the
 * always-on CU mask.  Polaris11/12: CSB + save/restore list + PG timing
 * only.  All other ASICs: nothing.
 */
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}

}
4089
/*
 * gfx_v8_0_rlc_stop - halt the RLC F32 core, mask the GUI idle interrupt
 * and wait for the serdes masters to drain.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4097
/* gfx_v8_0_rlc_reset - pulse the GRBM soft reset line for the RLC
 * (assert, settle 50us, de-assert, settle 50us). */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4106
/*
 * gfx_v8_0_rlc_start - enable the RLC F32 core and, on dGPUs, re-enable
 * the GUI idle interrupt.  On APUs (e.g. Carrizo) the interrupt is enabled
 * later, only after the CP has been initialized.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4117
/*
 * gfx_v8_0_rlc_load_microcode - legacy (non-SMU) RLC ucode upload.
 *
 * Streams the firmware image dword-by-dword into RLC_GPM_UCODE_DATA and
 * finishes by writing the fw version to the address register.
 * Returns 0 on success, -EINVAL if no RLC firmware was loaded.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
4141
/*
 * gfx_v8_0_rlc_resume - full RLC bring-up sequence:
 * stop, disable clock/power gating, soft-reset, re-init power gating,
 * load microcode (legacy path or wait on the SMU loader) and start.
 * Returns 0 on success or a negative error code from ucode loading.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	/* Polaris additionally has a 3D engine CGCG/CGLS control */
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the ucode; just wait for completion */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
					AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4186
4187static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4188{
4189 int i;
4190 u32 tmp = RREG32(mmCP_ME_CNTL);
4191
4192 if (enable) {
4193 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4194 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4195 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4196 } else {
4197 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4198 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4199 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4200 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4201 adev->gfx.gfx_ring[i].ready = false;
4202 }
4203 WREG32(mmCP_ME_CNTL, tmp);
4204 udelay(50);
4205}
4206
/*
 * gfx_v8_0_cp_gfx_load_microcode - legacy upload of the three gfx CP
 * firmwares (PFP, CE, ME) into their respective ucode RAMs.
 *
 * Halts the gfx CP first; each image is streamed dword-by-dword and the
 * address register is finally set to the fw version.
 * Returns 0 on success, -EINVAL if any of the three images is missing.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
4263
4264static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4265{
4266 u32 count = 0;
4267 const struct cs_section_def *sect = NULL;
4268 const struct cs_extent_def *ext = NULL;
4269
4270 /* begin clear state */
4271 count += 2;
4272 /* context control state */
4273 count += 3;
4274
4275 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4276 for (ext = sect->section; ext->extent != NULL; ++ext) {
4277 if (sect->id == SECT_CONTEXT)
4278 count += 2 + ext->reg_count;
4279 else
4280 return 0;
4281 }
4282 }
4283 /* pa_sc_raster_config/pa_sc_raster_config1 */
4284 count += 4;
4285 /* end clear state */
4286 count += 2;
4287 /* clear state */
4288 count += 2;
4289
4290 return count;
4291}
4292
/*
 * gfx_v8_0_cp_gfx_start - initialize the gfx CP and emit the clear-state
 * sequence on ring 0: preamble, context control, the vi_cs_data context
 * registers, per-ASIC PA_SC_RASTER_CONFIG values, and the CE partition
 * bases.  Returns 0 on success or the error from ring allocation.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 dwords for the final SET_BASE packet */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent from the static clear-state table */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* raster config values are per-ASIC tuning constants */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/*
 * gfx_v8_0_set_cpg_door_bell - configure the gfx ring doorbell: offset and
 * enable in CP_RB_DOORBELL_CONTROL, plus (on dGPUs only) the valid doorbell
 * range.  Topaz/Iceland has no gfx doorbells at all.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* APUs do not have the doorbell range registers */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
aaa36a97
AD
4418
/*
 * gfx_v8_0_cp_gfx_resume - program gfx ring 0 (size, pointers, writeback
 * addresses, base, doorbell), emit the clear-state stream and run a ring
 * test.  Returns the ring-test result; ring->ready reflects it.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* re-write CNTL without RB_RPTR_WR_ENA to latch the pointers */
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4476
4477static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4478{
4479 int i;
4480
4481 if (enable) {
4482 WREG32(mmCP_MEC_CNTL, 0);
4483 } else {
4484 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4485 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4486 adev->gfx.compute_ring[i].ready = false;
fcf17a43 4487 adev->gfx.kiq.ring.ready = false;
aaa36a97
AD
4488 }
4489 udelay(50);
4490}
4491
aaa36a97
AD
/*
 * gfx_v8_0_cp_compute_load_microcode - legacy upload of the MEC1 (and,
 * when present and different, MEC2) compute firmware.
 *
 * Halts the compute CP first.  Returns 0 on success, -EINVAL if the MEC1
 * image is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4537
/* KIQ functions */
/*
 * gfx_v8_0_kiq_setting - tell the RLC which me/pipe/queue is the KIQ.
 * The queue id is written first and then re-written with the active bit
 * (0x80) set, a presumably hardware-required two-step sequence
 * (NOTE(review): ordering looks deliberate — confirm against RLC docs).
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4552
346586d5 4553static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4e638ae9 4554{
c3a49ab5 4555 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
f776952b 4556 uint32_t scratch, tmp = 0;
de65513a 4557 uint64_t queue_mask = 0;
f776952b
AD
4558 int r, i;
4559
de65513a
AR
4560 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4561 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4562 continue;
4563
4564 /* This situation may be hit in the future if a new HW
4565 * generation exposes more than 64 queues. If so, the
4566 * definition of queue_mask needs updating */
4567 if (WARN_ON(i > (sizeof(queue_mask)*8))) {
4568 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4569 break;
4570 }
4571
4572 queue_mask |= (1ull << i);
4573 }
4574
f776952b
AD
4575 r = amdgpu_gfx_scratch_get(adev, &scratch);
4576 if (r) {
4577 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4578 return r;
4579 }
4580 WREG32(scratch, 0xCAFEDEAD);
4e638ae9 4581
346586d5 4582 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
f776952b
AD
4583 if (r) {
4584 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4585 amdgpu_gfx_scratch_free(adev, scratch);
4586 return r;
4587 }
4e638ae9 4588 /* set resources */
346586d5
AD
4589 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4590 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
de65513a
AR
4591 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4592 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
346586d5
AD
4593 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4594 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4595 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4596 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
c3a49ab5
AD
4597 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4598 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4599 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4600 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4601
4602 /* map queues */
4603 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4604 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
3d7e30b3
AD
4605 amdgpu_ring_write(kiq_ring,
4606 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4607 amdgpu_ring_write(kiq_ring,
4608 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4609 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4610 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4611 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
c3a49ab5
AD
4612 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4613 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4614 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4615 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4616 }
f776952b
AD
4617 /* write to scratch for completion */
4618 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4619 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4620 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4e638ae9 4621 amdgpu_ring_commit(kiq_ring);
f776952b
AD
4622
4623 for (i = 0; i < adev->usec_timeout; i++) {
4624 tmp = RREG32(scratch);
4625 if (tmp == 0xDEADBEEF)
4626 break;
4627 DRM_UDELAY(1);
4628 }
4629 if (i >= adev->usec_timeout) {
c3a49ab5
AD
4630 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4631 scratch, tmp);
f776952b
AD
4632 r = -EINVAL;
4633 }
4634 amdgpu_gfx_scratch_free(adev, scratch);
4635
4636 return r;
4e638ae9
XY
4637}
4638
/*
 * gfx_v8_0_deactivate_hqd - request dequeue of the currently selected HQD
 * (SRBM selection must already point at it) and wait for it to go inactive,
 * then clear the dequeue request and the queue read/write pointers.
 *
 * @req: dequeue request type written to CP_HQD_DEQUEUE_REQUEST
 * Returns 0 on success or -ETIMEDOUT if the queue stays active.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4659
/*
 * gfx_v8_0_mqd_init - fill in the memory queue descriptor (MQD) for a
 * compute ring: static thread management masks, EOP buffer, queue base,
 * doorbell, writeback addresses, MTYPE settings and current-HW defaults.
 * The descriptor is only written to memory here; it is committed to the
 * HQD registers separately.  Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* enable all SEs/CUs for static thread management */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* dGPU only: point at the dynamic CU mask stored in the MQD allocation
	 * (field name "dyamic_cu_mask" is a typo in the vi_structs.h header) */
	if (!(adev->flags & AMD_IS_APU)) {
		mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
			+ offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
		mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
			+ offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
	}
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot the remaining fields from current HW state */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4804
97bf47b2
AR
/* Write a prepared MQD (memory queue descriptor) image into the currently
 * selected HQD's registers and activate the queue.
 *
 * Caller must hold srbm_mutex with the target me/pipe/queue selected via
 * vi_srbm_select(); this function writes the per-queue CP_HQD_* registers
 * of whichever queue is selected.  Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR;
	 * the MQD struct mirrors that register block starting at
	 * cp_mqd_base_addr_lo, so mqd_data can be indexed by register offset.
	 */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers (first range: VMID..EOP_CONTROL) */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	/* remaining registers after the EOP pointers skipped above */
	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD last: CP_HQD_ACTIVE is at the end of this range,
	 * so the queue only goes live once everything else is programmed */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4e638ae9 4841
a2140e00 4842static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4e638ae9
XY
4843{
4844 struct amdgpu_device *adev = ring->adev;
a2140e00 4845 struct vi_mqd *mqd = ring->mqd_ptr;
1fb37a3d 4846 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4e638ae9 4847
39300115 4848 gfx_v8_0_kiq_setting(ring);
4e638ae9 4849
a545e491 4850 if (adev->gfx.in_reset) { /* for GPU_RESET case */
1fb37a3d
ML
4851 /* reset MQD to a clean status */
4852 if (adev->gfx.mec.mqd_backup[mqd_idx])
6b0fa871 4853 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4e638ae9 4854
1fb37a3d
ML
4855 /* reset ring buffer */
4856 ring->wptr = 0;
4857 amdgpu_ring_clear_ring(ring);
39300115
AD
4858 mutex_lock(&adev->srbm_mutex);
4859 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
97bf47b2 4860 gfx_v8_0_mqd_commit(adev, mqd);
39300115
AD
4861 vi_srbm_select(adev, 0, 0, 0, 0);
4862 mutex_unlock(&adev->srbm_mutex);
a545e491 4863 } else {
6b0fa871
RZ
4864 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4865 ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
4866 ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
a545e491
AD
4867 mutex_lock(&adev->srbm_mutex);
4868 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4869 gfx_v8_0_mqd_init(ring);
97bf47b2 4870 gfx_v8_0_mqd_commit(adev, mqd);
a545e491
AD
4871 vi_srbm_select(adev, 0, 0, 0, 0);
4872 mutex_unlock(&adev->srbm_mutex);
4e638ae9 4873
a545e491 4874 if (adev->gfx.mec.mqd_backup[mqd_idx])
6b0fa871 4875 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
1fb37a3d 4876 }
4e638ae9 4877
dcf75843 4878 return 0;
4e638ae9
XY
4879}
4880
/* Initialize a compute ring's (KCQ) MQD image.
 *
 * Unlike the KIQ path, the MQD is only built/restored here; the actual
 * mapping of KCQs to hardware queues is done via the KIQ
 * (gfx_v8_0_kiq_kcq_enable).  Three cases:
 *   - fresh init (not reset, not resume): build MQD and save a backup;
 *   - GPU reset: restore MQD from backup and reset the ring buffer;
 *   - resume from suspend: just clear the ring, MQD is still valid.
 * Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* backup slot index = position of this ring in the compute_ring array */
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
		/* "dyamic" is the actual (misspelled) field name in
		 * struct vi_mqd_allocation. */
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		/* resume path: MQD persists, only scrub the ring contents */
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4911
4f339b29
RZ
/* Program the MEC doorbell aperture (KIQ..MEC ring 7) and globally enable
 * CP doorbells.  The range registers only exist on ASICs newer than Tonga;
 * the enable bit is set unconditionally.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		/* doorbell indices are dword-based, hence the << 2 */
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4921
596c67d0 4922static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4e638ae9
XY
4923{
4924 struct amdgpu_ring *ring = NULL;
596c67d0 4925 int r = 0, i;
4e638ae9 4926
596c67d0 4927 gfx_v8_0_cp_compute_enable(adev, true);
4e638ae9
XY
4928
4929 ring = &adev->gfx.kiq.ring;
6a6f380f
AD
4930
4931 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4932 if (unlikely(r != 0))
4933 goto done;
4934
4935 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4936 if (!r) {
a2140e00 4937 r = gfx_v8_0_kiq_init_queue(ring);
596c67d0 4938 amdgpu_bo_kunmap(ring->mqd_obj);
1fb37a3d 4939 ring->mqd_ptr = NULL;
4e638ae9 4940 }
6a6f380f
AD
4941 amdgpu_bo_unreserve(ring->mqd_obj);
4942 if (r)
4943 goto done;
4e638ae9 4944
4e638ae9
XY
4945 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4946 ring = &adev->gfx.compute_ring[i];
6a6f380f
AD
4947
4948 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4949 if (unlikely(r != 0))
4950 goto done;
4951 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4952 if (!r) {
39300115 4953 r = gfx_v8_0_kcq_init_queue(ring);
596c67d0 4954 amdgpu_bo_kunmap(ring->mqd_obj);
1fb37a3d 4955 ring->mqd_ptr = NULL;
596c67d0 4956 }
6a6f380f
AD
4957 amdgpu_bo_unreserve(ring->mqd_obj);
4958 if (r)
4959 goto done;
4e638ae9
XY
4960 }
4961
4f339b29 4962 gfx_v8_0_set_mec_doorbell_range(adev);
4e638ae9 4963
346586d5 4964 r = gfx_v8_0_kiq_kcq_enable(adev);
c3a49ab5
AD
4965 if (r)
4966 goto done;
aaa36a97 4967
346586d5
AD
4968 /* Test KIQ */
4969 ring = &adev->gfx.kiq.ring;
4970 ring->ready = true;
4971 r = amdgpu_ring_test_ring(ring);
4972 if (r) {
4973 ring->ready = false;
4974 goto done;
aaa36a97
AD
4975 }
4976
346586d5 4977 /* Test KCQs */
aaa36a97 4978 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
c3a49ab5 4979 ring = &adev->gfx.compute_ring[i];
aaa36a97
AD
4980 ring->ready = true;
4981 r = amdgpu_ring_test_ring(ring);
4982 if (r)
4983 ring->ready = false;
4984 }
4985
6a6f380f
AD
4986done:
4987 return r;
aaa36a97
AD
4988}
4989
/* Resume the command processor: load CP microcode if needed, then bring up
 * the GFX ring and the KIQ/compute rings.
 *
 * Microcode handling depends on the firmware load path: without powerplay,
 * either the driver loads the CP ucode directly (legacy path) or it waits
 * for the SMU to finish loading each CP firmware blob.  Topaz has no
 * SMU-loaded MEC firmware, so its compute ucode is always driver-loaded.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	/* quiesce the GUI idle interrupt while reprogramming (dGPU only) */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the ucode; just verify each blob landed */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			if (adev->asic_type == CHIP_TOPAZ) {
				/* Topaz: MEC ucode is driver-loaded */
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
								AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
5048
/* Enable or disable both CP engines (GFX and compute) together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5054
5fc3aeeb 5055static int gfx_v8_0_hw_init(void *handle)
aaa36a97
AD
5056{
5057 int r;
5fc3aeeb 5058 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
5059
5060 gfx_v8_0_init_golden_registers(adev);
aaa36a97
AD
5061 gfx_v8_0_gpu_init(adev);
5062
5063 r = gfx_v8_0_rlc_resume(adev);
5064 if (r)
5065 return r;
5066
5067 r = gfx_v8_0_cp_resume(adev);
aaa36a97
AD
5068
5069 return r;
5070}
5071
5fc3aeeb 5072static int gfx_v8_0_hw_fini(void *handle)
aaa36a97 5073{
5fc3aeeb 5074 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5075
1d22a454
AD
5076 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5077 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
84f3f05b
XY
5078 if (amdgpu_sriov_vf(adev)) {
5079 pr_debug("For SRIOV client, shouldn't do anything.\n");
5080 return 0;
5081 }
aaa36a97
AD
5082 gfx_v8_0_cp_enable(adev, false);
5083 gfx_v8_0_rlc_stop(adev);
aaa36a97 5084
62a86fc2
EH
5085 amdgpu_set_powergating_state(adev,
5086 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5087
aaa36a97
AD
5088 return 0;
5089}
5090
5fc3aeeb 5091static int gfx_v8_0_suspend(void *handle)
aaa36a97 5092{
5fc3aeeb 5093 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
b4e40676 5094 adev->gfx.in_suspend = true;
aaa36a97
AD
5095 return gfx_v8_0_hw_fini(adev);
5096}
5097
5fc3aeeb 5098static int gfx_v8_0_resume(void *handle)
aaa36a97 5099{
b4e40676 5100 int r;
5fc3aeeb 5101 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5102
b4e40676
DP
5103 r = gfx_v8_0_hw_init(adev);
5104 adev->gfx.in_suspend = false;
5105 return r;
aaa36a97
AD
5106}
5107
5fc3aeeb 5108static bool gfx_v8_0_is_idle(void *handle)
aaa36a97 5109{
5fc3aeeb 5110 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5111
aaa36a97
AD
5112 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5113 return false;
5114 else
5115 return true;
5116}
5117
5fc3aeeb 5118static int gfx_v8_0_wait_for_idle(void *handle)
aaa36a97
AD
5119{
5120 unsigned i;
5fc3aeeb 5121 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
5122
5123 for (i = 0; i < adev->usec_timeout; i++) {
5003f278 5124 if (gfx_v8_0_is_idle(handle))
aaa36a97 5125 return 0;
5003f278 5126
aaa36a97
AD
5127 udelay(1);
5128 }
5129 return -ETIMEDOUT;
5130}
5131
da146d3b 5132static bool gfx_v8_0_check_soft_reset(void *handle)
aaa36a97 5133{
3d7c6384 5134 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
5135 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5136 u32 tmp;
5137
5138 /* GRBM_STATUS */
5139 tmp = RREG32(mmGRBM_STATUS);
5140 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5141 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5142 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5143 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5144 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3d7c6384
CZ
5145 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5146 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
aaa36a97
AD
5147 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5148 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5149 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5150 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
aaa36a97
AD
5151 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5152 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5153 }
5154
5155 /* GRBM_STATUS2 */
5156 tmp = RREG32(mmGRBM_STATUS2);
5157 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5158 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5159 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5160
3d7c6384
CZ
5161 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5162 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5163 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5164 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5165 SOFT_RESET_CPF, 1);
5166 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5167 SOFT_RESET_CPC, 1);
5168 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5169 SOFT_RESET_CPG, 1);
5170 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5171 SOFT_RESET_GRBM, 1);
5172 }
5173
aaa36a97
AD
5174 /* SRBM_STATUS */
5175 tmp = RREG32(mmSRBM_STATUS);
5176 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5177 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5178 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
3d7c6384
CZ
5179 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5180 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5181 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
aaa36a97
AD
5182
5183 if (grbm_soft_reset || srbm_soft_reset) {
3d7c6384
CZ
5184 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5185 adev->gfx.srbm_soft_reset = srbm_soft_reset;
da146d3b 5186 return true;
3d7c6384 5187 } else {
3d7c6384
CZ
5188 adev->gfx.grbm_soft_reset = 0;
5189 adev->gfx.srbm_soft_reset = 0;
da146d3b 5190 return false;
3d7c6384 5191 }
3d7c6384 5192}
aaa36a97 5193
1057f20c
CZ
/* IP-block pre_soft_reset hook: quiesce the GFX block before the actual
 * soft reset, based on the masks cached by gfx_v8_0_check_soft_reset().
 * Stops the RLC, halts GFX CP parsing, deactivates every compute HQD and
 * halts the compute CP as needed.  Always returns 0.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	/* nothing to do if check_soft_reset found no pending reset */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* deactivate each compute HQD under its srbm selection */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
7776a693 5235
3d7c6384
CZ
/* IP-block soft_reset hook: apply the GRBM/SRBM soft resets computed by
 * gfx_v8_0_check_soft_reset().
 *
 * Sequence: stall the memory controller's GFX traffic (GMCON_DEBUG),
 * assert then deassert each reset mask with 50us settle delays, then
 * release the GMCON stall.  Each WREG32 of a *_SOFT_RESET register is
 * followed by a readback to post the write.  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall GFX traffic in the memory controller during the reset */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		/* release the reset bits */
		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* un-stall the memory controller */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5297
e4ae0fc3
CZ
/* IP-block post_soft_reset hook: bring the GFX block back up after a soft
 * reset.  Mirrors pre_soft_reset: resume the GFX CP if it was reset,
 * deactivate stale compute HQDs and re-run the KIQ resume if any compute
 * engine was reset, then restart the RLC.  Always returns 0.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* make sure no HQD is left active before re-initializing */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5335
5336/**
5337 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5338 *
5339 * @adev: amdgpu_device pointer
5340 *
5341 * Fetches a GPU clock counter snapshot.
5342 * Returns the 64 bit clock counter snapshot.
5343 */
b95e31fd 5344static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
aaa36a97
AD
5345{
5346 uint64_t clock;
5347
5348 mutex_lock(&adev->gfx.gpu_clock_mutex);
5349 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5350 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5351 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5352 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5353 return clock;
5354}
5355
/* Emit WRITE_DATA packets that program a VMID's GDS base/size, GWS and OA
 * allocations into the per-VMID GDS registers.
 *
 * Byte-based inputs are converted to the hardware's allocation granularity
 * via the AMDGPU_*_SHIFT constants before being written.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* convert byte offsets/sizes to hardware allocation units */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base packed into one register */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: written as a contiguous bitmask of oa_size bits from oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5403
472259f0
TSD
/* Read a single indirect SQ register for the given SIMD/wave.
 * Programs SQ_IND_INDEX with the selection plus FORCE_READ, then returns
 * the value from SQ_IND_DATA.  Caller is responsible for any serialization
 * of SQ_IND_INDEX/SQ_IND_DATA access.
 */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}
5413
c5a60ce8
TSD
/* Bulk-read @num consecutive indirect SQ registers for a SIMD/wave/thread
 * into @out.  AUTO_INCR makes the hardware advance the index after each
 * SQ_IND_DATA read, so only one SQ_IND_INDEX write is needed.
 */
static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32(mmSQ_IND_DATA);
}
5428
472259f0
TSD
/* Snapshot the debug state of one wave (status, PC, EXEC mask, allocation
 * info, TBA/TMA, etc.) into @dst, advancing *@no_fields for each entry.
 * The leading 0 marks this as a "type 0" wave-data record for the
 * debugfs/umr consumer.
 */
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
5452
c5a60ce8
TSD
/* Read @size SGPRs of a wave, starting at SGPR index @start, into @dst.
 * Thread id 0 is used since SGPRs are shared across the wave.
 */
static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
5461
472259f0 5462
b95e31fd
AD
/* GFX helper callbacks exposed to the rest of the driver (clock counter,
 * SE/SH selection, wave debug readers). */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5469
5fc3aeeb 5470static int gfx_v8_0_early_init(void *handle)
aaa36a97 5471{
5fc3aeeb 5472 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
5473
5474 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
78c16834 5475 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
b95e31fd 5476 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
aaa36a97
AD
5477 gfx_v8_0_set_ring_funcs(adev);
5478 gfx_v8_0_set_irq_funcs(adev);
5479 gfx_v8_0_set_gds_init(adev);
dbff57bc 5480 gfx_v8_0_set_rlc_funcs(adev);
aaa36a97
AD
5481
5482 return 0;
5483}
5484
ccba7691
AD
/* IP-block late_init hook: enable the privileged register/instruction
 * fault interrupts, run the EDC GPR workaround (needs the IB pool, hence
 * "late"), and gate GFX power.  Returns 0 on success or a negative error.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5508
c2546f55
AD
/* Enable/disable static per-CU medium-grain power gating.  On Polaris11/12
 * the SMU must be informed via powerplay before flipping the RLC bit.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12))
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5522
c2546f55
AD
/* Enable/disable dynamic per-CU medium-grain power gating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5528
2cc0c0b5 5529static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
62a86fc2
EH
5530 bool enable)
5531{
61cb8cef 5532 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
62a86fc2
EH
5533}
5534
2c547165
AD
/* Enable/disable GFX coarse-grain power gating (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5540
/* Enable/disable GFX pipeline power gating (Carrizo/Stoney). */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5550
5551static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5552 bool enable)
5553{
5554 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5555 cz_enable_gfx_cg_power_gating(adev, true);
5556 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5557 cz_enable_gfx_pipeline_power_gating(adev, true);
5558 } else {
5559 cz_enable_gfx_cg_power_gating(adev, false);
5560 cz_enable_gfx_pipeline_power_gating(adev, false);
5561 }
5562}
5563
5fc3aeeb 5564static int gfx_v8_0_set_powergating_state(void *handle,
5565 enum amd_powergating_state state)
aaa36a97 5566{
62a86fc2 5567 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7e913664 5568 bool enable = (state == AMD_PG_STATE_GATE);
62a86fc2 5569
ce137c04
ML
5570 if (amdgpu_sriov_vf(adev))
5571 return 0;
5572
62a86fc2 5573 switch (adev->asic_type) {
2c547165
AD
5574 case CHIP_CARRIZO:
5575 case CHIP_STONEY:
ad1830d5 5576
5c964221
RZ
5577 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5578 cz_enable_sck_slow_down_on_power_up(adev, true);
5579 cz_enable_sck_slow_down_on_power_down(adev, true);
5580 } else {
5581 cz_enable_sck_slow_down_on_power_up(adev, false);
5582 cz_enable_sck_slow_down_on_power_down(adev, false);
5583 }
5584 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5585 cz_enable_cp_power_gating(adev, true);
5586 else
5587 cz_enable_cp_power_gating(adev, false);
5588
ad1830d5 5589 cz_update_gfx_cg_power_gating(adev, enable);
2c547165
AD
5590
5591 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5592 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5593 else
5594 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5595
5596 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5597 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5598 else
5599 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5600 break;
2cc0c0b5 5601 case CHIP_POLARIS11:
c4642a47 5602 case CHIP_POLARIS12:
7ba0eb6d
AD
5603 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5604 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5605 else
5606 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5607
5608 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5609 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5610 else
5611 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5612
5613 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5614 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
62a86fc2 5615 else
7ba0eb6d 5616 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
62a86fc2
EH
5617 break;
5618 default:
5619 break;
5620 }
5621
aaa36a97
AD
5622 return 0;
5623}
5624
ebd843d6
HR
/* IP-block get_clockgating_state hook: decode the current GFX clock-gating
 * configuration from hardware and OR the corresponding AMD_CG_SUPPORT_*
 * bits into *@flags.  Under SRIOV *flags is cleared first (register reads
 * below still execute on this path).
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG: MGCG active when the CPF override is clear */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGLG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS (implies MGLS) */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS (implies MGLS) */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5666
/* Issue a BPM serdes command (@cmd) to register @reg_addr on all CUs:
 * broadcast-select every SE/SH, arm both CU and non-CU master masks,
 * then program RLC_SERDES_WR_CTRL with the command.
 * Stoney uses a shorter control-word layout (no BPM_DATA/REG_ADDR
 * fields to clear), hence the asic_type special case. */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uintint32_t cmd)
{
	uint32_t data;

	/* broadcast to all shader engines/arrays/CUs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* 0xff BPM address broadcasts the command */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5707
dbff57bc
AD
/* Driver<->RLC mailbox messages for safe-mode handshaking, plus a
 * hand-rolled field layout for RLC_GPR_REG2 that is absent from the
 * generated register headers.
 * NOTE(review): the RLC_GPR_REG2 masks are defined locally — confirm
 * against the hardware register spec if they are ever reused. */
#define MSG_ENTER_RLC_SAFE_MODE      1
#define MSG_EXIT_RLC_SAFE_MODE       0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
dbff57bc 5714
dbff57bc
AD
/* Ask the RLC firmware to put the GFX block into "safe mode" so that
 * clock/power-gating registers can be reprogrammed without racing it.
 * No-op when the RLC is not running or neither CGCG nor MGCG is
 * advertised in cg_flags. */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* CMD=1, MESSAGE=1 requests safe-mode entry */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for both gfx clock and power status to report stable */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge by clearing CMD */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5748
/* Release the GFX block from RLC safe mode (counterpart of
 * iceland_enter_rlc_safe_mode). No-op when the RLC is not running. */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* CMD=1 with MESSAGE=0 requests safe-mode exit */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* Wait for the RLC to ack by clearing CMD. This runs even when no
	 * exit was requested above; CMD is then already clear and the loop
	 * terminates on the first iteration. */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5773
dbff57bc
AD
/* RLC safe-mode hooks used by the clockgating code in this file.
 * Despite the "iceland" name these are the handlers installed for the
 * VI parts driven here. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5778
dbff57bc
AD
/* Enable or disable medium-grain clockgating (MGCG) plus the related
 * light-sleep (MGLS/RLC_LS/CP_LS) and tree-shade (CGTS) features.
 * The numbered steps follow the hardware programming sequence and must
 * stay in order; the whole sequence runs under RLC safe mode. */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: drop the MGCG overrides.
		 * APUs keep the GRBM override set. */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override in the BPMs */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override in the BPMs */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5882
dbff57bc
AD
/* Enable or disable coarse-grain clockgating (CGCG) and, when
 * supported, coarse-grain light sleep (CGLS). The serdes commands and
 * register writes follow a fixed hardware sequence; the whole routine
 * runs under RLC safe mode. */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override in the BPMs */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG — set both CGCG and CGLS overrides */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
dbff57bc
AD
5975static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5976 bool enable)
6e378858
EH
5977{
5978 if (enable) {
5979 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5980 * === MGCG + MGLS + TS(CG/LS) ===
5981 */
dbff57bc
AD
5982 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5983 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6e378858
EH
5984 } else {
5985 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5986 * === CGCG + CGLS ===
5987 */
dbff57bc
AD
5988 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5989 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6e378858
EH
5990 }
5991 return 0;
5992}
5993
a8ca3413
RZ
/* Program TONGA GFX clockgating through the SMU: for each CG block
 * (coarse CG/LS, then medium-grain CG/LS) build a PP_CG_MSG_ID from
 * adev->cg_flags and hand it to powerplay. Ungating keeps the support
 * bits but requests state 0. Always returns 0. */
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
					  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
	void *pp_handle = adev->powerplay.pp_handle;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		/* ungate = "feature supported, but currently off" */
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	return 0;
}
6043
/* Program Polaris GFX clockgating through the SMU. Like the Tonga
 * variant, but Polaris additionally exposes 3D CG/LS, RLC light sleep
 * and CP light sleep as separate SMU-controlled blocks; one message is
 * sent per supported block. Ungating keeps the support bits and
 * requests state 0. Always returns 0. */
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
					  enum amd_clockgating_state state)
{

	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
	void *pp_handle = adev->powerplay.pp_handle;

	/* coarse-grain CG/LS */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	/* 3D CG/LS */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_3D,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	/* medium-grain CG/LS */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	/* RLC memory light sleep */
	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_RLC,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	/* CP memory light sleep */
	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			PP_BLOCK_GFX_CP,
			pp_support_state,
			pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	return 0;
}
6142
/* amd_ip_funcs .set_clockgating_state hook: dispatch to the per-ASIC
 * clockgating programming path. Direct register programming for
 * Fiji/Carrizo/Stoney; SMU-mediated for Tonga and Polaris. Skipped
 * entirely under SR-IOV (the host owns gating). Always returns 0. */
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}
6171
536fbf94 6172static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
aaa36a97 6173{
5003f278 6174 return ring->adev->wb.wb[ring->rptr_offs];
aaa36a97
AD
6175}
6176
536fbf94 6177static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
aaa36a97
AD
6178{
6179 struct amdgpu_device *adev = ring->adev;
aaa36a97
AD
6180
6181 if (ring->use_doorbell)
6182 /* XXX check if swapping is necessary on BE */
5003f278 6183 return ring->adev->wb.wb[ring->wptr_offs];
aaa36a97 6184 else
5003f278 6185 return RREG32(mmCP_RB0_WPTR);
aaa36a97
AD
6186}
6187
/* Publish the gfx ring's write pointer to the CP: via memory + doorbell
 * when doorbells are in use, otherwise via CP_RB0_WPTR (with a read
 * back to flush the posted MMIO write). */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		/* memory copy must land before the doorbell ring */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
6201
/* Emit a PM4 WAIT_REG_MEM that requests an HDP flush and polls
 * GPU_HDP_FLUSH_DONE until the bit for this engine is set. Compute/KIQ
 * rings select their per-pipe done bit from ring->me/pipe; gfx rings
 * use the CP0 bit and wait on the PFP. */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6234
45682886
ML
/* Emit the two EVENT_WRITE packets (VS partial flush, then VGT flush)
 * that drain the VGT before state that affects it is reloaded. */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6245
6246
d35db561
CZ
/* Invalidate the HDP read cache from the ring by writing 1 to
 * HDP_DEBUG0 with a confirmed WRITE_DATA packet. */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
6258
/* Schedule an indirect buffer on a gfx ring. CE IBs use the
 * INDIRECT_BUFFER_CONST packet, DE IBs the plain INDIRECT_BUFFER one.
 * Under SR-IOV, preemptible DE IBs are preceded by de-meta emission and
 * flagged with PRE_ENB so the CP can mid-IB preempt them. */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* VMID rides in bits 31:24 of the control word */
	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6288
/* Schedule an indirect buffer on a compute ring. Simpler than the gfx
 * path: always a DE INDIRECT_BUFFER with the VALID bit set and the
 * VMID in bits 31:24. */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6304
/* Emit a fence on a gfx ring: an EVENT_WRITE_EOP that flushes the TC
 * and L1 caches, writes @seq (32 or 64 bit per @flags) to @addr, and
 * optionally raises an interrupt. */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6325
/* Emit a WAIT_REG_MEM that stalls the ring until this ring's own fence
 * memory reaches the latest emitted sequence number — i.e. all prior
 * work on the pipeline has retired. Gfx rings wait on the PFP, compute
 * rings on the ME. */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6342
/* Emit a VM TLB flush for @vm_id: update the context's page-table base
 * register (contexts 0-7 and 8-15 live in different register banks),
 * kick VM_INVALIDATE_REQUEST for that VMID, poll until the invalidate
 * completes, and on gfx rings resync the PFP with the ME. */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6389
536fbf94 6390static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
aaa36a97
AD
6391{
6392 return ring->adev->wb.wb[ring->wptr_offs];
6393}
6394
/* Publish a compute ring's write pointer: update the writeback slot
 * first, then ring the doorbell to notify the CP. */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
6403
/* Emit a fence on a compute ring using RELEASE_MEM (the compute
 * equivalent of EVENT_WRITE_EOP): flush TC/L1, write @seq to @addr and
 * optionally raise an interrupt. */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6424
4e638ae9
XY
/* Emit a fence on the KIQ ring: plain WRITE_DATA of the 32-bit @seq to
 * @addr (KIQ fence slots are only 32 bits wide, hence the BUG_ON),
 * followed, when requested, by a write to CPC_INT_STATUS that triggers
 * the interrupt (src_id 178). */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6449
c2167a65
ML
/* Emit a SWITCH_BUFFER packet (swaps the CE/DE double-buffered state). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6455
753ad49c
ML
/* Emit a CONTEXT_CONTROL packet choosing which state groups the CP
 * loads for the next submission; on a context switch a VGT flush is
 * emitted first and the full register groups are reloaded. Under
 * SR-IOV the CE metadata is emitted ahead of the packet. */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6488
806ba2d4
ML
/* Emit a COND_EXEC packet whose DW-count is patched in later by
 * gfx_v8_0_ring_emit_patch_cond_exec(). Returns the ring offset of the
 * 0x55aa55aa placeholder so the caller can hand it to the patch step. */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6501
/* Patch the COND_EXEC placeholder at @offset with the number of DWs
 * emitted since gfx_v8_0_ring_emit_init_cond_exec(); the else branch
 * accounts for the write pointer having wrapped around the ring. */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6515
880e87e3
XY
/* Emit a COPY_DATA packet that copies register @reg into the shared
 * writeback slot at virt.reg_val_offs (used for register reads through
 * the KIQ under virtualization). */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6531
/* Emit a WRITE_DATA packet that writes @val to register @reg from the
 * ring (register writes through the KIQ under virtualization). */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
6541
aaa36a97
AD
6542static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6543 enum amdgpu_interrupt_state state)
6544{
61cb8cef
TSD
6545 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6546 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6547}
6548
6549static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6550 int me, int pipe,
6551 enum amdgpu_interrupt_state state)
6552{
d0c55cdf 6553 u32 mec_int_cntl, mec_int_cntl_reg;
aaa36a97 6554
aaa36a97 6555 /*
d0c55cdf
AD
6556 * amdgpu controls only the first MEC. That's why this function only
6557 * handles the setting of interrupts for this specific MEC. All other
aaa36a97
AD
6558 * pipes' interrupts are set by amdkfd.
6559 */
6560
6561 if (me == 1) {
6562 switch (pipe) {
6563 case 0:
d0c55cdf
AD
6564 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6565 break;
6566 case 1:
6567 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6568 break;
6569 case 2:
6570 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6571 break;
6572 case 3:
6573 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
aaa36a97
AD
6574 break;
6575 default:
6576 DRM_DEBUG("invalid pipe %d\n", pipe);
6577 return;
6578 }
6579 } else {
6580 DRM_DEBUG("invalid me %d\n", me);
6581 return;
6582 }
6583
d0c55cdf
AD
6584 switch (state) {
6585 case AMDGPU_IRQ_STATE_DISABLE:
6586 mec_int_cntl = RREG32(mec_int_cntl_reg);
6587 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6588 WREG32(mec_int_cntl_reg, mec_int_cntl);
6589 break;
6590 case AMDGPU_IRQ_STATE_ENABLE:
6591 mec_int_cntl = RREG32(mec_int_cntl_reg);
6592 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6593 WREG32(mec_int_cntl_reg, mec_int_cntl);
6594 break;
6595 default:
6596 break;
6597 }
aaa36a97
AD
6598}
6599
6600static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6601 struct amdgpu_irq_src *source,
6602 unsigned type,
6603 enum amdgpu_interrupt_state state)
6604{
61cb8cef
TSD
6605 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6606 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6607
6608 return 0;
6609}
6610
6611static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6612 struct amdgpu_irq_src *source,
6613 unsigned type,
6614 enum amdgpu_interrupt_state state)
6615{
61cb8cef
TSD
6616 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6617 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6618
6619 return 0;
6620}
6621
6622static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6623 struct amdgpu_irq_src *src,
6624 unsigned type,
6625 enum amdgpu_interrupt_state state)
6626{
6627 switch (type) {
6628 case AMDGPU_CP_IRQ_GFX_EOP:
6629 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6630 break;
6631 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6632 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6633 break;
6634 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6635 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6636 break;
6637 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6638 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6639 break;
6640 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6641 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6642 break;
6643 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6644 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6645 break;
6646 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6647 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6648 break;
6649 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6650 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6651 break;
6652 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6653 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6654 break;
6655 default:
6656 break;
6657 }
6658 return 0;
6659}
6660
/* CP end-of-pipe interrupt handler: decode which ME/pipe/queue raised
 * the interrupt from the IV entry and run fence processing on the
 * matching ring(s).  Always returns 0.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id bit layout: [6:4] = queue, [3:2] = ME, [1:0] = pipe */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		/* ME 0 is the gfx engine; only gfx_ring[0] is signalled */
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		/* ME 1/2 are the compute MECs; find the matching ring */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6692
/* Handler for privileged-register faults raised by the CP: log the
 * violation and schedule adev->reset_work (the GPU reset path).
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6701
/* Handler for privileged-instruction faults raised by the CP: log the
 * violation and schedule adev->reset_work (the GPU reset path).
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6710
4e638ae9
XY
/* Enable or disable the GENERIC2 interrupt used by the KIQ ring.  The
 * enable bit must be set both globally (CPC_INT_CNTL) and on the
 * per-pipe control register of whichever MEC/pipe the KIQ ring lives
 * on.  Only AMDGPU_CP_KIQ_IRQ_DRIVER0 is supported; anything else is a
 * driver bug.
 */
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		/* global CPC enable */
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		/* per-pipe enable: the *_PIPE0_INT_CNTL register is offset
		 * by ring->pipe to reach the right pipe's register */
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}
6739
6740static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6741 struct amdgpu_irq_src *source,
6742 struct amdgpu_iv_entry *entry)
6743{
6744 u8 me_id, pipe_id, queue_id;
07c397f9 6745 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
4e638ae9 6746
4e638ae9
XY
6747 me_id = (entry->ring_id & 0x0c) >> 2;
6748 pipe_id = (entry->ring_id & 0x03) >> 0;
6749 queue_id = (entry->ring_id & 0x70) >> 4;
6750 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6751 me_id, pipe_id, queue_id);
6752
6753 amdgpu_fence_process(ring);
6754 return 0;
6755}
6756
/* IP-block hooks (init/fini, suspend/resume, soft reset, clock/power
 * gating) for the GFX v8 engine; referenced by the
 * gfx_v8_0_ip_block/gfx_v8_1_ip_block version descriptors.
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6777
/* Ring callbacks for the graphics (GFX) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
		       prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
6821
/* Ring callbacks for the compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6850
4e638ae9
XY
/* Ring callbacks for the kernel interface queue (KIQ) ring; this is the
 * only ring type providing emit_rreg/emit_wreg for register access
 * through the ring.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6876
aaa36a97
AD
6877static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6878{
6879 int i;
6880
4e638ae9
XY
6881 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6882
aaa36a97
AD
6883 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6884 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6885
6886 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6887 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6888}
6889
/* End-of-pipe interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6894
/* Privileged-register fault interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6899
/* Privileged-instruction fault interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6904
4e638ae9
XY
/* KIQ GENERIC2 interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};
6909
aaa36a97
AD
6910static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6911{
6912 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6913 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6914
6915 adev->gfx.priv_reg_irq.num_types = 1;
6916 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6917
6918 adev->gfx.priv_inst_irq.num_types = 1;
6919 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
4e638ae9
XY
6920
6921 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
6922 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
aaa36a97
AD
6923}
6924
dbff57bc
AD
/* Install the RLC helper callbacks; gfx v8 reuses the Iceland RLC
 * function table.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
6929
aaa36a97
AD
/* Initialize the per-ASIC GDS/GWS/OA sizes and split each resource into
 * gfx and CS partitions.  The partition split depends on whether the
 * chip exposes a 64KB GDS or a larger one.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
6957
9de06de8
NH
6958static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6959 u32 bitmap)
6960{
6961 u32 data;
6962
6963 if (!bitmap)
6964 return;
6965
6966 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6967 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6968
6969 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6970}
6971
/* Return the bitmap of active CUs in the currently selected shader
 * array (callers pick the SE/SH via gfx_v8_0_select_se_sh() first).
 * A CU is active when it is marked inactive in neither the hardware
 * (CC_GC_SHADER_ARRAY_CONFIG) nor the user (GC_USER_SHADER_ARRAY_CONFIG)
 * register, limited to max_cu_per_sh bits.
 */
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
6983
/* Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active CU count, and the always-on (AO) CU mask/bitmaps.  Iterates
 * every shader engine / shader array under grbm_idx_mutex, applying any
 * user-requested CU disable masks before reading the active bitmap.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the always-on CU count at 2; dGPUs allow a full SH */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	/* fetch user CU-disable masks for up to 4 SEs x 2 SHs */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* point GRBM at this SE/SH before touching the
			 * per-array registers */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* the first ao_cu_num active CUs become always-on */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* ao_cu_mask only has room for 2 SEs x 2 SHs
			 * (8 bits per array) */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* restore broadcast mode so later register writes hit all arrays */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
a1255107
AD
7034
/* GFX IP block descriptor, version 8.0. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7043
/* GFX IP block descriptor, version 8.1 (shares the 8.0 callbacks). */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
acad2b2a 7052
95243543 7053static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
acad2b2a
ML
7054{
7055 uint64_t ce_payload_addr;
7056 int cnt_ce;
7057 static union {
49abb980
XY
7058 struct vi_ce_ib_state regular;
7059 struct vi_ce_ib_state_chained_ib chained;
e8411302 7060 } ce_payload = {};
acad2b2a
ML
7061
7062 if (ring->adev->virt.chained_ib_support) {
95243543
ML
7063 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7064 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
acad2b2a
ML
7065 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7066 } else {
95243543
ML
7067 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7068 offsetof(struct vi_gfx_meta_data, ce_payload);
acad2b2a
ML
7069 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7070 }
7071
7072 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7073 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7074 WRITE_DATA_DST_SEL(8) |
7075 WR_CONFIRM) |
7076 WRITE_DATA_CACHE_POLICY(0));
7077 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7078 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7079 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7080}
7081
95243543 7082static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
acad2b2a 7083{
95243543 7084 uint64_t de_payload_addr, gds_addr, csa_addr;
acad2b2a
ML
7085 int cnt_de;
7086 static union {
49abb980
XY
7087 struct vi_de_ib_state regular;
7088 struct vi_de_ib_state_chained_ib chained;
e8411302 7089 } de_payload = {};
acad2b2a 7090
95243543 7091 csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
acad2b2a
ML
7092 gds_addr = csa_addr + 4096;
7093 if (ring->adev->virt.chained_ib_support) {
7094 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7095 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
49abb980 7096 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
acad2b2a
ML
7097 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7098 } else {
7099 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7100 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
49abb980 7101 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
acad2b2a
ML
7102 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7103 }
7104
7105 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7106 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7107 WRITE_DATA_DST_SEL(8) |
7108 WR_CONFIRM) |
7109 WRITE_DATA_CACHE_POLICY(0));
7110 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7111 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7112 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7113}