/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15.h"
#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS 1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS 0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");

static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	default:
		break;
	}

	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

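/*
 * Basic ring sanity test: seed a scratch register with 0xCAFEDEAD, submit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, then poll the register
 * until the new value appears or adev->usec_timeout expires.
 */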
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

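/*
 * Indirect-buffer test: seed a writeback slot with 0xCAFEDEAD, submit an IB
 * whose WRITE_DATA packet stores 0xDEADBEEF to that slot, then wait on the
 * fence and verify that memory was updated.
 */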
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

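/*
 * Parse the v2.1 RLC firmware header and cache the save/restore list
 * (CNTL/GPM/SRM) versions, sizes and payload pointers for later loading.
 */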
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

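/*
 * Set me_fw_write_wait/mec_fw_write_wait only when the loaded ME/PFP and MEC
 * firmware meet the per-ASIC minimum versions listed below.
 */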
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		break;
	}
}

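/*
 * Disable GFXOFF (clear PP_GFXOFF_MASK) on Raven parts whose RLC firmware is
 * too old, blacklisted, or not RLC v2.1; other ASICs are left untouched.
 */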
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
			break;
		if ((adev->gfx.rlc_fw_version < 531) ||
		    (adev->gfx.rlc_fw_version == 53815) ||
		    (adev->gfx.rlc_feature_version < 1) ||
		    !adev->gfx.rlc.is_rlc_v2_1)
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
		break;
	default:
		break;
	}
}

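/*
 * Request and validate the PFP/ME/CE/RLC/MEC (and optional MEC2) firmware
 * images for the detected ASIC, parse their headers, and populate the
 * firmware info table used for PSP loading.
 */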
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_VEGA20:
		chip_name = "vega20";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 *          or revision >= 0xD8 && revision <= 0xDF
	 * otherwise is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
	    (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
	     ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);


	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
			info->fw = adev->gfx.mec2_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
		}

	}

out:
	gfx_v9_0_check_if_need_gfxoff(adev);
	gfx_v9_0_check_fw_write_wait(adev);
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

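/*
 * Clear-state buffer (CSB) helpers: the buffer size and contents are built
 * from the gfx9_cs_data context register tables.
 */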
static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

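/*
 * Program the RLC always-on CU bitmap per SE/SH: the first 4 (APU), 8
 * (Vega12) or 12 CUs of each SH are kept active, and the first two are also
 * written to RLC_PG_ALWAYS_ON_CU_MASK.
 */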
static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
{
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	uint32_t pg_always_on_cu_num = 2;
	uint32_t always_on_cu_num;
	uint32_t i, j, k;
	uint32_t mask, cu_bitmap, counter;

	if (adev->flags & AMD_IS_APU)
		always_on_cu_num = 4;
	else if (adev->asic_type == CHIP_VEGA12)
		always_on_cu_num = 8;
	else
		always_on_cu_num = 12;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			cu_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (cu_info->bitmap[i][j] & mask) {
					if (counter == pg_always_on_cu_num)
						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
					if (counter < always_on_cu_num)
						cu_bitmap |= mask;
					else
						break;
					counter++;
				}
				mask <<= 1;
			}

			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

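/*
 * Program the RLC load-balancing (LBPW) thresholds, counters and parameters
 * for Raven, then set up the always-on CU mask.
 */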
static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}

static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	return 5;
}

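/*
 * RLC initialization: create the clear-state BO, the Raven CP jump table,
 * and apply the per-ASIC LBPW setup.
 */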
static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx9_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if (adev->asic_type == CHIP_RAVEN) {
		/* TODO: double check the cp_table_size for RV */
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		gfx_v9_0_init_lbpw(adev);
		break;
	case CHIP_VEGA20:
		gfx_v9_4_init_lbpw(adev);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
			  AMDGPU_GEM_DOMAIN_VRAM);
	if (!r)
		adev->gfx.rlc.clear_state_gpu_addr =
			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);

	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);

	return r;
}

static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
{
	int r;

	if (!adev->gfx.rlc.clear_state_obj)
		return;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
	if (likely(r == 0)) {
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}
}

static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}

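/*
 * Allocate the MEC HPD EOP buffer in VRAM for the acquired compute queues
 * and stage the MEC firmware image in a GTT buffer object.
 */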
static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data;
	unsigned fw_size;
	u32 *fw;
	size_t mec_hpd_size;

	const struct gfx_firmware_header_v1_0 *mec_hdr;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw);
	if (r) {
		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memcpy(fw, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

	return 0;
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 1 wave data */
	dst[(*no_fields)++] = 1;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t thread,
				     uint32_t start, uint32_t size,
				     uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q)
{
	soc15_grbm_select(adev, me, pipe, q, 0);
}

static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_0_select_se_sh,
	.read_wave_data = &gfx_v9_0_read_wave_data,
	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
};

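/*
 * Early per-ASIC gfx configuration: hw context count, SC FIFO sizes and the
 * GB_ADDR_CONFIG value, which is then decoded into its individual fields.
 */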
3251c043 1313static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
b1023571
KW
1314{
1315 u32 gb_addr_config;
3251c043 1316 int err;
b1023571
KW
1317
1318 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1319
1320 switch (adev->asic_type) {
1321 case CHIP_VEGA10:
b1023571 1322 adev->gfx.config.max_hw_contexts = 8;
b1023571
KW
1323 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1324 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1325 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1326 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1327 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1328 break;
e5c62edd
AD
1329 case CHIP_VEGA12:
1330 adev->gfx.config.max_hw_contexts = 8;
1331 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1332 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1333 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1334 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
62b35f9a 1335 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
e5c62edd
AD
1336 DRM_INFO("fix gfx.config for vega12\n");
1337 break;
d3adedb4
FX
1338 case CHIP_VEGA20:
1339 adev->gfx.config.max_hw_contexts = 8;
1340 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1341 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1342 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1343 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1344 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1345 gb_addr_config &= ~0xf3e777ff;
1346 gb_addr_config |= 0x22014042;
3251c043
AD
1347 /* check vbios table if gpu info is not available */
1348 err = amdgpu_atomfirmware_get_gfx_info(adev);
1349 if (err)
1350 return err;
d3adedb4 1351 break;
5cf7433d
CZ
1352 case CHIP_RAVEN:
1353 adev->gfx.config.max_hw_contexts = 8;
1354 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1355 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1356 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1357 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
28ab1229
FX
1358 if (adev->rev_id >= 8)
1359 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1360 else
1361 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
5cf7433d 1362 break;
b1023571
KW
1363 default:
1364 BUG();
1365 break;
1366 }
1367
1368 adev->gfx.config.gb_addr_config = gb_addr_config;
1369
1370 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1371 REG_GET_FIELD(
1372 adev->gfx.config.gb_addr_config,
1373 GB_ADDR_CONFIG,
1374 NUM_PIPES);
ad7d0ff3
AD
1375
1376 adev->gfx.config.max_tile_pipes =
1377 adev->gfx.config.gb_addr_config_fields.num_pipes;
1378
b1023571
KW
1379 adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1380 REG_GET_FIELD(
1381 adev->gfx.config.gb_addr_config,
1382 GB_ADDR_CONFIG,
1383 NUM_BANKS);
1384 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1385 REG_GET_FIELD(
1386 adev->gfx.config.gb_addr_config,
1387 GB_ADDR_CONFIG,
1388 MAX_COMPRESSED_FRAGS);
1389 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1390 REG_GET_FIELD(
1391 adev->gfx.config.gb_addr_config,
1392 GB_ADDR_CONFIG,
1393 NUM_RB_PER_SE);
1394 adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1395 REG_GET_FIELD(
1396 adev->gfx.config.gb_addr_config,
1397 GB_ADDR_CONFIG,
1398 NUM_SHADER_ENGINES);
1399 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1400 REG_GET_FIELD(
1401 adev->gfx.config.gb_addr_config,
1402 GB_ADDR_CONFIG,
1403 PIPE_INTERLEAVE_SIZE));
3251c043
AD
1404
1405 return 0;
b1023571
KW
1406}
1407
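/*
 * Allocate one NGG buffer in VRAM. The size is given per shader engine
 * and scaled by the number of SEs; a zero size falls back to the default.
 */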
1408static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1409 struct amdgpu_ngg_buf *ngg_buf,
1410 int size_se,
1411 int default_size_se)
1412{
1413 int r;
1414
1415 if (size_se < 0) {
1416 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1417 return -EINVAL;
1418 }
1419 size_se = size_se ? size_se : default_size_se;
1420
42ce2243 1421 ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
b1023571
KW
1422 r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1423 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1424 &ngg_buf->bo,
1425 &ngg_buf->gpu_addr,
1426 NULL);
1427 if (r) {
1428 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1429 return r;
1430 }
1431 ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1432
1433 return r;
1434}
1435
1436static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1437{
1438 int i;
1439
1440 for (i = 0; i < NGG_BUF_MAX; i++)
1441 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1442 &adev->gfx.ngg.buf[i].gpu_addr,
1443 NULL);
1444
1445 memset(&adev->gfx.ngg.buf[0], 0,
1446 sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1447
1448 adev->gfx.ngg.init = false;
1449
1450 return 0;
1451}
1452
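/*
 * Reserve a small GDS window for NGG and create the primitive, position,
 * control sideband and (optional) parameter cache buffers.
 */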
1453static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1454{
1455 int r;
1456
 1457	if (!amdgpu_ngg || adev->gfx.ngg.init)
	1457	if (!amdgpu_ngg || adev->gfx.ngg.init)
1458 return 0;
1459
1460 /* GDS reserve memory: 64 bytes alignment */
1461 adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1462 adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
1463 adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
d33bba4d
JZ
1464 adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1465 adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
b1023571
KW
1466
1467 /* Primitive Buffer */
af8baf15 1468 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
b1023571
KW
1469 amdgpu_prim_buf_per_se,
1470 64 * 1024);
1471 if (r) {
1472 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1473 goto err;
1474 }
1475
1476 /* Position Buffer */
af8baf15 1477 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
b1023571
KW
1478 amdgpu_pos_buf_per_se,
1479 256 * 1024);
1480 if (r) {
1481 dev_err(adev->dev, "Failed to create Position Buffer\n");
1482 goto err;
1483 }
1484
1485 /* Control Sideband */
af8baf15 1486 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
b1023571
KW
1487 amdgpu_cntl_sb_buf_per_se,
1488 256);
1489 if (r) {
1490 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1491 goto err;
1492 }
1493
1494 /* Parameter Cache, not created by default */
1495 if (amdgpu_param_buf_per_se <= 0)
1496 goto out;
1497
af8baf15 1498 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
b1023571
KW
1499 amdgpu_param_buf_per_se,
1500 512 * 1024);
1501 if (r) {
1502 dev_err(adev->dev, "Failed to create Parameter Cache\n");
1503 goto err;
1504 }
1505
1506out:
1507 adev->gfx.ngg.init = true;
1508 return 0;
1509err:
1510 gfx_v9_0_ngg_fini(adev);
1511 return r;
1512}
1513
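/*
 * Program the WD buffer size/base registers from the NGG BOs and clear
 * the reserved GDS window with a CP DMA_DATA packet on the gfx ring.
 */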
1514static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1515{
1516 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1517 int r;
91629eff 1518 u32 data, base;
b1023571
KW
1519
1520 if (!amdgpu_ngg)
1521 return 0;
1522
1523 /* Program buffer size */
91629eff
TSD
1524 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1525 adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1526 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1527 adev->gfx.ngg.buf[NGG_POS].size >> 8);
5e78835a 1528 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
b1023571 1529
91629eff
TSD
1530 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1531 adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1532 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1533 adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
5e78835a 1534 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
b1023571
KW
1535
1536 /* Program buffer base address */
af8baf15 1537 base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
b1023571 1538 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
5e78835a 1539 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
b1023571 1540
af8baf15 1541 base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
b1023571 1542 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
5e78835a 1543 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
b1023571 1544
af8baf15 1545 base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
b1023571 1546 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
5e78835a 1547 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
b1023571 1548
af8baf15 1549 base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
b1023571 1550 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
5e78835a 1551 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
b1023571 1552
af8baf15 1553 base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
b1023571 1554 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
5e78835a 1555 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
b1023571 1556
af8baf15 1557 base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
b1023571 1558 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
5e78835a 1559 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
b1023571
KW
1560
1561 /* Clear GDS reserved memory */
1562 r = amdgpu_ring_alloc(ring, 17);
1563 if (r) {
6e82c6e0
CK
1564 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1565 ring->name, r);
b1023571
KW
1566 return r;
1567 }
1568
1569 gfx_v9_0_write_data_to_reg(ring, 0, false,
946a4d5b 1570 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
b1023571 1571 (adev->gds.mem.total_size +
77a2faa5 1572 adev->gfx.ngg.gds_reserve_size));
b1023571
KW
1573
1574 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1575 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
d33bba4d 1576 PACKET3_DMA_DATA_DST_SEL(1) |
b1023571
KW
1577 PACKET3_DMA_DATA_SRC_SEL(2)));
1578 amdgpu_ring_write(ring, 0);
1579 amdgpu_ring_write(ring, 0);
1580 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1581 amdgpu_ring_write(ring, 0);
d33bba4d
JZ
1582 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1583 adev->gfx.ngg.gds_reserve_size);
b1023571
KW
1584
1585 gfx_v9_0_write_data_to_reg(ring, 0, false,
946a4d5b 1586 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
b1023571
KW
1587
1588 amdgpu_ring_commit(ring);
1589
1590 return 0;
1591}
1592
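/*
 * Initialize one compute ring for the given MEC/pipe/queue. MEC0 is
 * exposed as ME1; the doorbell index and EOP offset are derived from
 * ring_id.
 */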
1361f455
AD
1593static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1594 int mec, int pipe, int queue)
1595{
1596 int r;
1597 unsigned irq_type;
 1598	struct amdgpu_ring *ring;
 1599
 1600	ring = &adev->gfx.compute_ring[ring_id];
1601
1602 /* mec0 is me1 */
1603 ring->me = mec + 1;
1604 ring->pipe = pipe;
1605 ring->queue = queue;
1606
1607 ring->ring_obj = NULL;
1608 ring->use_doorbell = true;
9564f192 1609 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1361f455
AD
1610 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1611 + (ring_id * GFX9_MEC_HPD_SIZE);
1612 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1613
1614 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1615 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1616 + ring->pipe;
1617
1618 /* type-2 packets are deprecated on MEC, use type-3 instead */
1619 r = amdgpu_ring_init(adev, ring, 1024,
1620 &adev->gfx.eop_irq, irq_type);
1621 if (r)
1622 return r;
1623
1624
1625 return 0;
1626}
1627
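/*
 * sw_init: register the CP interrupt sources, load microcode, create the
 * RLC/MEC/KIQ objects and MQDs, and set up the gfx and compute rings.
 */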
b1023571
KW
1628static int gfx_v9_0_sw_init(void *handle)
1629{
1361f455 1630 int i, j, k, r, ring_id;
b1023571 1631 struct amdgpu_ring *ring;
ac104e99 1632 struct amdgpu_kiq *kiq;
b1023571
KW
1633 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1634
4853bbb6
AD
1635 switch (adev->asic_type) {
1636 case CHIP_VEGA10:
8b399477 1637 case CHIP_VEGA12:
61324ddc 1638 case CHIP_VEGA20:
4853bbb6
AD
1639 case CHIP_RAVEN:
1640 adev->gfx.mec.num_mec = 2;
1641 break;
1642 default:
1643 adev->gfx.mec.num_mec = 1;
1644 break;
1645 }
1646
1647 adev->gfx.mec.num_pipe_per_mec = 4;
1648 adev->gfx.mec.num_queue_per_pipe = 8;
1649
b1023571 1650 /* EOP Event */
44a99b65 1651 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
b1023571
KW
1652 if (r)
1653 return r;
1654
1655 /* Privileged reg */
44a99b65 1656 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
b1023571
KW
1657 &adev->gfx.priv_reg_irq);
1658 if (r)
1659 return r;
1660
1661 /* Privileged inst */
44a99b65 1662 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
b1023571
KW
1663 &adev->gfx.priv_inst_irq);
1664 if (r)
1665 return r;
1666
760a1d55
FX
1667 /* ECC error */
1668 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1669 &adev->gfx.cp_ecc_error_irq);
1670 if (r)
1671 return r;
1672
1673 /* FUE error */
1674 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1675 &adev->gfx.cp_ecc_error_irq);
1676 if (r)
1677 return r;
1678
b1023571
KW
1679 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1680
1681 gfx_v9_0_scratch_init(adev);
1682
1683 r = gfx_v9_0_init_microcode(adev);
1684 if (r) {
1685 DRM_ERROR("Failed to load gfx firmware!\n");
1686 return r;
1687 }
1688
fdb81fd7 1689 r = adev->gfx.rlc.funcs->init(adev);
c9719c69
HZ
1690 if (r) {
1691 DRM_ERROR("Failed to init rlc BOs!\n");
1692 return r;
1693 }
1694
b1023571
KW
1695 r = gfx_v9_0_mec_init(adev);
1696 if (r) {
1697 DRM_ERROR("Failed to init MEC BOs!\n");
1698 return r;
1699 }
1700
1701 /* set up the gfx ring */
1702 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1703 ring = &adev->gfx.gfx_ring[i];
1704 ring->ring_obj = NULL;
f6886c47
TSD
1705 if (!i)
1706 sprintf(ring->name, "gfx");
1707 else
1708 sprintf(ring->name, "gfx_%d", i);
b1023571 1709 ring->use_doorbell = true;
9564f192 1710 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
b1023571
KW
1711 r = amdgpu_ring_init(adev, ring, 1024,
1712 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
1713 if (r)
1714 return r;
1715 }
1716
1361f455
AD
1717 /* set up the compute queues - allocate horizontally across pipes */
1718 ring_id = 0;
1719 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1720 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1721 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2db0cdbe 1722 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1361f455
AD
1723 continue;
1724
1725 r = gfx_v9_0_compute_ring_init(adev,
1726 ring_id,
1727 i, k, j);
1728 if (r)
1729 return r;
1730
1731 ring_id++;
1732 }
b1023571 1733 }
b1023571
KW
1734 }
1735
71c37505 1736 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
e30a5223
AD
1737 if (r) {
1738 DRM_ERROR("Failed to init KIQ BOs!\n");
1739 return r;
1740 }
ac104e99 1741
e30a5223 1742 kiq = &adev->gfx.kiq;
71c37505 1743 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
e30a5223
AD
1744 if (r)
1745 return r;
464826d6 1746
e30a5223 1747	/* create MQD for all compute queues as well as KIQ for SRIOV case */
ffe6d881 1748 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
e30a5223
AD
1749 if (r)
1750 return r;
ac104e99 1751
b1023571
KW
1752 adev->gfx.ce_ram_size = 0x8000;
1753
3251c043
AD
1754 r = gfx_v9_0_gpu_early_init(adev);
1755 if (r)
1756 return r;
b1023571
KW
1757
1758 r = gfx_v9_0_ngg_init(adev);
1759 if (r)
1760 return r;
1761
1762 return 0;
1763}
1764
1765
1766static int gfx_v9_0_sw_fini(void *handle)
1767{
1768 int i;
1769 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1770
760a1d55
FX
1771 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1772 adev->gfx.ras_if) {
1773 struct ras_common_if *ras_if = adev->gfx.ras_if;
1774 struct ras_ih_if ih_info = {
1775 .head = *ras_if,
1776 };
1777
1778 amdgpu_ras_debugfs_remove(adev, ras_if);
1779 amdgpu_ras_sysfs_remove(adev, ras_if);
1780 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
1781 amdgpu_ras_feature_enable(adev, ras_if, 0);
1782 kfree(ras_if);
1783 }
1784
b1023571
KW
1785 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
1786 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
1787 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
1788
1789 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1790 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1791 for (i = 0; i < adev->gfx.num_compute_rings; i++)
1792 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1793
b9683c21 1794 amdgpu_gfx_compute_mqd_sw_fini(adev);
71c37505
AD
1795 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1796 amdgpu_gfx_kiq_fini(adev);
ac104e99 1797
b1023571
KW
1798 gfx_v9_0_mec_fini(adev);
1799 gfx_v9_0_ngg_fini(adev);
9862def9
ML
1800 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
1801 &adev->gfx.rlc.clear_state_gpu_addr,
1802 (void **)&adev->gfx.rlc.cs_ptr);
741deade 1803 if (adev->asic_type == CHIP_RAVEN) {
9862def9
ML
1804 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1805 &adev->gfx.rlc.cp_table_gpu_addr,
1806 (void **)&adev->gfx.rlc.cp_table_ptr);
1807 }
c833d8aa 1808 gfx_v9_0_free_microcode(adev);
b1023571
KW
1809
1810 return 0;
1811}
1812
1813
1814static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1815{
1816 /* TODO */
1817}
1818
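/*
 * Program GRBM_GFX_INDEX to target one SE/SH/instance, or broadcast when
 * the corresponding argument is 0xffffffff.
 */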
1819static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1820{
be448a4d 1821 u32 data;
b1023571 1822
be448a4d
NH
1823 if (instance == 0xffffffff)
1824 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1825 else
1826 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1827
1828 if (se_num == 0xffffffff)
b1023571 1829 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
be448a4d 1830 else
b1023571 1831 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
be448a4d
NH
1832
1833 if (sh_num == 0xffffffff)
1834 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1835 else
b1023571 1836 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
be448a4d 1837
5e78835a 1838 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
b1023571
KW
1839}
1840
b1023571
KW
1841static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1842{
1843 u32 data, mask;
1844
5e78835a
TSD
1845 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1846 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
b1023571
KW
1847
1848 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1849 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1850
378506a7
AD
1851 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1852 adev->gfx.config.max_sh_per_se);
b1023571
KW
1853
1854 return (~data) & mask;
1855}
1856
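/*
 * Walk every SE/SH, collect the active render backend bitmap and cache
 * the enable mask and RB count in adev->gfx.config.
 */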
1857static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1858{
1859 int i, j;
2572c24c 1860 u32 data;
b1023571
KW
1861 u32 active_rbs = 0;
1862 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1863 adev->gfx.config.max_sh_per_se;
1864
1865 mutex_lock(&adev->grbm_idx_mutex);
1866 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1867 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1868 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1869 data = gfx_v9_0_get_rb_active_bitmap(adev);
1870 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1871 rb_bitmap_width_per_sh);
1872 }
1873 }
1874 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1875 mutex_unlock(&adev->grbm_idx_mutex);
1876
1877 adev->gfx.config.backend_enable_mask = active_rbs;
2572c24c 1878 adev->gfx.config.num_rbs = hweight32(active_rbs);
b1023571
KW
1879}
1880
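/*
 * Give the compute VMIDs (8..15) a fixed SH_MEM aperture layout so user
 * queues see the same address map on every VMID.
 */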
1881#define DEFAULT_SH_MEM_BASES (0x6000)
1882#define FIRST_COMPUTE_VMID (8)
1883#define LAST_COMPUTE_VMID (16)
1884static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1885{
1886 int i;
1887 uint32_t sh_mem_config;
1888 uint32_t sh_mem_bases;
1889
1890 /*
1891 * Configure apertures:
1892 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
1893 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
1894 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
1895 */
1896 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1897
1898 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1899 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
eaa05d52 1900 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
b1023571
KW
1901
1902 mutex_lock(&adev->srbm_mutex);
1903 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1904 soc15_grbm_select(adev, 0, 0, 0, i);
1905 /* CP and shaders */
5e78835a
TSD
1906 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1907 WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
b1023571
KW
1908 }
1909 soc15_grbm_select(adev, 0, 0, 0, 0);
1910 mutex_unlock(&adev->srbm_mutex);
1911}
1912
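/*
 * One-time golden register setup: GRBM read timeout, RB configuration,
 * per-VMID SH_MEM bases and the PA_SC primitive/tile FIFO sizes.
 */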
434e6df2 1913static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
b1023571
KW
1914{
1915 u32 tmp;
1916 int i;
1917
40f06773 1918 WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
b1023571
KW
1919
1920 gfx_v9_0_tiling_mode_table_init(adev);
1921
1922 gfx_v9_0_setup_rb(adev);
1923 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
5eeae247 1924 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
b1023571
KW
1925
1926 /* XXX SH_MEM regs */
1927 /* where to put LDS, scratch, GPUVM in FSA64 space */
1928 mutex_lock(&adev->srbm_mutex);
32b646b2 1929 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
b1023571
KW
1930 soc15_grbm_select(adev, 0, 0, 0, i);
1931 /* CP and shaders */
a7ea6548
AD
1932 if (i == 0) {
1933 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1934 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1935 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
1936 WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
1937 } else {
1938 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1939 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1940 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
bfa8eea2
FC
1941 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1942 (adev->gmc.private_aperture_start >> 48));
1943 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1944 (adev->gmc.shared_aperture_start >> 48));
a7ea6548
AD
1945 WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
1946 }
b1023571
KW
1947 }
1948 soc15_grbm_select(adev, 0, 0, 0, 0);
1949
1950 mutex_unlock(&adev->srbm_mutex);
1951
1952 gfx_v9_0_init_compute_vmid(adev);
1953
1954 mutex_lock(&adev->grbm_idx_mutex);
1955 /*
1956 * making sure that the following register writes will be broadcasted
1957 * to all the shaders
1958 */
1959 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1960
5e78835a 1961 WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
b1023571
KW
1962 (adev->gfx.config.sc_prim_fifo_size_frontend <<
1963 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1964 (adev->gfx.config.sc_prim_fifo_size_backend <<
1965 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
1966 (adev->gfx.config.sc_hiz_tile_fifo_size <<
1967 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
1968 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
1969 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
1970 mutex_unlock(&adev->grbm_idx_mutex);
1971
1972}
1973
1974static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1975{
1976 u32 i, j, k;
1977 u32 mask;
1978
1979 mutex_lock(&adev->grbm_idx_mutex);
1980 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1981 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1982 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1983 for (k = 0; k < adev->usec_timeout; k++) {
5e78835a 1984 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
b1023571
KW
1985 break;
1986 udelay(1);
1987 }
1366b2d0 1988 if (k == adev->usec_timeout) {
1989 gfx_v9_0_select_se_sh(adev, 0xffffffff,
1990 0xffffffff, 0xffffffff);
1991 mutex_unlock(&adev->grbm_idx_mutex);
 1992				DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
1993 i, j);
1994 return;
1995 }
b1023571
KW
1996 }
1997 }
1998 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1999 mutex_unlock(&adev->grbm_idx_mutex);
2000
2001 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2002 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2003 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2004 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2005 for (k = 0; k < adev->usec_timeout; k++) {
5e78835a 2006 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
b1023571
KW
2007 break;
2008 udelay(1);
2009 }
2010}
2011
2012static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2013 bool enable)
2014{
5e78835a 2015 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
b1023571 2016
b1023571
KW
2017 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2018 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2019 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2020 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2021
5e78835a 2022 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
b1023571
KW
2023}
2024
6bce4667
HZ
2025static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2026{
2027 /* csib */
2028 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2029 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2030 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2031 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2032 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2033 adev->gfx.rlc.clear_state_size);
2034}
2035
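/*
 * Scan the RLC register_list_format blob: record where each indirect run
 * starts and collect the unique indirect register offsets it references.
 */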
727b888f 2036static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
6bce4667
HZ
2037 int indirect_offset,
2038 int list_size,
2039 int *unique_indirect_regs,
cb5ed37f 2040 int unique_indirect_reg_count,
6bce4667 2041 int *indirect_start_offsets,
cb5ed37f
EQ
2042 int *indirect_start_offsets_count,
2043 int max_start_offsets_count)
6bce4667
HZ
2044{
2045 int idx;
6bce4667
HZ
2046
2047 for (; indirect_offset < list_size; indirect_offset++) {
cb5ed37f 2048 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
727b888f
HR
2049 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2050 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
6bce4667 2051
727b888f
HR
2052 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2053 indirect_offset += 2;
6bce4667 2054
727b888f 2055			/* look for the matching index */
cb5ed37f 2056 for (idx = 0; idx < unique_indirect_reg_count; idx++) {
727b888f
HR
2057 if (unique_indirect_regs[idx] ==
2058 register_list_format[indirect_offset] ||
2059 !unique_indirect_regs[idx])
2060 break;
2061 }
6bce4667 2062
cb5ed37f 2063 BUG_ON(idx >= unique_indirect_reg_count);
6bce4667 2064
727b888f
HR
2065 if (!unique_indirect_regs[idx])
2066 unique_indirect_regs[idx] = register_list_format[indirect_offset];
6bce4667 2067
727b888f 2068 indirect_offset++;
6bce4667 2069 }
6bce4667
HZ
2070 }
2071}
2072
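/*
 * Program the RLC save/restore machinery: write the restore table into
 * SRM ARAM, load the direct and indirect format lists into RLC scratch
 * RAM and set up the index control registers for the unique indirect
 * registers.
 */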
727b888f 2073static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
6bce4667
HZ
2074{
2075 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2076 int unique_indirect_reg_count = 0;
2077
2078 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2079 int indirect_start_offsets_count = 0;
2080
2081 int list_size = 0;
727b888f 2082 int i = 0, j = 0;
6bce4667
HZ
2083 u32 tmp = 0;
2084
2085 u32 *register_list_format =
2086 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2087 if (!register_list_format)
2088 return -ENOMEM;
2089 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
2090 adev->gfx.rlc.reg_list_format_size_bytes);
2091
2092 /* setup unique_indirect_regs array and indirect_start_offsets array */
727b888f
HR
2093 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2094 gfx_v9_1_parse_ind_reg_list(register_list_format,
2095 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2096 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2097 unique_indirect_regs,
cb5ed37f 2098 unique_indirect_reg_count,
727b888f 2099 indirect_start_offsets,
cb5ed37f
EQ
2100 &indirect_start_offsets_count,
2101 ARRAY_SIZE(indirect_start_offsets));
6bce4667
HZ
2102
2103 /* enable auto inc in case it is disabled */
2104 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2105 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2106 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2107
2108 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2109 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2110 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2111 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2112 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2113 adev->gfx.rlc.register_restore[i]);
2114
6bce4667
HZ
2115 /* load indirect register */
2116 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2117 adev->gfx.rlc.reg_list_format_start);
727b888f
HR
2118
2119 /* direct register portion */
2120 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
6bce4667
HZ
2121 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2122 register_list_format[i]);
2123
727b888f
HR
2124 /* indirect register portion */
2125 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2126 if (register_list_format[i] == 0xFFFFFFFF) {
2127 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2128 continue;
2129 }
2130
2131 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2132 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2133
2134 for (j = 0; j < unique_indirect_reg_count; j++) {
2135 if (register_list_format[i] == unique_indirect_regs[j]) {
2136 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2137 break;
2138 }
2139 }
2140
2141 BUG_ON(j >= unique_indirect_reg_count);
2142
2143 i++;
2144 }
2145
6bce4667
HZ
2146 /* set save/restore list size */
2147 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2148 list_size = list_size >> 1;
2149 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2150 adev->gfx.rlc.reg_restore_list_size);
2151 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2152
2153 /* write the starting offsets to RLC scratch ram */
2154 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2155 adev->gfx.rlc.starting_offsets_start);
c1b24a14 2156 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
6bce4667 2157 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
727b888f 2158 indirect_start_offsets[i]);
6bce4667
HZ
2159
2160 /* load unique indirect regs*/
c1b24a14 2161 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
727b888f
HR
2162 if (unique_indirect_regs[i] != 0) {
2163 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2164 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2165 unique_indirect_regs[i] & 0x3FFFF);
2166
2167 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2168 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2169 unique_indirect_regs[i] >> 20);
2170 }
6bce4667
HZ
2171 }
2172
2173 kfree(register_list_format);
2174 return 0;
2175}
2176
2177static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2178{
0e5293d0 2179 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
6bce4667
HZ
2180}
2181
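/*
 * Grant or revoke GFXIP control over CGPG through PWR_MISC_CNTL_STATUS,
 * updating the GFXOFF status field when control is granted.
 */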
91d3130a
HZ
2182static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2183 bool enable)
2184{
2185 uint32_t data = 0;
2186 uint32_t default_data = 0;
2187
2188 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
 2189	if (enable) {
2190 /* enable GFXIP control over CGPG */
2191 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
 2192		if (default_data != data)
2193 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2194
2195 /* update status */
2196 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2197 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
 2198		if (default_data != data)
2199 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2200 } else {
 2201		/* restore GFXIP control over CGPG */
2202 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
 2203		if (default_data != data)
2204 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2205 }
2206}
2207
2208static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2209{
2210 uint32_t data = 0;
2211
2212 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2213 AMD_PG_SUPPORT_GFX_SMG |
2214 AMD_PG_SUPPORT_GFX_DMG)) {
2215 /* init IDLE_POLL_COUNT = 60 */
2216 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2217 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2218 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2219 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2220
2221 /* init RLC PG Delay */
2222 data = 0;
2223 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2224 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2225 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2226 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2227 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2228
2229 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2230 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2231 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2232 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2233
2234 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2235 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2236 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2237 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2238
2239 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2240 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2241
2242 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2243 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2244 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2245
2246 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2247 }
2248}
2249
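/*
 * The helpers below each toggle a single RLC_PG_CNTL field and only write
 * the register back when the value actually changes.
 */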
ed5ad1e4
HZ
2250static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2251 bool enable)
2252{
2253 uint32_t data = 0;
2254 uint32_t default_data = 0;
2255
2256 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
e24c7f06
TSD
2257 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2258 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2259 enable ? 1 : 0);
2260 if (default_data != data)
2261 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
ed5ad1e4
HZ
2262}
2263
2264static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2265 bool enable)
2266{
2267 uint32_t data = 0;
2268 uint32_t default_data = 0;
2269
2270 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
b926fe8e
TSD
2271 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2272 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2273 enable ? 1 : 0);
 2274	if (default_data != data)
2275 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
ed5ad1e4
HZ
2276}
2277
3a6cc477
HZ
2278static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2279 bool enable)
2280{
2281 uint32_t data = 0;
2282 uint32_t default_data = 0;
2283
2284 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
54cfe0fc
TSD
2285 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2286 CP_PG_DISABLE,
2287 enable ? 0 : 1);
 2288	if (default_data != data)
2289 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3a6cc477
HZ
2290}
2291
197f95c8
HZ
2292static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2293 bool enable)
2294{
2295 uint32_t data, default_data;
2296
2297 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
f55ee212
TSD
2298 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2299 GFX_POWER_GATING_ENABLE,
2300 enable ? 1 : 0);
197f95c8
HZ
 2301	if (default_data != data)
2302 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2303}
2304
2305static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2306 bool enable)
2307{
2308 uint32_t data, default_data;
2309
2310 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
513f8133
TSD
2311 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2312 GFX_PIPELINE_PG_ENABLE,
2313 enable ? 1 : 0);
197f95c8
HZ
 2314	if (default_data != data)
2315 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2316
2317 if (!enable)
2318 /* read any GFX register to wake up GFX */
2319 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2320}
2321
552c8f76 2322static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2323 bool enable)
18924c71
HZ
2324{
2325 uint32_t data, default_data;
2326
2327 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
7915c8fd
TSD
2328 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2329 STATIC_PER_CU_PG_ENABLE,
2330 enable ? 1 : 0);
18924c71
HZ
 2331	if (default_data != data)
2332 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2333}
2334
552c8f76 2335static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
18924c71
HZ
2336 bool enable)
2337{
2338 uint32_t data, default_data;
2339
2340 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
e567fa69
TSD
2341 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2342 DYN_PER_CU_PG_ENABLE,
2343 enable ? 1 : 0);
18924c71
HZ
 2344	if (default_data != data)
2345 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2346}
2347
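/*
 * init_pg: load the clear-state buffer address, set up the RLC
 * save/restore list (needed for gfxoff on RLC v2.1+) and point
 * RLC_JUMP_TABLE_RESTORE at the CP table when powergating is enabled.
 */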
6bce4667
HZ
2348static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2349{
af356b6d
EQ
2350 gfx_v9_0_init_csb(adev);
2351
b58b65cf
EQ
2352 /*
2353 * Rlc save restore list is workable since v2_1.
2354 * And it's needed by gfxoff feature.
2355 */
2356 if (adev->gfx.rlc.is_rlc_v2_1) {
2357 gfx_v9_1_init_rlc_save_restore_list(adev);
2358 gfx_v9_0_enable_save_restore_machine(adev);
2359 }
a5acf930 2360
6bce4667
HZ
2361 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2362 AMD_PG_SUPPORT_GFX_SMG |
2363 AMD_PG_SUPPORT_GFX_DMG |
2364 AMD_PG_SUPPORT_CP |
2365 AMD_PG_SUPPORT_GDS |
2366 AMD_PG_SUPPORT_RLC_SMU_HS)) {
a5acf930
HR
2367 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2368 adev->gfx.rlc.cp_table_gpu_addr >> 8);
2369 gfx_v9_0_init_gfx_power_gating(adev);
6bce4667
HZ
2370 }
2371}
2372
b1023571
KW
2373void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2374{
b08796ce 2375 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
b1023571 2376 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
b1023571
KW
2377 gfx_v9_0_wait_for_rlc_serdes(adev);
2378}
2379
2380static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2381{
596c8e8b 2382 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
b1023571 2383 udelay(50);
596c8e8b 2384 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
b1023571
KW
2385 udelay(50);
2386}
2387
2388static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2389{
2390#ifdef AMDGPU_RLC_DEBUG_RETRY
2391 u32 rlc_ucode_ver;
2392#endif
b1023571 2393
342cda25 2394 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
ad97d9de 2395 udelay(50);
b1023571
KW
2396
 2397	/* carrizo: enable cp interrupt only after cp is initialized */
ad97d9de 2398 if (!(adev->flags & AMD_IS_APU)) {
b1023571 2399 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
ad97d9de 2400 udelay(50);
2401 }
b1023571
KW
2402
2403#ifdef AMDGPU_RLC_DEBUG_RETRY
2404 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
5e78835a 2405 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
b1023571
KW
 2406	if (rlc_ucode_ver == 0x108) {
2407 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2408 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2409 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2410 * default is 0x9C4 to create a 100us interval */
5e78835a 2411 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
b1023571 2412 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
eaa05d52 2413 * to disable the page fault retry interrupts, default is
b1023571 2414 * 0x100 (256) */
5e78835a 2415 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
b1023571
KW
2416 }
2417#endif
2418}
2419
2420static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2421{
2422 const struct rlc_firmware_header_v2_0 *hdr;
2423 const __le32 *fw_data;
2424 unsigned i, fw_size;
2425
2426 if (!adev->gfx.rlc_fw)
2427 return -EINVAL;
2428
2429 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2430 amdgpu_ucode_print_rlc_hdr(&hdr->header);
2431
2432 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2433 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2434 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2435
5e78835a 2436 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
b1023571
KW
2437 RLCG_UCODE_LOADING_START_ADDRESS);
2438 for (i = 0; i < fw_size; i++)
5e78835a
TSD
2439 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2440 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
b1023571
KW
2441
2442 return 0;
2443}
2444
2445static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2446{
2447 int r;
2448
f840cc5f
ML
2449 if (amdgpu_sriov_vf(adev)) {
2450 gfx_v9_0_init_csb(adev);
cfee05bc 2451 return 0;
f840cc5f 2452 }
cfee05bc 2453
fdb81fd7 2454 adev->gfx.rlc.funcs->stop(adev);
b1023571
KW
2455
2456 /* disable CG */
5e78835a 2457 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
b1023571 2458
6bce4667
HZ
2459 gfx_v9_0_init_pg(adev);
2460
b1023571
KW
2461 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2462 /* legacy rlc firmware loading */
2463 r = gfx_v9_0_rlc_load_microcode(adev);
2464 if (r)
2465 return r;
2466 }
2467
688be01a
AD
2468 switch (adev->asic_type) {
2469 case CHIP_RAVEN:
2470 if (amdgpu_lbpw == 0)
2471 gfx_v9_0_enable_lbpw(adev, false);
2472 else
2473 gfx_v9_0_enable_lbpw(adev, true);
2474 break;
2475 case CHIP_VEGA20:
2476 if (amdgpu_lbpw > 0)
e8835e0e
HZ
2477 gfx_v9_0_enable_lbpw(adev, true);
2478 else
2479 gfx_v9_0_enable_lbpw(adev, false);
688be01a
AD
2480 break;
2481 default:
2482 break;
e8835e0e
HZ
2483 }
2484
fdb81fd7 2485 adev->gfx.rlc.funcs->start(adev);
b1023571
KW
2486
2487 return 0;
2488}
2489
2490static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2491{
2492 int i;
5e78835a 2493 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
b1023571 2494
ea64468e
TSD
2495 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2496 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2497 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2498 if (!enable) {
b1023571 2499 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
c66ed765 2500 adev->gfx.gfx_ring[i].sched.ready = false;
b1023571 2501 }
5e78835a 2502 WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
b1023571
KW
2503 udelay(50);
2504}
2505
2506static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2507{
2508 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2509 const struct gfx_firmware_header_v1_0 *ce_hdr;
2510 const struct gfx_firmware_header_v1_0 *me_hdr;
2511 const __le32 *fw_data;
2512 unsigned i, fw_size;
2513
2514 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2515 return -EINVAL;
2516
2517 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2518 adev->gfx.pfp_fw->data;
2519 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2520 adev->gfx.ce_fw->data;
2521 me_hdr = (const struct gfx_firmware_header_v1_0 *)
2522 adev->gfx.me_fw->data;
2523
2524 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2525 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2526 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2527
2528 gfx_v9_0_cp_gfx_enable(adev, false);
2529
2530 /* PFP */
2531 fw_data = (const __le32 *)
2532 (adev->gfx.pfp_fw->data +
2533 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2534 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
5e78835a 2535 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
b1023571 2536 for (i = 0; i < fw_size; i++)
5e78835a
TSD
2537 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2538 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
b1023571
KW
2539
2540 /* CE */
2541 fw_data = (const __le32 *)
2542 (adev->gfx.ce_fw->data +
2543 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2544 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
5e78835a 2545 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
b1023571 2546 for (i = 0; i < fw_size; i++)
5e78835a
TSD
2547 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2548 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
b1023571
KW
2549
2550 /* ME */
2551 fw_data = (const __le32 *)
2552 (adev->gfx.me_fw->data +
2553 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2554 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
5e78835a 2555 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
b1023571 2556 for (i = 0; i < fw_size; i++)
5e78835a
TSD
2557 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2558 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
b1023571
KW
2559
2560 return 0;
2561}
2562
b1023571
KW
2563static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2564{
2565 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2566 const struct cs_section_def *sect = NULL;
2567 const struct cs_extent_def *ext = NULL;
d5de797f 2568 int r, i, tmp;
b1023571
KW
2569
2570 /* init the CP */
5e78835a
TSD
2571 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2572 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
b1023571
KW
2573
2574 gfx_v9_0_cp_gfx_enable(adev, true);
2575
d5de797f 2576 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
b1023571
KW
2577 if (r) {
2578 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2579 return r;
2580 }
2581
2582 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2583 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2584
2585 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2586 amdgpu_ring_write(ring, 0x80000000);
2587 amdgpu_ring_write(ring, 0x80000000);
2588
2589 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2590 for (ext = sect->section; ext->extent != NULL; ++ext) {
2591 if (sect->id == SECT_CONTEXT) {
2592 amdgpu_ring_write(ring,
2593 PACKET3(PACKET3_SET_CONTEXT_REG,
2594 ext->reg_count));
2595 amdgpu_ring_write(ring,
2596 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2597 for (i = 0; i < ext->reg_count; i++)
2598 amdgpu_ring_write(ring, ext->extent[i]);
2599 }
2600 }
2601 }
2602
2603 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2604 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2605
2606 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2607 amdgpu_ring_write(ring, 0);
2608
2609 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2610 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2611 amdgpu_ring_write(ring, 0x8000);
2612 amdgpu_ring_write(ring, 0x8000);
2613
d5de797f
KW
 2614	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2615 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2616 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2617 amdgpu_ring_write(ring, tmp);
2618 amdgpu_ring_write(ring, 0);
2619
b1023571
KW
2620 amdgpu_ring_commit(ring);
2621
2622 return 0;
2623}
2624
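/*
 * cp_gfx_resume: program ring buffer 0 (size, rptr/wptr writeback
 * addresses, base, doorbell range) and then start the gfx ring.
 */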
2625static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2626{
2627 struct amdgpu_ring *ring;
2628 u32 tmp;
2629 u32 rb_bufsz;
3fc08b61 2630 u64 rb_addr, rptr_addr, wptr_gpu_addr;
b1023571
KW
2631
2632 /* Set the write pointer delay */
5e78835a 2633 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
b1023571
KW
2634
2635 /* set the RB to use vmid 0 */
5e78835a 2636 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
b1023571
KW
2637
2638 /* Set ring buffer size */
2639 ring = &adev->gfx.gfx_ring[0];
2640 rb_bufsz = order_base_2(ring->ring_size / 8);
2641 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2642 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2643#ifdef __BIG_ENDIAN
2644 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2645#endif
5e78835a 2646 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
b1023571
KW
2647
2648 /* Initialize the ring buffer's write pointers */
2649 ring->wptr = 0;
5e78835a
TSD
2650 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2651 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
b1023571
KW
2652
 2653	/* set the wb address whether it's enabled or not */
2654 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
5e78835a
TSD
2655 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2656 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
b1023571 2657
3fc08b61 2658 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
5e78835a
TSD
2659 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2660 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3fc08b61 2661
b1023571 2662 mdelay(1);
5e78835a 2663 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
b1023571
KW
2664
2665 rb_addr = ring->gpu_addr >> 8;
5e78835a
TSD
2666 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2667 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
b1023571 2668
5e78835a 2669 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
b1023571
KW
2670 if (ring->use_doorbell) {
2671 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2672 DOORBELL_OFFSET, ring->doorbell_index);
2673 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2674 DOORBELL_EN, 1);
2675 } else {
2676 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2677 }
5e78835a 2678 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
b1023571
KW
2679
2680 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2681 DOORBELL_RANGE_LOWER, ring->doorbell_index);
5e78835a 2682 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
b1023571 2683
5e78835a 2684 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
b1023571
KW
2685 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2686
2687
2688 /* start the ring */
2689 gfx_v9_0_cp_gfx_start(adev);
c66ed765 2690 ring->sched.ready = true;
b1023571
KW
2691
2692 return 0;
2693}
2694
2695static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2696{
2697 int i;
2698
2699 if (enable) {
5e78835a 2700 WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
b1023571 2701 } else {
5e78835a 2702 WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
b1023571
KW
2703 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2704 for (i = 0; i < adev->gfx.num_compute_rings; i++)
c66ed765
AG
2705 adev->gfx.compute_ring[i].sched.ready = false;
2706 adev->gfx.kiq.ring.sched.ready = false;
b1023571
KW
2707 }
2708 udelay(50);
2709}
2710
b1023571
KW
2711static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2712{
2713 const struct gfx_firmware_header_v1_0 *mec_hdr;
2714 const __le32 *fw_data;
2715 unsigned i;
2716 u32 tmp;
2717
2718 if (!adev->gfx.mec_fw)
2719 return -EINVAL;
2720
2721 gfx_v9_0_cp_compute_enable(adev, false);
2722
2723 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2724 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2725
2726 fw_data = (const __le32 *)
2727 (adev->gfx.mec_fw->data +
2728 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2729 tmp = 0;
2730 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2731 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
5e78835a 2732 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
b1023571 2733
5e78835a 2734 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
b1023571 2735 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
5e78835a 2736 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
b1023571 2737 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
eaa05d52 2738
b1023571 2739 /* MEC1 */
5e78835a 2740 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
b1023571
KW
2741 mec_hdr->jt_offset);
2742 for (i = 0; i < mec_hdr->jt_size; i++)
5e78835a 2743 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
b1023571
KW
2744 le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2745
5e78835a 2746 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
b1023571
KW
2747 adev->gfx.mec_fw_version);
2748 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2749
2750 return 0;
2751}
2752
464826d6
XY
2753/* KIQ functions */
2754static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
b1023571 2755{
464826d6
XY
2756 uint32_t tmp;
2757 struct amdgpu_device *adev = ring->adev;
b1023571 2758
464826d6 2759 /* tell RLC which is KIQ queue */
5e78835a 2760 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
464826d6
XY
2761 tmp &= 0xffffff00;
2762 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
5e78835a 2763 WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
464826d6 2764 tmp |= 0x80;
5e78835a 2765 WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
464826d6 2766}
b1023571 2767
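/*
 * Build the compute queue mask from mec.queue_bitmap and submit a
 * SET_RESOURCES packet plus one MAP_QUEUES packet per compute ring on the
 * KIQ, then test the KIQ ring to confirm the mapping took effect.
 */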
0f1dfd52 2768static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
464826d6 2769{
bd3402ea 2770 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
de65513a 2771 uint64_t queue_mask = 0;
2fdde9fa 2772 int r, i;
b1023571 2773
de65513a
AR
2774 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2775 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2776 continue;
b1023571 2777
de65513a
AR
2778 /* This situation may be hit in the future if a new HW
2779 * generation exposes more than 64 queues. If so, the
2780 * definition of queue_mask needs updating */
1d11ee89 2781 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
de65513a
AR
2782 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2783 break;
b1023571 2784 }
b1023571 2785
de65513a
AR
2786 queue_mask |= (1ull << i);
2787 }
b1023571 2788
841cf911 2789 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2fdde9fa
AD
2790 if (r) {
2791 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
b1023571 2792 return r;
2fdde9fa 2793 }
b1023571 2794
0f1dfd52
AD
2795 /* set resources */
2796 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2797 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2798 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
de65513a
AR
2799 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
2800 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
0f1dfd52
AD
2801 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
2802 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
2803 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
2804 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
bd3402ea
AD
2805 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2806 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2807 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2808 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2809
2810 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2811 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
2812 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
2813 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2814 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2815 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2816 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2817 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2818 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
f4534f06 2819 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
bd3402ea
AD
2820 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2821 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2822 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2823 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2824 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2825 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2826 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2827 }
b1023571 2828
c66ed765
AG
2829 r = amdgpu_ring_test_helper(kiq_ring);
2830 if (r)
841cf911 2831 DRM_ERROR("KCQ enable failed\n");
464826d6 2832
2fdde9fa 2833 return r;
464826d6
XY
2834}
2835
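/*
 * Fill the v9 MQD for this ring: EOP buffer, doorbell, MQD/ring base
 * addresses, rptr/wptr writeback and HQD control values, then mark the
 * queue active.
 */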
e322edc3 2836static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
464826d6 2837{
33fb8698 2838 struct amdgpu_device *adev = ring->adev;
e322edc3 2839 struct v9_mqd *mqd = ring->mqd_ptr;
464826d6
XY
2840 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2841 uint32_t tmp;
2842
2843 mqd->header = 0xC0310800;
2844 mqd->compute_pipelinestat_enable = 0x00000001;
2845 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2846 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2847 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2848 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2849 mqd->compute_misc_reserved = 0x00000003;
2850
ffe6d881
AD
2851 mqd->dynamic_cu_mask_addr_lo =
2852 lower_32_bits(ring->mqd_gpu_addr
2853 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2854 mqd->dynamic_cu_mask_addr_hi =
2855 upper_32_bits(ring->mqd_gpu_addr
2856 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2857
d72f2f46 2858 eop_base_addr = ring->eop_gpu_addr >> 8;
464826d6
XY
2859 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2860 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2861
2862 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
5e78835a 2863 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
464826d6 2864 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
268cb4c7 2865 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
464826d6
XY
2866
2867 mqd->cp_hqd_eop_control = tmp;
2868
2869 /* enable doorbell? */
5e78835a 2870 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
464826d6
XY
2871
2872 if (ring->use_doorbell) {
2873 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2874 DOORBELL_OFFSET, ring->doorbell_index);
2875 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2876 DOORBELL_EN, 1);
2877 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2878 DOORBELL_SOURCE, 0);
2879 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2880 DOORBELL_HIT, 0);
78888cff 2881 } else {
464826d6
XY
2882 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2883 DOORBELL_EN, 0);
78888cff 2884 }
464826d6
XY
2885
2886 mqd->cp_hqd_pq_doorbell_control = tmp;
2887
2888 /* disable the queue if it's active */
2889 ring->wptr = 0;
2890 mqd->cp_hqd_dequeue_request = 0;
2891 mqd->cp_hqd_pq_rptr = 0;
2892 mqd->cp_hqd_pq_wptr_lo = 0;
2893 mqd->cp_hqd_pq_wptr_hi = 0;
2894
2895 /* set the pointer to the MQD */
33fb8698
AD
2896 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2897 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
464826d6
XY
2898
2899 /* set MQD vmid to 0 */
5e78835a 2900 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
464826d6
XY
2901 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2902 mqd->cp_mqd_control = tmp;
2903
2904 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
2905 hqd_gpu_addr = ring->gpu_addr >> 8;
2906 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2907 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2908
2909 /* set up the HQD, this is similar to CP_RB0_CNTL */
5e78835a 2910 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
464826d6
XY
2911 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2912 (order_base_2(ring->ring_size / 4) - 1));
2913 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2914 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2915#ifdef __BIG_ENDIAN
2916 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2917#endif
2918 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2919 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2920 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2921 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2922 mqd->cp_hqd_pq_control = tmp;
2923
2924 /* set the wb address whether it's enabled or not */
2925 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2926 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2927 mqd->cp_hqd_pq_rptr_report_addr_hi =
2928 upper_32_bits(wb_gpu_addr) & 0xffff;
2929
2930 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2931 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2932 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2933 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2934
2935 tmp = 0;
2936 /* enable the doorbell if requested */
2937 if (ring->use_doorbell) {
5e78835a 2938 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
464826d6
XY
2939 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2940 DOORBELL_OFFSET, ring->doorbell_index);
2941
2942 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2943 DOORBELL_EN, 1);
2944 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2945 DOORBELL_SOURCE, 0);
2946 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2947 DOORBELL_HIT, 0);
2948 }
2949
2950 mqd->cp_hqd_pq_doorbell_control = tmp;
2951
2952 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2953 ring->wptr = 0;
0274a9c5 2954 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
464826d6
XY
2955
2956 /* set the vmid for the queue */
2957 mqd->cp_hqd_vmid = 0;
2958
0274a9c5 2959 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
464826d6
XY
2960 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2961 mqd->cp_hqd_persistent_state = tmp;
2962
fca4ce69
AD
2963 /* set MIN_IB_AVAIL_SIZE */
2964 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2965 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2966 mqd->cp_hqd_ib_control = tmp;
2967
464826d6
XY
2968 /* activate the queue */
2969 mqd->cp_hqd_active = 1;
2970
2971 return 0;
2972}
2973
e322edc3 2974static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
464826d6 2975{
33fb8698 2976 struct amdgpu_device *adev = ring->adev;
e322edc3 2977 struct v9_mqd *mqd = ring->mqd_ptr;
464826d6
XY
2978 int j;
2979
2980 /* disable wptr polling */
72edadd5 2981 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
464826d6 2982
5e78835a 2983 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
464826d6 2984 mqd->cp_hqd_eop_base_addr_lo);
5e78835a 2985 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
464826d6
XY
2986 mqd->cp_hqd_eop_base_addr_hi);
2987
2988 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
5e78835a 2989 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
464826d6
XY
2990 mqd->cp_hqd_eop_control);
2991
2992 /* enable doorbell? */
5e78835a 2993 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
464826d6
XY
2994 mqd->cp_hqd_pq_doorbell_control);
2995
2996 /* disable the queue if it's active */
5e78835a
TSD
2997 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
2998 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
464826d6 2999 for (j = 0; j < adev->usec_timeout; j++) {
5e78835a 3000 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
464826d6
XY
3001 break;
3002 udelay(1);
3003 }
5e78835a 3004 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
464826d6 3005 mqd->cp_hqd_dequeue_request);
5e78835a 3006 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
464826d6 3007 mqd->cp_hqd_pq_rptr);
5e78835a 3008 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
464826d6 3009 mqd->cp_hqd_pq_wptr_lo);
5e78835a 3010 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
464826d6
XY
3011 mqd->cp_hqd_pq_wptr_hi);
3012 }
3013
3014 /* set the pointer to the MQD */
5e78835a 3015 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
464826d6 3016 mqd->cp_mqd_base_addr_lo);
5e78835a 3017 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
464826d6
XY
3018 mqd->cp_mqd_base_addr_hi);
3019
3020 /* set MQD vmid to 0 */
5e78835a 3021 WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
464826d6
XY
3022 mqd->cp_mqd_control);
3023
3024 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
5e78835a 3025 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
464826d6 3026 mqd->cp_hqd_pq_base_lo);
5e78835a 3027 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
464826d6
XY
3028 mqd->cp_hqd_pq_base_hi);
3029
3030 /* set up the HQD, this is similar to CP_RB0_CNTL */
5e78835a 3031 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
464826d6
XY
3032 mqd->cp_hqd_pq_control);
3033
3034 /* set the wb address whether it's enabled or not */
5e78835a 3035 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
464826d6 3036 mqd->cp_hqd_pq_rptr_report_addr_lo);
5e78835a 3037 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
464826d6
XY
3038 mqd->cp_hqd_pq_rptr_report_addr_hi);
3039
3040 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
5e78835a 3041 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
464826d6 3042 mqd->cp_hqd_pq_wptr_poll_addr_lo);
5e78835a 3043 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
464826d6
XY
3044 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3045
3046 /* enable the doorbell if requested */
3047 if (ring->use_doorbell) {
5e78835a 3048 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
9564f192 3049 (adev->doorbell_index.kiq * 2) << 2);
5e78835a 3050 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
9564f192 3051 (adev->doorbell_index.userqueue_end * 2) << 2);
464826d6
XY
3052 }
3053
5e78835a 3054 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
464826d6
XY
3055 mqd->cp_hqd_pq_doorbell_control);
3056
3057 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5e78835a 3058 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
464826d6 3059 mqd->cp_hqd_pq_wptr_lo);
5e78835a 3060 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
464826d6
XY
3061 mqd->cp_hqd_pq_wptr_hi);
3062
3063 /* set the vmid for the queue */
5e78835a 3064 WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
464826d6 3065
5e78835a 3066 WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
464826d6
XY
3067 mqd->cp_hqd_persistent_state);
3068
3069 /* activate the queue */
5e78835a 3070 WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
464826d6
XY
3071 mqd->cp_hqd_active);
3072
72edadd5
TSD
3073 if (ring->use_doorbell)
3074 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
464826d6
XY
3075
3076 return 0;
3077}
3078
326aa996
AG
3079static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3080{
3081 struct amdgpu_device *adev = ring->adev;
3082 int j;
3083
3084 /* disable the queue if it's active */
3085 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3086
3087 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3088
3089 for (j = 0; j < adev->usec_timeout; j++) {
3090 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3091 break;
3092 udelay(1);
3093 }
3094
f7a9ee81 3095 if (j == AMDGPU_MAX_USEC_TIMEOUT) {
326aa996
AG
3096 DRM_DEBUG("KIQ dequeue request failed.\n");
3097
f7a9ee81 3098 /* Manual disable if dequeue request times out */
326aa996
AG
3099 WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0);
3100 }
3101
326aa996
AG
3102 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3103 0);
3104 }
3105
3106 WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3107 WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3108 WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3109 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3110 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3111 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3112 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3113 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3114
3115 return 0;
3116}
3117
e322edc3 3118static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
464826d6
XY
3119{
3120 struct amdgpu_device *adev = ring->adev;
e322edc3 3121 struct v9_mqd *mqd = ring->mqd_ptr;
464826d6
XY
3122 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3123
898b7893 3124 gfx_v9_0_kiq_setting(ring);
464826d6 3125
13a752e3 3126 if (adev->in_gpu_reset) { /* for GPU_RESET case */
464826d6 3127 /* reset MQD to a clean status */
0ef376ca 3128 if (adev->gfx.mec.mqd_backup[mqd_idx])
ffe6d881 3129 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
464826d6
XY
3130
3131 /* reset ring buffer */
3132 ring->wptr = 0;
b98724db 3133 amdgpu_ring_clear_ring(ring);
464826d6 3134
898b7893
AD
3135 mutex_lock(&adev->srbm_mutex);
3136 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3137 gfx_v9_0_kiq_init_register(ring);
3138 soc15_grbm_select(adev, 0, 0, 0, 0);
3139 mutex_unlock(&adev->srbm_mutex);
464826d6 3140 } else {
ffe6d881
AD
3141 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3142 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3143 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
ba0c19f5
AD
3144 mutex_lock(&adev->srbm_mutex);
3145 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3146 gfx_v9_0_mqd_init(ring);
3147 gfx_v9_0_kiq_init_register(ring);
3148 soc15_grbm_select(adev, 0, 0, 0, 0);
3149 mutex_unlock(&adev->srbm_mutex);
3150
3151 if (adev->gfx.mec.mqd_backup[mqd_idx])
ffe6d881 3152 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
464826d6
XY
3153 }
3154
0f1dfd52 3155 return 0;
898b7893
AD
3156}
3157
3158static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3159{
3160 struct amdgpu_device *adev = ring->adev;
898b7893
AD
3161 struct v9_mqd *mqd = ring->mqd_ptr;
3162 int mqd_idx = ring - &adev->gfx.compute_ring[0];
898b7893 3163
44779b43 3164 if (!adev->in_gpu_reset && !adev->in_suspend) {
ffe6d881
AD
3165 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3166 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3167 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
464826d6
XY
3168 mutex_lock(&adev->srbm_mutex);
3169 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
e322edc3 3170 gfx_v9_0_mqd_init(ring);
464826d6
XY
3171 soc15_grbm_select(adev, 0, 0, 0, 0);
3172 mutex_unlock(&adev->srbm_mutex);
3173
898b7893 3174 if (adev->gfx.mec.mqd_backup[mqd_idx])
ffe6d881 3175 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
13a752e3 3176 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
464826d6 3177 /* reset MQD to a clean status */
898b7893 3178 if (adev->gfx.mec.mqd_backup[mqd_idx])
ffe6d881 3179 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
464826d6
XY
3180
3181 /* reset ring buffer */
3182 ring->wptr = 0;
898b7893 3183 amdgpu_ring_clear_ring(ring);
ba0c19f5
AD
3184 } else {
3185 amdgpu_ring_clear_ring(ring);
464826d6
XY
3186 }
3187
464826d6
XY
3188 return 0;
3189}
3190
3191static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3192{
a9a8a788
RZ
3193 struct amdgpu_ring *ring;
3194 int r;
464826d6
XY
3195
3196 ring = &adev->gfx.kiq.ring;
e1d53aa8
AD
3197
3198 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3199 if (unlikely(r != 0))
a9a8a788 3200 return r;
e1d53aa8
AD
3201
3202 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
a9a8a788
RZ
3203 if (unlikely(r != 0))
3204 return r;
3205
3206 gfx_v9_0_kiq_init_queue(ring);
3207 amdgpu_bo_kunmap(ring->mqd_obj);
3208 ring->mqd_ptr = NULL;
e1d53aa8 3209 amdgpu_bo_unreserve(ring->mqd_obj);
c66ed765 3210 ring->sched.ready = true;
a9a8a788
RZ
3211 return 0;
3212}
3213
3214static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3215{
3216 struct amdgpu_ring *ring = NULL;
3217 int r = 0, i;
3218
3219 gfx_v9_0_cp_compute_enable(adev, true);
464826d6
XY
3220
3221 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3222 ring = &adev->gfx.compute_ring[i];
e1d53aa8
AD
3223
3224 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3225 if (unlikely(r != 0))
3226 goto done;
3227 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3228 if (!r) {
898b7893 3229 r = gfx_v9_0_kcq_init_queue(ring);
464826d6
XY
3230 amdgpu_bo_kunmap(ring->mqd_obj);
3231 ring->mqd_ptr = NULL;
464826d6 3232 }
e1d53aa8
AD
3233 amdgpu_bo_unreserve(ring->mqd_obj);
3234 if (r)
3235 goto done;
464826d6
XY
3236 }
3237
0f1dfd52 3238 r = gfx_v9_0_kiq_kcq_enable(adev);
e1d53aa8
AD
3239done:
3240 return r;
464826d6
XY
3241}
3242
b1023571
KW
3243static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3244{
bd3402ea 3245 int r, i;
b1023571
KW
3246 struct amdgpu_ring *ring;
3247
3248 if (!(adev->flags & AMD_IS_APU))
3249 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3250
3251 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3252 /* legacy firmware loading */
3253 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3254 if (r)
3255 return r;
3256
3257 r = gfx_v9_0_cp_compute_load_microcode(adev);
3258 if (r)
3259 return r;
3260 }
3261
a9a8a788
RZ
3262 r = gfx_v9_0_kiq_resume(adev);
3263 if (r)
3264 return r;
3265
b1023571
KW
3266 r = gfx_v9_0_cp_gfx_resume(adev);
3267 if (r)
3268 return r;
3269
a9a8a788 3270 r = gfx_v9_0_kcq_resume(adev);
b1023571
KW
3271 if (r)
3272 return r;
3273
3274 ring = &adev->gfx.gfx_ring[0];
c66ed765
AG
3275 r = amdgpu_ring_test_helper(ring);
3276 if (r)
b1023571 3277 return r;
e30a5223 3278
b1023571
KW
3279 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3280 ring = &adev->gfx.compute_ring[i];
c66ed765 3281 amdgpu_ring_test_helper(ring);
b1023571
KW
3282 }
3283
3284 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3285
3286 return 0;
3287}
3288
3289static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3290{
3291 gfx_v9_0_cp_gfx_enable(adev, enable);
3292 gfx_v9_0_cp_compute_enable(adev, enable);
3293}
3294
3295static int gfx_v9_0_hw_init(void *handle)
3296{
3297 int r;
3298 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3299
3300 gfx_v9_0_init_golden_registers(adev);
3301
434e6df2 3302 gfx_v9_0_constants_init(adev);
b1023571 3303
137dc4b9
EQ
3304 r = gfx_v9_0_csb_vram_pin(adev);
3305 if (r)
3306 return r;
3307
fdb81fd7 3308 r = adev->gfx.rlc.funcs->resume(adev);
b1023571
KW
3309 if (r)
3310 return r;
3311
3312 r = gfx_v9_0_cp_resume(adev);
3313 if (r)
3314 return r;
3315
3316 r = gfx_v9_0_ngg_en(adev);
3317 if (r)
3318 return r;
3319
3320 return r;
3321}
3322
ffabea84 3323static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
85f95ad6 3324{
ffabea84
RZ
3325 int r, i;
3326 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
85f95ad6 3327
ffabea84
RZ
3328 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3329 if (r)
85f95ad6 3330 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
85f95ad6 3331
ffabea84
RZ
3332 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3333 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3334
3335 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3336 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
85f95ad6
ML
3337 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3338 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3339 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3340 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
ffabea84
RZ
3341 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3342 amdgpu_ring_write(kiq_ring, 0);
3343 amdgpu_ring_write(kiq_ring, 0);
3344 amdgpu_ring_write(kiq_ring, 0);
3345 }
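/* each UNMAP_QUEUES packet above is 6 dwords, matching the
 * 6 * num_compute_rings dwords reserved by amdgpu_ring_alloc()
 */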
c66ed765 3346 r = amdgpu_ring_test_helper(kiq_ring);
841cf911
RZ
3347 if (r)
3348 DRM_ERROR("KCQ disable failed\n");
3349
85f95ad6
ML
3350 return r;
3351}
3352
b1023571
KW
3353static int gfx_v9_0_hw_fini(void *handle)
3354{
3355 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3356
760a1d55 3357 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
b1023571
KW
3358 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3359 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
85f95ad6
ML
3360
3361 /* disable KCQ to avoid CPC touching memory that is no longer valid */
ffabea84 3362 gfx_v9_0_kcq_disable(adev);
85f95ad6 3363
464826d6 3364 if (amdgpu_sriov_vf(adev)) {
9f0178fb
ML
3365 gfx_v9_0_cp_gfx_enable(adev, false);
3366 /* must disable polling for SRIOV when hw is finished, otherwise the
3367 * CPC engine may keep fetching a WB address that is already invalid
3368 * after sw teardown and trigger a DMAR read error on the hypervisor
3369 * side.
3370 */
3371 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
464826d6
XY
3372 return 0;
3373 }
326aa996
AG
3374
3375 /* Use the deinitialize sequence from CAIL when unbinding the device
3376 * from the driver, otherwise KIQ hangs when binding back
3377 */
44779b43 3378 if (!adev->in_gpu_reset && !adev->in_suspend) {
326aa996
AG
3379 mutex_lock(&adev->srbm_mutex);
3380 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3381 adev->gfx.kiq.ring.pipe,
3382 adev->gfx.kiq.ring.queue, 0);
3383 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3384 soc15_grbm_select(adev, 0, 0, 0, 0);
3385 mutex_unlock(&adev->srbm_mutex);
3386 }
3387
b1023571 3388 gfx_v9_0_cp_enable(adev, false);
fdb81fd7 3389 adev->gfx.rlc.funcs->stop(adev);
b1023571 3390
137dc4b9
EQ
3391 gfx_v9_0_csb_vram_unpin(adev);
3392
b1023571
KW
3393 return 0;
3394}
3395
3396static int gfx_v9_0_suspend(void *handle)
3397{
44779b43 3398 return gfx_v9_0_hw_fini(handle);
b1023571
KW
3399}
3400
3401static int gfx_v9_0_resume(void *handle)
3402{
44779b43 3403 return gfx_v9_0_hw_init(handle);
b1023571
KW
3404}
3405
3406static bool gfx_v9_0_is_idle(void *handle)
3407{
3408 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3409
5e78835a 3410 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
b1023571
KW
3411 GRBM_STATUS, GUI_ACTIVE))
3412 return false;
3413 else
3414 return true;
3415}
3416
3417static int gfx_v9_0_wait_for_idle(void *handle)
3418{
3419 unsigned i;
b1023571
KW
3420 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3421
3422 for (i = 0; i < adev->usec_timeout; i++) {
2b9bdfa7 3423 if (gfx_v9_0_is_idle(handle))
b1023571
KW
3424 return 0;
3425 udelay(1);
3426 }
3427 return -ETIMEDOUT;
3428}
3429
b1023571
KW
3430static int gfx_v9_0_soft_reset(void *handle)
3431{
3432 u32 grbm_soft_reset = 0;
3433 u32 tmp;
3434 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3435
3436 /* GRBM_STATUS */
5e78835a 3437 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
b1023571
KW
3438 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3439 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3440 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3441 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3442 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3443 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3444 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3445 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3446 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3447 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3448 }
3449
3450 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3451 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3452 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3453 }
3454
3455 /* GRBM_STATUS2 */
5e78835a 3456 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
b1023571
KW
3457 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3458 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3459 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3460
3461
75bac5c6 3462 if (grbm_soft_reset) {
b1023571 3463 /* stop the rlc */
fdb81fd7 3464 adev->gfx.rlc.funcs->stop(adev);
b1023571
KW
3465
3466 /* Disable GFX parsing/prefetching */
3467 gfx_v9_0_cp_gfx_enable(adev, false);
3468
3469 /* Disable MEC parsing/prefetching */
3470 gfx_v9_0_cp_compute_enable(adev, false);
3471
3472 if (grbm_soft_reset) {
5e78835a 3473 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
b1023571
KW
3474 tmp |= grbm_soft_reset;
3475 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5e78835a
TSD
3476 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3477 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
b1023571
KW
3478
3479 udelay(50);
3480
3481 tmp &= ~grbm_soft_reset;
5e78835a
TSD
3482 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3483 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
b1023571
KW
3484 }
3485
3486 /* Wait a little for things to settle down */
3487 udelay(50);
b1023571
KW
3488 }
3489 return 0;
3490}
3491
3492static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3493{
3494 uint64_t clock;
3495
3496 mutex_lock(&adev->gfx.gpu_clock_mutex);
5e78835a
TSD
3497 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
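/* the write above captures the free-running GPU clock counter, so the
 * LSB/MSB halves read below form a coherent 64-bit snapshot
 */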
3498 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3499 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
b1023571
KW
3500 mutex_unlock(&adev->gfx.gpu_clock_mutex);
3501 return clock;
3502}
3503
3504static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3505 uint32_t vmid,
3506 uint32_t gds_base, uint32_t gds_size,
3507 uint32_t gws_base, uint32_t gws_size,
3508 uint32_t oa_base, uint32_t oa_size)
3509{
946a4d5b
SL
3510 struct amdgpu_device *adev = ring->adev;
3511
b1023571
KW
3512 /* GDS Base */
3513 gfx_v9_0_write_data_to_reg(ring, 0, false,
946a4d5b 3514 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
b1023571
KW
3515 gds_base);
3516
3517 /* GDS Size */
3518 gfx_v9_0_write_data_to_reg(ring, 0, false,
946a4d5b 3519 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
b1023571
KW
3520 gds_size);
3521
3522 /* GWS */
3523 gfx_v9_0_write_data_to_reg(ring, 0, false,
946a4d5b 3524 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
b1023571
KW
3525 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3526
3527 /* OA */
3528 gfx_v9_0_write_data_to_reg(ring, 0, false,
946a4d5b 3529 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
b1023571
KW
3530 (1 << (oa_size + oa_base)) - (1 << oa_base));
3531}
3532
5326ad54
JZ
3533static const u32 vgpr_init_compute_shader[] =
3534{
3535 0xb07c0000, 0xbe8000ff,
3536 0x000000f8, 0xbf110800,
3537 0x7e000280, 0x7e020280,
3538 0x7e040280, 0x7e060280,
3539 0x7e080280, 0x7e0a0280,
3540 0x7e0c0280, 0x7e0e0280,
3541 0x80808800, 0xbe803200,
3542 0xbf84fff5, 0xbf9c0000,
3543 0xd28c0001, 0x0001007f,
3544 0xd28d0001, 0x0002027e,
3545 0x10020288, 0xb8810904,
3546 0xb7814000, 0xd1196a01,
3547 0x00000301, 0xbe800087,
3548 0xbefc00c1, 0xd89c4000,
3549 0x00020201, 0xd89cc080,
3550 0x00040401, 0x320202ff,
3551 0x00000800, 0x80808100,
3552 0xbf84fff8, 0x7e020280,
3553 0xbf810000, 0x00000000,
3554};
3555
3556static const u32 sgpr_init_compute_shader[] =
3557{
3558 0xb07c0000, 0xbe8000ff,
3559 0x0000005f, 0xbee50080,
3560 0xbe812c65, 0xbe822c65,
3561 0xbe832c65, 0xbe842c65,
3562 0xbe852c65, 0xb77c0005,
3563 0x80808500, 0xbf84fff8,
3564 0xbe800080, 0xbf810000,
3565};
3566
3567static const struct soc15_reg_entry vgpr_init_regs[] = {
3568 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3569 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3570 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3571 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3572 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3573 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3574 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3575 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3576 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3577 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
3578};
3579
3580static const struct soc15_reg_entry sgpr_init_regs[] = {
3581 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3582 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3583 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3584 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3585 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3586 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3587 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3588 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3589 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3590 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3591};
3592
3593static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3594 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) },
3595 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) },
3596 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) },
3597 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) },
3598 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) },
3599 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) },
3600 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) },
3601 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) },
3602 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) },
3603 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) },
3604 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) },
3605 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) },
3606 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) },
3607 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) },
3608 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) },
3609 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) },
3610 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) },
3611 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) },
3612 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) },
3613 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) },
3614 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) },
3615 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) },
3616 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) },
3617 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) },
3618 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) },
3619 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) },
3620 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) },
3621 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) },
3622 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) },
3623 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) },
3624 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) },
3625};
3626
3627static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3628{
3629 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3630 struct amdgpu_ib ib;
3631 struct dma_fence *f = NULL;
3632 int r, i, j;
3633 u32 tmp;
3634 unsigned total_size, vgpr_offset, sgpr_offset;
3635 u64 gpu_addr;
3636
3637 /* only supported when RAS is enabled */
3638 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3639 return 0;
3640
3641 /* bail if the compute ring is not ready */
3642 if (!ring->sched.ready)
3643 return 0;
3644
3645 tmp = RREG32_SOC15(GC, 0, mmGB_EDC_MODE);
3646 WREG32_SOC15(GC, 0, mmGB_EDC_MODE, 0);
3647
3648 total_size =
3649 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3650 total_size +=
3651 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3652 total_size = ALIGN(total_size, 256);
3653 vgpr_offset = total_size;
3654 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3655 sgpr_offset = total_size;
3656 total_size += sizeof(sgpr_init_compute_shader);
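/* IB layout: per pass, 3 dwords for each SET_SH_REG register write,
 * 4 dwords for the shader address, 5 for the dispatch and 2 for the
 * event write; the VGPR and SGPR shaders follow at 256-byte aligned
 * offsets
 */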
3657
3658 /* allocate an indirect buffer to put the commands in */
3659 memset(&ib, 0, sizeof(ib));
3660 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3661 if (r) {
3662 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3663 return r;
3664 }
3665
3666 /* load the compute shaders */
3667 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3668 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3669
3670 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3671 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3672
3673 /* init the ib length to 0 */
3674 ib.length_dw = 0;
3675
3676 /* VGPR */
3677 /* write the register state for the compute dispatch */
3678 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3679 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3680 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3681 - PACKET3_SET_SH_REG_START;
3682 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3683 }
3684 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3685 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3686 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3687 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3688 - PACKET3_SET_SH_REG_START;
3689 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3690 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3691
3692 /* write dispatch packet */
3693 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3694 ib.ptr[ib.length_dw++] = 128; /* x */
3695 ib.ptr[ib.length_dw++] = 1; /* y */
3696 ib.ptr[ib.length_dw++] = 1; /* z */
3697 ib.ptr[ib.length_dw++] =
3698 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
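/* 128 x 1 x 1 thread groups of 512 threads each
 * (COMPUTE_NUM_THREAD_X above is 256*2)
 */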
3699
3700 /* write CS partial flush packet */
3701 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3702 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3703
3704 /* SGPR */
3705 /* write the register state for the compute dispatch */
3706 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3707 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3708 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3709 - PACKET3_SET_SH_REG_START;
3710 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3711 }
3712 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3713 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3714 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3715 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3716 - PACKET3_SET_SH_REG_START;
3717 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3718 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3719
3720 /* write dispatch packet */
3721 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3722 ib.ptr[ib.length_dw++] = 128; /* x */
3723 ib.ptr[ib.length_dw++] = 1; /* y */
3724 ib.ptr[ib.length_dw++] = 1; /* z */
3725 ib.ptr[ib.length_dw++] =
3726 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3727
3728 /* write CS partial flush packet */
3729 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3730 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3731
3732 /* schedule the ib on the ring */
3733 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3734 if (r) {
3735 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3736 goto fail;
3737 }
3738
3739 /* wait for the GPU to finish processing the IB */
3740 r = dma_fence_wait(f, false);
3741 if (r) {
3742 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3743 goto fail;
3744 }
3745
3746 /* read back registers to clear the counters */
3747 mutex_lock(&adev->grbm_idx_mutex);
3748 for (j = 0; j < 16; j++) {
3749 gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j);
3750 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3751 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3752 gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j);
3753 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3754 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3755 gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j);
3756 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3757 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3758 gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j);
3759 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3760 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3761 }
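/* 0xe0000000 sets the SE/SH/instance broadcast bits of GRBM_GFX_INDEX,
 * restoring broadcast (non-indexed) register access
 */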
3762 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3763 mutex_unlock(&adev->grbm_idx_mutex);
3764
3765fail:
3766 amdgpu_ib_free(adev, &ib, NULL);
3767 dma_fence_put(f);
3768
3769 return r;
3770}
3771
b1023571
KW
3772static int gfx_v9_0_early_init(void *handle)
3773{
3774 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3775
3776 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
78c16834 3777 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
b1023571
KW
3778 gfx_v9_0_set_ring_funcs(adev);
3779 gfx_v9_0_set_irq_funcs(adev);
3780 gfx_v9_0_set_gds_init(adev);
3781 gfx_v9_0_set_rlc_funcs(adev);
3782
3783 return 0;
3784}
3785
760a1d55
FX
3786static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3787 struct amdgpu_iv_entry *entry);
3788
3789static int gfx_v9_0_ecc_late_init(void *handle)
3790{
3791 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3792 struct ras_common_if **ras_if = &adev->gfx.ras_if;
3793 struct ras_ih_if ih_info = {
3794 .cb = gfx_v9_0_process_ras_data_cb,
3795 };
3796 struct ras_fs_if fs_info = {
3797 .sysfs_name = "gfx_err_count",
3798 .debugfs_name = "gfx_err_inject",
3799 };
3800 struct ras_common_if ras_block = {
3801 .block = AMDGPU_RAS_BLOCK__GFX,
3802 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3803 .sub_block_index = 0,
3804 .name = "gfx",
3805 };
3806 int r;
3807
3808 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
a170d49d 3809 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
760a1d55
FX
3810 return 0;
3811 }
3812
acbbee01 3813 if (*ras_if)
3814 goto resume;
3815
760a1d55
FX
3816 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3817 if (!*ras_if)
3818 return -ENOMEM;
3819
5326ad54
JZ
3820 /* requires IBs so do in late init after IB pool is initialized */
3821 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3822 if (r)
3823 return r;
3824
760a1d55
FX
3825 **ras_if = ras_block;
3826
a170d49d 3827 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
70ab8c61 3828 if (r) {
3829 if (r == -EAGAIN) {
3830 amdgpu_ras_request_reset_on_boot(adev,
3831 AMDGPU_RAS_BLOCK__GFX);
3832 r = 0;
3833 }
760a1d55 3834 goto feature;
70ab8c61 3835 }
760a1d55
FX
3836
3837 ih_info.head = **ras_if;
3838 fs_info.head = **ras_if;
3839
3840 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3841 if (r)
3842 goto interrupt;
3843
3844 r = amdgpu_ras_debugfs_create(adev, &fs_info);
3845 if (r)
3846 goto debugfs;
3847
3848 r = amdgpu_ras_sysfs_create(adev, &fs_info);
3849 if (r)
3850 goto sysfs;
acbbee01 3851resume:
760a1d55
FX
3852 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3853 if (r)
3854 goto irq;
3855
3856 return 0;
3857irq:
3858 amdgpu_ras_sysfs_remove(adev, *ras_if);
3859sysfs:
3860 amdgpu_ras_debugfs_remove(adev, *ras_if);
3861debugfs:
3862 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3863interrupt:
3864 amdgpu_ras_feature_enable(adev, *ras_if, 0);
3865feature:
3866 kfree(*ras_if);
3867 *ras_if = NULL;
70ab8c61 3868 return r;
760a1d55
FX
3869}
3870
b1023571
KW
3871static int gfx_v9_0_late_init(void *handle)
3872{
3873 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3874 int r;
3875
3876 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3877 if (r)
3878 return r;
3879
3880 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3881 if (r)
3882 return r;
3883
760a1d55
FX
3884 r = gfx_v9_0_ecc_late_init(handle);
3885 if (r)
3886 return r;
3887
b1023571
KW
3888 return 0;
3889}
3890
106c7d61 3891static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
b1023571 3892{
106c7d61 3893 uint32_t rlc_setting;
b1023571
KW
3894
3895 /* if RLC is not enabled, do nothing */
5e78835a 3896 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
b1023571 3897 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
106c7d61 3898 return false;
b1023571 3899
106c7d61 3900 return true;
b1023571
KW
3901}
3902
106c7d61 3903static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
b1023571 3904{
106c7d61
LG
3905 uint32_t data;
3906 unsigned i;
b1023571 3907
106c7d61
LG
3908 data = RLC_SAFE_MODE__CMD_MASK;
3909 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3910 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
b1023571 3911
106c7d61
LG
3912 /* wait for RLC_SAFE_MODE */
3913 for (i = 0; i < adev->usec_timeout; i++) {
3914 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3915 break;
3916 udelay(1);
b1023571
KW
3917 }
3918}
3919
106c7d61
LG
3920static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3921{
3922 uint32_t data;
3923
3924 data = RLC_SAFE_MODE__CMD_MASK;
3925 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
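/* MESSAGE is left at 0 here, which requests leaving safe mode
 * (gfx_v9_0_set_safe_mode above sets MESSAGE to 1 to enter it)
 */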
3926}
3927
197f95c8
HZ
3928static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3929 bool enable)
3930{
106c7d61 3931 amdgpu_gfx_rlc_enter_safe_mode(adev);
197f95c8
HZ
3932
3933 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3934 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3935 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3936 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3937 } else {
3938 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3939 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3940 }
3941
106c7d61 3942 amdgpu_gfx_rlc_exit_safe_mode(adev);
197f95c8
HZ
3943}
3944
18924c71
HZ
3945static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
3946 bool enable)
3947{
3948 /* TODO: double check if we need to perform this under safe mode */
3949 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
3950
3951 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
3952 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
3953 else
3954 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
3955
3956 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
3957 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
3958 else
3959 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
3960
3961 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
3962}
3963
b1023571
KW
3964static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
3965 bool enable)
3966{
3967 uint32_t data, def;
3968
a7a0d543
LG
3969 amdgpu_gfx_rlc_enter_safe_mode(adev);
3970
b1023571
KW
3971 /* It is disabled by HW by default */
3972 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
3973 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
5e78835a 3974 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
c3693768
EQ
3975
3976 if (adev->asic_type != CHIP_VEGA12)
3977 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
3978
3979 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
b1023571
KW
3980 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
3981 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
3982
3983 /* only for Vega10 & Raven1 */
3984 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
3985
3986 if (def != data)
5e78835a 3987 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
b1023571
KW
3988
3989 /* MGLS is a global flag to control all MGLS in GFX */
3990 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
3991 /* 2 - RLC memory Light sleep */
3992 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5e78835a 3993 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
b1023571
KW
3994 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
3995 if (def != data)
5e78835a 3996 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
b1023571
KW
3997 }
3998 /* 3 - CP memory Light sleep */
3999 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5e78835a 4000 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
b1023571
KW
4001 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4002 if (def != data)
5e78835a 4003 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
b1023571
KW
4004 }
4005 }
4006 } else {
4007 /* 1 - MGCG_OVERRIDE */
5e78835a 4008 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
c3693768
EQ
4009
4010 if (adev->asic_type != CHIP_VEGA12)
4011 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4012
4013 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
b1023571
KW
4014 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4015 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4016 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
c3693768 4017
b1023571 4018 if (def != data)
5e78835a 4019 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
b1023571
KW
4020
4021 /* 2 - disable MGLS in RLC */
5e78835a 4022 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
b1023571
KW
4023 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4024 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5e78835a 4025 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
b1023571
KW
4026 }
4027
4028 /* 3 - disable MGLS in CP */
5e78835a 4029 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
b1023571
KW
4030 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4031 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5e78835a 4032 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
b1023571
KW
4033 }
4034 }
a7a0d543
LG
4035
4036 amdgpu_gfx_rlc_exit_safe_mode(adev);
b1023571
KW
4037}
4038
4039static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4040 bool enable)
4041{
4042 uint32_t data, def;
4043
106c7d61 4044 amdgpu_gfx_rlc_enter_safe_mode(adev);
b1023571
KW
4045
4046 /* Enable 3D CGCG/CGLS */
4047 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4048 /* write cmd to clear cgcg/cgls ov */
5e78835a 4049 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
b1023571
KW
4050 /* unset CGCG override */
4051 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4052 /* update CGCG and CGLS override bits */
4053 if (def != data)
5e78835a 4054 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
a5aedc2d
EQ
4055
4056 /* enable 3Dcgcg FSM(0x0000363f) */
5e78835a 4057 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
a5aedc2d
EQ
4058
4059 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
b1023571
KW
4060 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4061 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4062 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4063 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4064 if (def != data)
5e78835a 4065 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
b1023571
KW
4066
4067 /* set IDLE_POLL_COUNT(0x00900100) */
5e78835a 4068 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
b1023571
KW
4069 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4070 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4071 if (def != data)
5e78835a 4072 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
b1023571
KW
4073 } else {
4074 /* Disable CGCG/CGLS */
5e78835a 4075 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
b1023571
KW
4076 /* disable cgcg, cgls should be disabled */
4077 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4078 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4079 /* disable cgcg and cgls in FSM */
4080 if (def != data)
5e78835a 4081 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
b1023571
KW
4082 }
4083
106c7d61 4084 amdgpu_gfx_rlc_exit_safe_mode(adev);
b1023571
KW
4085}
4086
4087static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4088 bool enable)
4089{
4090 uint32_t def, data;
4091
106c7d61 4092 amdgpu_gfx_rlc_enter_safe_mode(adev);
b1023571
KW
4093
4094 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5e78835a 4095 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
b1023571
KW
4096 /* unset CGCG override */
4097 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4098 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4099 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4100 else
4101 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4102 /* update CGCG and CGLS override bits */
4103 if (def != data)
5e78835a 4104 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
b1023571 4105
a5aedc2d 4106 /* enable cgcg FSM(0x0000363F) */
5e78835a 4107 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
a5aedc2d
EQ
4108
4109 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
b1023571
KW
4110 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4111 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4112 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4113 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4114 if (def != data)
5e78835a 4115 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
b1023571
KW
4116
4117 /* set IDLE_POLL_COUNT(0x00900100) */
5e78835a 4118 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
b1023571
KW
4119 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4120 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4121 if (def != data)
5e78835a 4122 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
b1023571 4123 } else {
5e78835a 4124 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
b1023571
KW
4125 /* reset CGCG/CGLS bits */
4126 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4127 /* disable cgcg and cgls in FSM */
4128 if (def != data)
5e78835a 4129 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
b1023571
KW
4130 }
4131
106c7d61 4132 amdgpu_gfx_rlc_exit_safe_mode(adev);
b1023571
KW
4133}
4134
4135static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4136 bool enable)
4137{
4138 if (enable) {
4139 /* CGCG/CGLS should be enabled after MGCG/MGLS
4140 * === MGCG + MGLS ===
4141 */
4142 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4143 /* === CGCG /CGLS for GFX 3D Only === */
4144 gfx_v9_0_update_3d_clock_gating(adev, enable);
4145 /* === CGCG + CGLS === */
4146 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4147 } else {
4148 /* CGCG/CGLS should be disabled before MGCG/MGLS
4149 * === CGCG + CGLS ===
4150 */
4151 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4152 /* === CGCG /CGLS for GFX 3D Only === */
4153 gfx_v9_0_update_3d_clock_gating(adev, enable);
4154 /* === MGCG + MGLS === */
4155 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4156 }
4157 return 0;
4158}
4159
4160static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
106c7d61
LG
4161 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4162 .set_safe_mode = gfx_v9_0_set_safe_mode,
4163 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
fdb81fd7 4164 .init = gfx_v9_0_rlc_init,
106c7d61
LG
4165 .get_csb_size = gfx_v9_0_get_csb_size,
4166 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4167 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
fdb81fd7
LG
4168 .resume = gfx_v9_0_rlc_resume,
4169 .stop = gfx_v9_0_rlc_stop,
4170 .reset = gfx_v9_0_rlc_reset,
4171 .start = gfx_v9_0_rlc_start
b1023571
KW
4172};
4173
4174static int gfx_v9_0_set_powergating_state(void *handle,
4175 enum amd_powergating_state state)
4176{
5897c99e 4177 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
197f95c8 4178 bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5897c99e
HZ
4179
4180 switch (adev->asic_type) {
4181 case CHIP_RAVEN:
05df1f01
RZ
4182 if (!enable) {
4183 amdgpu_gfx_off_ctrl(adev, false);
4184 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4185 }
5897c99e
HZ
4186 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4187 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4188 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4189 } else {
4190 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4191 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4192 }
4193
4194 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4195 gfx_v9_0_enable_cp_power_gating(adev, true);
4196 else
4197 gfx_v9_0_enable_cp_power_gating(adev, false);
197f95c8
HZ
4198
4199 /* update gfx cgpg state */
4200 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
18924c71
HZ
4201
4202 /* update mgcg state */
4203 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
9134c6d7 4204
05df1f01
RZ
4205 if (enable)
4206 amdgpu_gfx_off_ctrl(adev, true);
991a6b32
EQ
4207 break;
4208 case CHIP_VEGA12:
05df1f01
RZ
4209 if (!enable) {
4210 amdgpu_gfx_off_ctrl(adev, false);
4211 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4212 } else {
4213 amdgpu_gfx_off_ctrl(adev, true);
4214 }
5897c99e
HZ
4215 break;
4216 default:
4217 break;
4218 }
4219
b1023571
KW
4220 return 0;
4221}
4222
4223static int gfx_v9_0_set_clockgating_state(void *handle,
4224 enum amd_clockgating_state state)
4225{
4226 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4227
fb82afab
XY
4228 if (amdgpu_sriov_vf(adev))
4229 return 0;
4230
b1023571
KW
4231 switch (adev->asic_type) {
4232 case CHIP_VEGA10:
23862464 4233 case CHIP_VEGA12:
28b576b2 4234 case CHIP_VEGA20:
a4dc61f5 4235 case CHIP_RAVEN:
b1023571
KW
4236 gfx_v9_0_update_gfx_clock_gating(adev,
4237 state == AMD_CG_STATE_GATE ? true : false);
4238 break;
4239 default:
4240 break;
4241 }
4242 return 0;
4243}
4244
12ad27fa
HR
4245static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4246{
4247 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4248 int data;
4249
4250 if (amdgpu_sriov_vf(adev))
4251 *flags = 0;
4252
4253 /* AMD_CG_SUPPORT_GFX_MGCG */
5e78835a 4254 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
12ad27fa
HR
4255 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4256 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4257
4258 /* AMD_CG_SUPPORT_GFX_CGCG */
5e78835a 4259 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
12ad27fa
HR
4260 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4261 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4262
4263 /* AMD_CG_SUPPORT_GFX_CGLS */
4264 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4265 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4266
4267 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5e78835a 4268 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
12ad27fa
HR
4269 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4270 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4271
4272 /* AMD_CG_SUPPORT_GFX_CP_LS */
5e78835a 4273 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
12ad27fa
HR
4274 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4275 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4276
4277 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5e78835a 4278 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
12ad27fa
HR
4279 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4280 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4281
4282 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4283 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4284 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4285}
4286
b1023571
KW
4287static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4288{
4289 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
4290}
4291
4292static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4293{
4294 struct amdgpu_device *adev = ring->adev;
4295 u64 wptr;
4296
4297 /* XXX check if swapping is necessary on BE */
4298 if (ring->use_doorbell) {
4299 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4300 } else {
5e78835a
TSD
4301 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4302 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
b1023571
KW
4303 }
4304
4305 return wptr;
4306}
4307
4308static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4309{
4310 struct amdgpu_device *adev = ring->adev;
4311
4312 if (ring->use_doorbell) {
4313 /* XXX check if swapping is necessary on BE */
4314 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4315 WDOORBELL64(ring->doorbell_index, ring->wptr);
4316 } else {
5e78835a
TSD
4317 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4318 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
b1023571
KW
4319 }
4320}
4321
4322static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4323{
946a4d5b 4324 struct amdgpu_device *adev = ring->adev;
b1023571 4325 u32 ref_and_mask, reg_mem_engine;
bf383fb6 4326 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
b1023571
KW
4327
4328 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4329 switch (ring->me) {
4330 case 1:
4331 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4332 break;
4333 case 2:
4334 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4335 break;
4336 default:
4337 return;
4338 }
4339 reg_mem_engine = 0;
4340 } else {
4341 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4342 reg_mem_engine = 1; /* pfp */
4343 }
4344
4345 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
946a4d5b
SL
4346 adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4347 adev->nbio_funcs->get_hdp_flush_done_offset(adev),
b1023571
KW
4348 ref_and_mask, ref_and_mask, 0x20);
4349}
4350
b1023571 4351static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
34955e03
RZ
4352 struct amdgpu_job *job,
4353 struct amdgpu_ib *ib,
c4c905ec 4354 uint32_t flags)
b1023571 4355{
34955e03 4356 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
eaa05d52 4357 u32 header, control = 0;
b1023571 4358
eaa05d52
ML
4359 if (ib->flags & AMDGPU_IB_FLAG_CE)
4360 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4361 else
4362 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
b1023571 4363
c4f46f22 4364 control |= ib->length_dw | (vmid << 24);
b1023571 4365
635e7132 4366 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
eaa05d52 4367 control |= INDIRECT_BUFFER_PRE_ENB(1);
9ccd52eb 4368
635e7132
ML
4369 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4370 gfx_v9_0_ring_emit_de_meta(ring);
4371 }
4372
eaa05d52 4373 amdgpu_ring_write(ring, header);
72408a41 4374 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
eaa05d52 4375 amdgpu_ring_write(ring,
b1023571 4376#ifdef __BIG_ENDIAN
eaa05d52 4377 (2 << 0) |
b1023571 4378#endif
eaa05d52
ML
4379 lower_32_bits(ib->gpu_addr));
4380 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4381 amdgpu_ring_write(ring, control);
b1023571
KW
4382}
4383
b1023571 4384static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
34955e03
RZ
4385 struct amdgpu_job *job,
4386 struct amdgpu_ib *ib,
c4c905ec 4387 uint32_t flags)
b1023571 4388{
34955e03
RZ
4389 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4390 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
b1023571 4391
41cca166
MO
4392 /* Currently, there is a high probability of getting a wave ID mismatch
4393 * between ME and GDS, leading to a hw deadlock, because ME generates
4394 * different wave IDs than the GDS expects. This situation happens
4395 * randomly when at least 5 compute pipes use GDS ordered append.
4396 * The wave IDs generated by ME are also wrong after suspend/resume.
4397 * Those are probably bugs somewhere else in the kernel driver.
4398 *
4399 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4400 * GDS to 0 for this ring (me/pipe).
4401 */
4402 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4403 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4404 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4405 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4406 }
4407
34955e03 4408 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
b1023571 4409 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
34955e03 4410 amdgpu_ring_write(ring,
b1023571 4411#ifdef __BIG_ENDIAN
34955e03 4412 (2 << 0) |
b1023571 4413#endif
34955e03
RZ
4414 lower_32_bits(ib->gpu_addr));
4415 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4416 amdgpu_ring_write(ring, control);
b1023571
KW
4417}
4418
4419static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4420 u64 seq, unsigned flags)
4421{
4422 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4423 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
d240cd9e 4424 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
b1023571
KW
4425
4426 /* RELEASE_MEM - flush caches, send int */
4427 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
d240cd9e
MO
4428 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4429 EOP_TC_NC_ACTION_EN) :
4430 (EOP_TCL1_ACTION_EN |
4431 EOP_TC_ACTION_EN |
4432 EOP_TC_WB_ACTION_EN |
4433 EOP_TC_MD_ACTION_EN)) |
b1023571
KW
4434 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4435 EVENT_INDEX(5)));
4436 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4437
4438 /*
4439 * the address should be Qword aligned for a 64bit write, Dword
4440 * aligned if only the low 32 bits of data are sent (high bits discarded)
4441 */
4442 if (write64bit)
4443 BUG_ON(addr & 0x7);
4444 else
4445 BUG_ON(addr & 0x3);
4446 amdgpu_ring_write(ring, lower_32_bits(addr));
4447 amdgpu_ring_write(ring, upper_32_bits(addr));
4448 amdgpu_ring_write(ring, lower_32_bits(seq));
4449 amdgpu_ring_write(ring, upper_32_bits(seq));
4450 amdgpu_ring_write(ring, 0);
4451}
4452
4453static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4454{
4455 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4456 uint32_t seq = ring->fence_drv.sync_seq;
4457 uint64_t addr = ring->fence_drv.gpu_addr;
4458
4459 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4460 lower_32_bits(addr), upper_32_bits(addr),
4461 seq, 0xffffffff, 4);
4462}
4463
4464static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
c633c00b 4465 unsigned vmid, uint64_t pd_addr)
b1023571 4466{
c633c00b 4467 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
b1023571 4468
b1023571 4469 /* compute doesn't have PFP */
9096d6e5 4470 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
b1023571
KW
4471 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4472 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4473 amdgpu_ring_write(ring, 0x0);
b1023571
KW
4474 }
4475}
4476
4477static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4478{
4479 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4480}
4481
4482static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4483{
4484 u64 wptr;
4485
4486 /* XXX check if swapping is necessary on BE */
4487 if (ring->use_doorbell)
4488 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4489 else
4490 BUG();
4491 return wptr;
4492}
4493
761c77c1
AR
4494static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4495 bool acquire)
4496{
4497 struct amdgpu_device *adev = ring->adev;
4498 int pipe_num, tmp, reg;
4499 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4500
4501 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4502
4503 /* first me only has 2 entries, GFX and HP3D */
4504 if (ring->me > 0)
4505 pipe_num -= 2;
4506
4507 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4508 tmp = RREG32(reg);
4509 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4510 WREG32(reg, tmp);
4511}
4512
4513static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4514 struct amdgpu_ring *ring,
4515 bool acquire)
4516{
4517 int i, pipe;
4518 bool reserve;
4519 struct amdgpu_ring *iring;
4520
4521 mutex_lock(&adev->gfx.pipe_reserve_mutex);
4522 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
4523 if (acquire)
4524 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4525 else
4526 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4527
4528 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4529 /* Clear all reservations - everyone reacquires all resources */
4530 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4531 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4532 true);
4533
4534 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4535 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4536 true);
4537 } else {
4538 /* Lower all pipes without a current reservation */
4539 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4540 iring = &adev->gfx.gfx_ring[i];
4541 pipe = amdgpu_gfx_queue_to_bit(adev,
4542 iring->me,
4543 iring->pipe,
4544 0);
4545 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4546 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4547 }
4548
4549 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4550 iring = &adev->gfx.compute_ring[i];
4551 pipe = amdgpu_gfx_queue_to_bit(adev,
4552 iring->me,
4553 iring->pipe,
4554 0);
4555 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4556 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4557 }
4558 }
4559
4560 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4561}
4562
4563static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4564 struct amdgpu_ring *ring,
4565 bool acquire)
4566{
4567 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4568 uint32_t queue_priority = acquire ? 0xf : 0x0;
4569
4570 mutex_lock(&adev->srbm_mutex);
4571 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4572
4573 WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4574 WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4575
4576 soc15_grbm_select(adev, 0, 0, 0, 0);
4577 mutex_unlock(&adev->srbm_mutex);
4578}
4579
4580static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4581 enum drm_sched_priority priority)
4582{
4583 struct amdgpu_device *adev = ring->adev;
4584 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4585
4586 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4587 return;
4588
4589 gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4590 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4591}
4592
b1023571
KW
4593static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4594{
4595 struct amdgpu_device *adev = ring->adev;
4596
4597 /* XXX check if swapping is necessary on BE */
4598 if (ring->use_doorbell) {
4599 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4600 WDOORBELL64(ring->doorbell_index, ring->wptr);
4601 	} else {
4602 BUG(); /* only DOORBELL method supported on gfx9 now */
4603 }
4604}
4605
aa6faa44
XY
4606static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4607 u64 seq, unsigned int flags)
4608{
cd29253f
SL
4609 struct amdgpu_device *adev = ring->adev;
4610
aa6faa44
XY
4611 /* we only allocate 32bit for each seq wb address */
4612 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4613
4614 /* write fence seq to the "addr" */
4615 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4616 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4617 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4618 amdgpu_ring_write(ring, lower_32_bits(addr));
4619 amdgpu_ring_write(ring, upper_32_bits(addr));
4620 amdgpu_ring_write(ring, lower_32_bits(seq));
4621
4622 if (flags & AMDGPU_FENCE_FLAG_INT) {
4623 /* set register to trigger INT */
4624 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4625 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4626 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4627 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4628 amdgpu_ring_write(ring, 0);
4629 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4630 }
4631}
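/*
 * Note on the packets above: the first WRITE_DATA targets memory
 * (WRITE_DATA_DST_SEL(5), the wb slot holding the fence seq, same dst
 * encoding as in gfx_v9_0_ring_emit_rreg() below), while the second
 * targets a register (DST_SEL(0), mmCPC_INT_STATUS) to raise the
 * interrupt.  Only the low 32 bits of seq are written, which is why
 * AMDGPU_FENCE_FLAG_64BIT is rejected by the BUG_ON() above.
 */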
4632
b1023571
KW
4633static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4634{
4635 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4636 amdgpu_ring_write(ring, 0);
4637}
4638
cca02cd3
XY
4639static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4640{
d81a2209 4641 struct v9_ce_ib_state ce_payload = {0};
cca02cd3
XY
4642 uint64_t csa_addr;
4643 int cnt;
4644
4645 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
6f05c4e9 4646 csa_addr = amdgpu_csa_vaddr(ring->adev);
cca02cd3
XY
4647
4648 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4649 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4650 WRITE_DATA_DST_SEL(8) |
4651 WR_CONFIRM) |
4652 WRITE_DATA_CACHE_POLICY(0));
4653 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4654 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4655 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4656}
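/*
 * Note on the cnt arithmetic above (a reading of the code, not a
 * hardware statement): the WRITE_DATA body emitted here is one control
 * dword, two address dwords and the payload, and the PACKET3 count is
 * one less than the number of dwords following the header, hence
 * cnt = (sizeof(ce_payload) >> 2) + 4 - 2, i.e. payload dwords plus 2.
 * The same calculation is used for the DE metadata below.
 */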
4657
4658static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4659{
d81a2209 4660 struct v9_de_ib_state de_payload = {0};
cca02cd3
XY
4661 uint64_t csa_addr, gds_addr;
4662 int cnt;
4663
6f05c4e9 4664 csa_addr = amdgpu_csa_vaddr(ring->adev);
cca02cd3
XY
4665 gds_addr = csa_addr + 4096;
4666 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4667 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4668
4669 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4670 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4671 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4672 WRITE_DATA_DST_SEL(8) |
4673 WR_CONFIRM) |
4674 WRITE_DATA_CACHE_POLICY(0));
4675 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4676 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4677 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4678}
4679
2ea6ab27
ML
4680static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4681{
4682 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4683 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
4684}
4685
b1023571
KW
4686static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4687{
4688 uint32_t dw2 = 0;
4689
cca02cd3
XY
4690 if (amdgpu_sriov_vf(ring->adev))
4691 gfx_v9_0_ring_emit_ce_meta(ring);
4692
2ea6ab27
ML
4693 gfx_v9_0_ring_emit_tmz(ring, true);
4694
b1023571
KW
4695 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
4696 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4697 /* set load_global_config & load_global_uconfig */
4698 dw2 |= 0x8001;
4699 /* set load_cs_sh_regs */
4700 dw2 |= 0x01000000;
4701 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4702 dw2 |= 0x10002;
4703
4704 /* set load_ce_ram if preamble presented */
4705 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4706 dw2 |= 0x10000000;
4707 } else {
4708 	/* still load_ce_ram if this is the first time the preamble is presented,
4709 	 * even though no context switch happens.
4710 */
4711 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4712 dw2 |= 0x10000000;
4713 }
4714
4715 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4716 amdgpu_ring_write(ring, dw2);
4717 amdgpu_ring_write(ring, 0);
4718}
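/*
 * For reference, the CONTEXT_CONTROL dw2 bits assembled above, written
 * out as a sketch (the names are descriptive only, taken from the
 * inline comments rather than from the register headers):
 *
 *	#define CTXCNTL_LOAD_ENABLE		0x80000000
 *	#define CTXCNTL_LOAD_GLOBAL_CONFIG	0x00008001
 *	#define CTXCNTL_LOAD_CS_SH_REGS	0x01000000
 *	#define CTXCNTL_LOAD_PER_CTX_STATE	0x00010002
 *	#define CTXCNTL_LOAD_CE_RAM		0x10000000
 *
 * so a context switch emits load_enable | 0x8001 | 0x01000000 | 0x10002,
 * plus load_ce_ram when a preamble IB is present.
 */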
4719
9a5e02b5
ML
4720static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4721{
4722 unsigned ret;
4723 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4724 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4725 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4726 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4727 ret = ring->wptr & ring->buf_mask;
4728 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4729 return ret;
4730}
4731
4732static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4733{
4734 unsigned cur;
4735 BUG_ON(offset > ring->buf_mask);
4736 BUG_ON(ring->ring[offset] != 0x55aa55aa);
4737
4738 cur = (ring->wptr & ring->buf_mask) - 1;
4739 if (likely(cur > offset))
4740 ring->ring[offset] = cur - offset;
4741 else
4742 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4743}
4744
aa6faa44
XY
4745static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4746{
4747 struct amdgpu_device *adev = ring->adev;
4748
4749 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4750 	amdgpu_ring_write(ring, 0 |	/* src: register */
4751 (5 << 8) | /* dst: memory */
4752 (1 << 20)); /* write confirm */
4753 amdgpu_ring_write(ring, reg);
4754 amdgpu_ring_write(ring, 0);
4755 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4756 adev->virt.reg_val_offs * 4));
4757 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4758 adev->virt.reg_val_offs * 4));
4759}
4760
4761static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
254e825b 4762 uint32_t val)
aa6faa44 4763{
254e825b
CK
4764 uint32_t cmd = 0;
4765
4766 switch (ring->funcs->type) {
4767 case AMDGPU_RING_TYPE_GFX:
4768 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4769 break;
4770 case AMDGPU_RING_TYPE_KIQ:
4771 cmd = (1 << 16); /* no inc addr */
4772 break;
4773 default:
4774 cmd = WR_CONFIRM;
4775 break;
4776 }
aa6faa44 4777 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
254e825b 4778 amdgpu_ring_write(ring, cmd);
aa6faa44
XY
4779 amdgpu_ring_write(ring, reg);
4780 amdgpu_ring_write(ring, 0);
4781 amdgpu_ring_write(ring, val);
4782}
4783
230fcc34
CK
4784static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4785 uint32_t val, uint32_t mask)
4786{
4787 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4788}
4789
10ed3c31
AD
4790static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4791 uint32_t reg0, uint32_t reg1,
4792 uint32_t ref, uint32_t mask)
4793{
4794 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
39b62541
ED
4795 struct amdgpu_device *adev = ring->adev;
4796 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4797 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
10ed3c31 4798
39b62541 4799 if (fw_version_ok)
58cd8fbc
CK
4800 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4801 ref, mask, 0x20);
4802 else
4803 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4804 ref, mask);
10ed3c31
AD
4805}
4806
80dbea47
CK
4807static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4808{
4809 struct amdgpu_device *adev = ring->adev;
4810 uint32_t value = 0;
4811
4812 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4813 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4814 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4815 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4816 WREG32(mmSQ_CMD, value);
4817}
4818
b1023571
KW
4819static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4820 enum amdgpu_interrupt_state state)
4821{
b1023571
KW
4822 switch (state) {
4823 case AMDGPU_IRQ_STATE_DISABLE:
b1023571 4824 case AMDGPU_IRQ_STATE_ENABLE:
9da2c652
TSD
4825 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4826 TIME_STAMP_INT_ENABLE,
4827 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
b1023571
KW
4828 break;
4829 default:
4830 break;
4831 }
4832}
4833
4834static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4835 int me, int pipe,
4836 enum amdgpu_interrupt_state state)
4837{
4838 u32 mec_int_cntl, mec_int_cntl_reg;
4839
4840 /*
d0c55cdf
AD
4841 * amdgpu controls only the first MEC. That's why this function only
4842 * handles the setting of interrupts for this specific MEC. All other
b1023571
KW
4843 * pipes' interrupts are set by amdkfd.
4844 */
4845
4846 if (me == 1) {
4847 switch (pipe) {
4848 case 0:
4849 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4850 break;
d0c55cdf
AD
4851 case 1:
4852 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4853 break;
4854 case 2:
4855 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4856 break;
4857 case 3:
4858 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4859 break;
b1023571
KW
4860 default:
4861 DRM_DEBUG("invalid pipe %d\n", pipe);
4862 return;
4863 }
4864 } else {
4865 DRM_DEBUG("invalid me %d\n", me);
4866 return;
4867 }
4868
4869 switch (state) {
4870 case AMDGPU_IRQ_STATE_DISABLE:
4871 mec_int_cntl = RREG32(mec_int_cntl_reg);
4872 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4873 TIME_STAMP_INT_ENABLE, 0);
4874 WREG32(mec_int_cntl_reg, mec_int_cntl);
4875 break;
4876 case AMDGPU_IRQ_STATE_ENABLE:
4877 mec_int_cntl = RREG32(mec_int_cntl_reg);
4878 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4879 TIME_STAMP_INT_ENABLE, 1);
4880 WREG32(mec_int_cntl_reg, mec_int_cntl);
4881 break;
4882 default:
4883 break;
4884 }
4885}
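/*
 * Note: REG_SET_FIELD() above always uses the CP_ME1_PIPE0_INT_CNTL
 * field definitions even when mec_int_cntl_reg points at another pipe;
 * this relies on the (reasonable) assumption that all CP_ME1_PIPEn
 * interrupt control registers share the same field layout, so only the
 * register offset needs to change per pipe.
 */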
4886
4887static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4888 struct amdgpu_irq_src *source,
4889 unsigned type,
4890 enum amdgpu_interrupt_state state)
4891{
b1023571
KW
4892 switch (state) {
4893 case AMDGPU_IRQ_STATE_DISABLE:
b1023571 4894 case AMDGPU_IRQ_STATE_ENABLE:
8dd553e1
TSD
4895 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4896 PRIV_REG_INT_ENABLE,
4897 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
b1023571
KW
4898 break;
4899 default:
4900 break;
4901 }
4902
4903 return 0;
4904}
4905
4906static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4907 struct amdgpu_irq_src *source,
4908 unsigned type,
4909 enum amdgpu_interrupt_state state)
4910{
b1023571
KW
4911 switch (state) {
4912 case AMDGPU_IRQ_STATE_DISABLE:
b1023571 4913 case AMDGPU_IRQ_STATE_ENABLE:
98709ca6
TSD
4914 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4915 PRIV_INSTR_INT_ENABLE,
4916 				state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
b1023571
KW
4917 default:
4918 break;
4919 }
4920
4921 return 0;
4922}
4923
760a1d55
FX
4924#define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
4925 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4926 CP_ECC_ERROR_INT_ENABLE, 1)
4927
4928#define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
4929 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4930 CP_ECC_ERROR_INT_ENABLE, 0)
4931
4932static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4933 struct amdgpu_irq_src *source,
4934 unsigned type,
4935 enum amdgpu_interrupt_state state)
4936{
4937 switch (state) {
4938 case AMDGPU_IRQ_STATE_DISABLE:
4939 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4940 CP_ECC_ERROR_INT_ENABLE, 0);
4941 DISABLE_ECC_ON_ME_PIPE(1, 0);
4942 DISABLE_ECC_ON_ME_PIPE(1, 1);
4943 DISABLE_ECC_ON_ME_PIPE(1, 2);
4944 DISABLE_ECC_ON_ME_PIPE(1, 3);
4945 break;
4946
4947 case AMDGPU_IRQ_STATE_ENABLE:
4948 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4949 CP_ECC_ERROR_INT_ENABLE, 1);
4950 ENABLE_ECC_ON_ME_PIPE(1, 0);
4951 ENABLE_ECC_ON_ME_PIPE(1, 1);
4952 ENABLE_ECC_ON_ME_PIPE(1, 2);
4953 ENABLE_ECC_ON_ME_PIPE(1, 3);
4954 break;
4955 default:
4956 break;
4957 }
4958
4959 return 0;
4960}
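/*
 * The ENABLE/DISABLE_ECC_ON_ME_PIPE() macros above paste me/pipe into
 * the register name and thus expand to a single field update; e.g.
 * ENABLE_ECC_ON_ME_PIPE(1, 0) becomes:
 *
 *	WREG32_FIELD15(GC, 0, CP_ME1_PIPE0_INT_CNTL,
 *		       CP_ECC_ERROR_INT_ENABLE, 1);
 */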
4961
4962
b1023571
KW
4963static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4964 struct amdgpu_irq_src *src,
4965 unsigned type,
4966 enum amdgpu_interrupt_state state)
4967{
4968 switch (type) {
4969 case AMDGPU_CP_IRQ_GFX_EOP:
4970 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
4971 break;
4972 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4973 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4974 break;
4975 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4976 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4977 break;
4978 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4979 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4980 break;
4981 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4982 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4983 break;
4984 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4985 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4986 break;
4987 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4988 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4989 break;
4990 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4991 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4992 break;
4993 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4994 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4995 break;
4996 default:
4997 break;
4998 }
4999 return 0;
5000}
5001
5002static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5003 struct amdgpu_irq_src *source,
5004 struct amdgpu_iv_entry *entry)
5005{
5006 int i;
5007 u8 me_id, pipe_id, queue_id;
5008 struct amdgpu_ring *ring;
5009
5010 DRM_DEBUG("IH: CP EOP\n");
5011 me_id = (entry->ring_id & 0x0c) >> 2;
5012 pipe_id = (entry->ring_id & 0x03) >> 0;
5013 queue_id = (entry->ring_id & 0x70) >> 4;
5014
5015 switch (me_id) {
5016 case 0:
5017 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5018 break;
5019 case 1:
5020 case 2:
5021 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5022 ring = &adev->gfx.compute_ring[i];
5023 /* Per-queue interrupt is supported for MEC starting from VI.
5024 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5025 */
5026 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5027 amdgpu_fence_process(ring);
5028 }
5029 break;
5030 }
5031 return 0;
5032}
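/*
 * The ring_id decode used above (and again in gfx_v9_0_fault() below)
 * treats the IV ring_id as a packed field: pipe in bits [1:0], me in
 * bits [3:2] and queue in bits [6:4].  A sketch of the same decode as
 * a helper (illustrative only):
 *
 *	static void gfx_v9_0_decode_ring_id(u8 ring_id, u8 *me, u8 *pipe,
 *					    u8 *queue)
 *	{
 *		*me    = (ring_id & 0x0c) >> 2;
 *		*pipe  = (ring_id & 0x03) >> 0;
 *		*queue = (ring_id & 0x70) >> 4;
 *	}
 */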
5033
898c2cb5
CK
5034static void gfx_v9_0_fault(struct amdgpu_device *adev,
5035 struct amdgpu_iv_entry *entry)
5036{
5037 u8 me_id, pipe_id, queue_id;
5038 struct amdgpu_ring *ring;
5039 int i;
5040
5041 me_id = (entry->ring_id & 0x0c) >> 2;
5042 pipe_id = (entry->ring_id & 0x03) >> 0;
5043 queue_id = (entry->ring_id & 0x70) >> 4;
5044
5045 switch (me_id) {
5046 case 0:
5047 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5048 break;
5049 case 1:
5050 case 2:
5051 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5052 ring = &adev->gfx.compute_ring[i];
5053 if (ring->me == me_id && ring->pipe == pipe_id &&
5054 ring->queue == queue_id)
5055 drm_sched_fault(&ring->sched);
5056 }
5057 break;
5058 }
5059}
5060
b1023571
KW
5061static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5062 struct amdgpu_irq_src *source,
5063 struct amdgpu_iv_entry *entry)
5064{
5065 DRM_ERROR("Illegal register access in command stream\n");
898c2cb5 5066 gfx_v9_0_fault(adev, entry);
b1023571
KW
5067 return 0;
5068}
5069
5070static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5071 struct amdgpu_irq_src *source,
5072 struct amdgpu_iv_entry *entry)
5073{
5074 DRM_ERROR("Illegal instruction in command stream\n");
898c2cb5 5075 gfx_v9_0_fault(adev, entry);
b1023571
KW
5076 return 0;
5077}
5078
760a1d55
FX
5079static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5080 struct amdgpu_iv_entry *entry)
5081{
5082 	/* TODO: a UE (uncorrectable error) will trigger an interrupt. */
9b54d201 5083 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
760a1d55
FX
5084 amdgpu_ras_reset_gpu(adev, 0);
5085 return AMDGPU_RAS_UE;
5086}
5087
5088static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5089 struct amdgpu_irq_src *source,
5090 struct amdgpu_iv_entry *entry)
5091{
14cfde84 5092 struct ras_common_if *ras_if = adev->gfx.ras_if;
760a1d55 5093 struct ras_dispatch_if ih_data = {
760a1d55
FX
5094 .entry = entry,
5095 };
14cfde84 5096
5097 if (!ras_if)
5098 return 0;
5099
5100 ih_data.head = *ras_if;
5101
760a1d55
FX
5102 DRM_ERROR("CP ECC ERROR IRQ\n");
5103 amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5104 return 0;
5105}
5106
fa04b6ba 5107static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
b1023571
KW
5108 .name = "gfx_v9_0",
5109 .early_init = gfx_v9_0_early_init,
5110 .late_init = gfx_v9_0_late_init,
5111 .sw_init = gfx_v9_0_sw_init,
5112 .sw_fini = gfx_v9_0_sw_fini,
5113 .hw_init = gfx_v9_0_hw_init,
5114 .hw_fini = gfx_v9_0_hw_fini,
5115 .suspend = gfx_v9_0_suspend,
5116 .resume = gfx_v9_0_resume,
5117 .is_idle = gfx_v9_0_is_idle,
5118 .wait_for_idle = gfx_v9_0_wait_for_idle,
5119 .soft_reset = gfx_v9_0_soft_reset,
5120 .set_clockgating_state = gfx_v9_0_set_clockgating_state,
5121 .set_powergating_state = gfx_v9_0_set_powergating_state,
12ad27fa 5122 .get_clockgating_state = gfx_v9_0_get_clockgating_state,
b1023571
KW
5123};
5124
5125static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5126 .type = AMDGPU_RING_TYPE_GFX,
5127 .align_mask = 0xff,
5128 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5129 .support_64bit_ptrs = true,
0eeb68b3 5130 .vmhub = AMDGPU_GFXHUB,
b1023571
KW
5131 .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5132 .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5133 .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
e9d672b2
ML
5134 .emit_frame_size = /* totally 242 maximum if 16 IBs */
5135 5 + /* COND_EXEC */
5136 7 + /* PIPELINE_SYNC */
f732b6b3
CK
5137 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5138 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5139 2 + /* VM_FLUSH */
e9d672b2
ML
5140 8 + /* FENCE for VM_FLUSH */
5141 20 + /* GDS switch */
5142 4 + /* double SWITCH_BUFFER,
5143 the first COND_EXEC jump to the place just
5144 prior to this double SWITCH_BUFFER */
5145 5 + /* COND_EXEC */
5146 7 + /* HDP_flush */
5147 4 + /* VGT_flush */
5148 14 + /* CE_META */
5149 31 + /* DE_META */
5150 3 + /* CNTX_CTRL */
5151 5 + /* HDP_INVL */
5152 8 + 8 + /* FENCE x2 */
5153 2, /* SWITCH_BUFFER */
b1023571
KW
5154 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
5155 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5156 .emit_fence = gfx_v9_0_ring_emit_fence,
5157 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5158 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5159 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5160 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
b1023571
KW
5161 .test_ring = gfx_v9_0_ring_test_ring,
5162 .test_ib = gfx_v9_0_ring_test_ib,
5163 .insert_nop = amdgpu_ring_insert_nop,
5164 .pad_ib = amdgpu_ring_generic_pad_ib,
5165 .emit_switch_buffer = gfx_v9_ring_emit_sb,
5166 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
9a5e02b5
ML
5167 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5168 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
3b4d68e9 5169 .emit_tmz = gfx_v9_0_ring_emit_tmz,
254e825b 5170 .emit_wreg = gfx_v9_0_ring_emit_wreg,
230fcc34 5171 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
10ed3c31 5172 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
80dbea47 5173 .soft_recovery = gfx_v9_0_ring_soft_recovery,
b1023571
KW
5174};
5175
5176static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5177 .type = AMDGPU_RING_TYPE_COMPUTE,
5178 .align_mask = 0xff,
5179 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5180 .support_64bit_ptrs = true,
0eeb68b3 5181 .vmhub = AMDGPU_GFXHUB,
b1023571
KW
5182 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5183 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5184 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5185 .emit_frame_size =
5186 20 + /* gfx_v9_0_ring_emit_gds_switch */
5187 7 + /* gfx_v9_0_ring_emit_hdp_flush */
2ee150cd 5188 5 + /* hdp invalidate */
b1023571 5189 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
f732b6b3
CK
5190 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5191 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5192 2 + /* gfx_v9_0_ring_emit_vm_flush */
b1023571 5193 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
41cca166 5194 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
b1023571
KW
5195 .emit_ib = gfx_v9_0_ring_emit_ib_compute,
5196 .emit_fence = gfx_v9_0_ring_emit_fence,
5197 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5198 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5199 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5200 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
b1023571
KW
5201 .test_ring = gfx_v9_0_ring_test_ring,
5202 .test_ib = gfx_v9_0_ring_test_ib,
5203 .insert_nop = amdgpu_ring_insert_nop,
5204 .pad_ib = amdgpu_ring_generic_pad_ib,
761c77c1 5205 .set_priority = gfx_v9_0_ring_set_priority_compute,
254e825b 5206 .emit_wreg = gfx_v9_0_ring_emit_wreg,
230fcc34 5207 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
10ed3c31 5208 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
b1023571
KW
5209};
5210
aa6faa44
XY
5211static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5212 .type = AMDGPU_RING_TYPE_KIQ,
5213 .align_mask = 0xff,
5214 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5215 .support_64bit_ptrs = true,
0eeb68b3 5216 .vmhub = AMDGPU_GFXHUB,
aa6faa44
XY
5217 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5218 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5219 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5220 .emit_frame_size =
5221 20 + /* gfx_v9_0_ring_emit_gds_switch */
5222 7 + /* gfx_v9_0_ring_emit_hdp_flush */
2ee150cd 5223 5 + /* hdp invalidate */
aa6faa44 5224 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
f732b6b3
CK
5225 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5226 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5227 2 + /* gfx_v9_0_ring_emit_vm_flush */
aa6faa44 5228 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
41cca166 5229 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
aa6faa44 5230 .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
aa6faa44 5231 .test_ring = gfx_v9_0_ring_test_ring,
aa6faa44
XY
5232 .insert_nop = amdgpu_ring_insert_nop,
5233 .pad_ib = amdgpu_ring_generic_pad_ib,
5234 .emit_rreg = gfx_v9_0_ring_emit_rreg,
5235 .emit_wreg = gfx_v9_0_ring_emit_wreg,
230fcc34 5236 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
10ed3c31 5237 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
aa6faa44 5238};
b1023571
KW
5239
5240static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5241{
5242 int i;
5243
aa6faa44
XY
5244 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5245
b1023571
KW
5246 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5247 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5248
5249 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5250 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5251}
5252
5253static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5254 .set = gfx_v9_0_set_eop_interrupt_state,
5255 .process = gfx_v9_0_eop_irq,
5256};
5257
5258static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5259 .set = gfx_v9_0_set_priv_reg_fault_state,
5260 .process = gfx_v9_0_priv_reg_irq,
5261};
5262
5263static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5264 .set = gfx_v9_0_set_priv_inst_fault_state,
5265 .process = gfx_v9_0_priv_inst_irq,
5266};
5267
760a1d55
FX
5268static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5269 .set = gfx_v9_0_set_cp_ecc_error_state,
5270 .process = gfx_v9_0_cp_ecc_error_irq,
5271};
5272
5273
b1023571
KW
5274static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5275{
5276 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5277 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5278
5279 adev->gfx.priv_reg_irq.num_types = 1;
5280 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5281
5282 adev->gfx.priv_inst_irq.num_types = 1;
5283 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
760a1d55
FX
5284
5285 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
5286 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
b1023571
KW
5287}
5288
5289static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5290{
5291 switch (adev->asic_type) {
5292 case CHIP_VEGA10:
8b399477 5293 case CHIP_VEGA12:
61324ddc 5294 case CHIP_VEGA20:
a4dc61f5 5295 case CHIP_RAVEN:
b1023571
KW
5296 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5297 break;
5298 default:
5299 break;
5300 }
5301}
5302
5303static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5304{
5305 	/* init asic gds info */
8bda1013
ED
5306 switch (adev->asic_type) {
5307 case CHIP_VEGA10:
5308 case CHIP_VEGA12:
5309 case CHIP_VEGA20:
5310 adev->gds.mem.total_size = 0x10000;
5311 break;
5312 case CHIP_RAVEN:
5313 adev->gds.mem.total_size = 0x1000;
5314 break;
5315 default:
5316 adev->gds.mem.total_size = 0x10000;
5317 break;
5318 }
5319
41cca166
MO
5320 switch (adev->asic_type) {
5321 case CHIP_VEGA10:
5322 case CHIP_VEGA20:
5323 adev->gds.gds_compute_max_wave_id = 0x7ff;
5324 break;
5325 case CHIP_VEGA12:
5326 adev->gds.gds_compute_max_wave_id = 0x27f;
5327 break;
5328 case CHIP_RAVEN:
5329 if (adev->rev_id >= 0x8)
5330 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5331 else
5332 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5333 break;
5334 default:
5335 /* this really depends on the chip */
5336 adev->gds.gds_compute_max_wave_id = 0x7ff;
5337 break;
5338 }
5339
b1023571
KW
5340 adev->gds.gws.total_size = 64;
5341 adev->gds.oa.total_size = 16;
5342
5343 if (adev->gds.mem.total_size == 64 * 1024) {
5344 adev->gds.mem.gfx_partition_size = 4096;
5345 adev->gds.mem.cs_partition_size = 4096;
5346
5347 adev->gds.gws.gfx_partition_size = 4;
5348 adev->gds.gws.cs_partition_size = 4;
5349
5350 adev->gds.oa.gfx_partition_size = 4;
5351 adev->gds.oa.cs_partition_size = 1;
5352 } else {
5353 adev->gds.mem.gfx_partition_size = 1024;
5354 adev->gds.mem.cs_partition_size = 1024;
5355
5356 adev->gds.gws.gfx_partition_size = 16;
5357 adev->gds.gws.cs_partition_size = 16;
5358
5359 adev->gds.oa.gfx_partition_size = 4;
5360 adev->gds.oa.cs_partition_size = 4;
5361 }
5362}
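/*
 * Worked example from the tables above: VEGA10 gets 0x10000 (64KB) of
 * GDS memory, split into 4KB gfx/cs partitions, and a
 * gds_compute_max_wave_id of 0x7ff, i.e. 2048 distinct wave IDs
 * (0..0x7ff) available for GDS ordered append.
 */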
5363
c94d38f0
NH
5364static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5365 u32 bitmap)
5366{
5367 u32 data;
5368
5369 if (!bitmap)
5370 return;
5371
5372 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5373 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5374
5375 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5376}
5377
b1023571
KW
5378static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5379{
5380 u32 data, mask;
5381
5e78835a
TSD
5382 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5383 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
b1023571
KW
5384
5385 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5386 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5387
378506a7 5388 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
b1023571
KW
5389
5390 return (~data) & mask;
5391}
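/*
 * Worked example (illustrative, assuming amdgpu_gfx_create_bitmask(n)
 * returns an n-bit mask): with max_cu_per_sh = 11 the mask is 0x7ff;
 * if the combined INACTIVE_CUS field reads 0x003, the function returns
 * (~0x003) & 0x7ff = 0x7fc, i.e. CUs 0 and 1 are inactive and CUs
 * 2..10 are usable.
 */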
5392
5393static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5394 struct amdgpu_cu_info *cu_info)
5395{
5396 int i, j, k, counter, active_cu_number = 0;
5397 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
c94d38f0 5398 unsigned disable_masks[4 * 2];
b1023571
KW
5399
5400 if (!adev || !cu_info)
5401 return -EINVAL;
5402
c94d38f0
NH
5403 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5404
b1023571
KW
5405 mutex_lock(&adev->grbm_idx_mutex);
5406 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5407 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5408 mask = 1;
5409 ao_bitmap = 0;
5410 counter = 0;
5411 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
c94d38f0
NH
5412 if (i < 4 && j < 2)
5413 gfx_v9_0_set_user_cu_inactive_bitmap(
5414 adev, disable_masks[i * 2 + j]);
b1023571
KW
5415 bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5416 cu_info->bitmap[i][j] = bitmap;
5417
fe723cd3 5418 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
b1023571 5419 if (bitmap & mask) {
fe723cd3 5420 if (counter < adev->gfx.config.max_cu_per_sh)
b1023571
KW
5421 ao_bitmap |= mask;
5422 					counter++;
5423 }
5424 mask <<= 1;
5425 }
5426 active_cu_number += counter;
dbfe85ea
FC
5427 if (i < 2 && j < 2)
5428 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5429 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
b1023571
KW
5430 }
5431 }
5432 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5433 mutex_unlock(&adev->grbm_idx_mutex);
5434
5435 cu_info->number = active_cu_number;
5436 cu_info->ao_cu_mask = ao_cu_mask;
d5a114a6 5437 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
b1023571
KW
5438
5439 return 0;
5440}
5441
b1023571
KW
5442const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5443{
5444 .type = AMD_IP_BLOCK_TYPE_GFX,
5445 .major = 9,
5446 .minor = 0,
5447 .rev = 0,
5448 .funcs = &gfx_v9_0_ip_funcs,
5449};