/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"

#include "vega10/soc15ip.h"
#include "vega10/GC/gc_9_0_offset.h"
#include "vega10/GC/gc_9_0_sh_mask.h"
#include "vega10/vega10_enum.h"
#include "vega10/HDP/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_NUM_COMPUTE_RINGS 8
#define RLCG_UCODE_LOADING_START_ADDRESS 0x2000

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

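/*
 * Per-VMID GDS register offsets: each entry holds the GDS memory base/size,
 * GWS and OA registers for one of the 16 VMIDs, so a VM's GDS partition can
 * be programmed by indexing this table with its VMID.
 */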
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID3), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID3)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID4), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID4)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID5), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID5)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID6), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID6)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID7), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID7)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID8), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID8)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID9), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID9)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID10), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID10)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID11), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID11)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID12), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID12)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID13), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID13)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID14), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID14)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID15), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID15)}
};

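/*
 * Golden register settings are (register offset, AND mask, OR value)
 * triplets consumed by amdgpu_program_register_sequence(): the current
 * register value is masked and the OR value is applied on top.
 */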
static const u32 golden_settings_gc_9_0[] =
{
	SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00ffeff, 0x00000400,
	SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024,
	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001,
	SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000,
	SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000,
	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68,
	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197,
	SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff
};

static const u32 golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0x0000f000, 0x00012107,
	SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000,
	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x2a114042,
	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042,
	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000,
	SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000,
	SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800,
	SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1), 0x0000000f, 0x00000007
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_gc_9_0,
						 (const u32)ARRAY_SIZE(golden_settings_gc_9_0));
		amdgpu_program_register_sequence(adev,
						 golden_settings_gc_9_0_vg10,
						 (const u32)ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	default:
		break;
	}
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

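/*
 * Helper that emits a WRITE_DATA packet to set a single register from the
 * selected CP engine, optionally requesting write confirmation (wc).
 */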
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

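/*
 * Basic ring sanity test: push a SET_UCONFIG_REG packet that writes a magic
 * value to a scratch register, then poll that register until the CP has
 * fetched and executed the packet.
 */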
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

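/*
 * Fetch and validate all gfx9 CP/RLC microcode images via request_firmware()
 * and record their version fields; on the PSP load path the images are also
 * registered in adev->firmware.ucode[] so the PSP can upload them.
 */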
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
			info->fw = adev->gfx.mec2_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
	if (adev->gfx.mec.mec_fw_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

		amdgpu_bo_unref(&adev->gfx.mec.mec_fw_obj);
		adev->gfx.mec.mec_fw_obj = NULL;
	}
}

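/* Size in bytes of one hardware queue's HPD/EOP buffer. */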
#define MEC_HPD_SIZE 2048

static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data;
	unsigned fw_size;
	u32 *fw;
	const struct gfx_firmware_header_v1_0 *mec_hdr;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v9_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	if (adev->gfx.mec.mec_fw_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     mec_hdr->header.ucode_size_bytes,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.mec_fw_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false);
	if (unlikely(r != 0)) {
		gfx_v9_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.mec_fw_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.mec_fw_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin mec firmware bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.mec_fw_obj, (void **)&fw);
	if (r) {
		dev_warn(adev->dev, "(%d) map firmware bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memcpy(fw, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

	return 0;
}

static void gfx_v9_0_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

static int gfx_v9_0_kiq_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, MEC_HPD_SIZE);

	r = amdgpu_bo_reserve(kiq->eop_obj, false);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}

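/*
 * The KIQ (kernel interface queue) ring is placed on MEC2 pipe 0 when MEC2
 * firmware is available, otherwise on MEC1 pipe 1, and is driven through
 * its own doorbell.
 */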
static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  struct amdgpu_irq_src *irq)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	int r = 0;

	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
	if (r)
		return r;

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
	if (adev->gfx.mec2_fw) {
		ring->me = 2;
		ring->pipe = 0;
	} else {
		ring->me = 1;
		ring->pipe = 1;
	}

	ring->queue = 0;
	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024,
			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}

static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq)
{
	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
	amdgpu_ring_fini(ring);
}

/* create MQD for each compute queue */
static int gfx_v9_0_compute_mqd_sw_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r, i;

	/* create MQD for KIQ */
	ring = &adev->gfx.kiq.ring;
	if (!ring->mqd_obj) {
		r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
					    &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
		if (r) {
			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
			return r;
		}

		/*TODO: prepare MQD backup */
	}

	/* create MQD for each KCQ */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		if (!ring->mqd_obj) {
			r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE,
						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
						    &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
			if (r) {
				dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
				return r;
			}

			/* TODO: prepare MQD backup */
		}
	}

	return 0;
}

static void gfx_v9_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int i;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
	}

	ring = &adev->gfx.kiq.ring;
	amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
}

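/*
 * SQ indirect register access: wave state is read by programming
 * mmSQ_IND_INDEX with the wave/SIMD/register selector and then reading the
 * value back from mmSQ_IND_DATA.
 */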
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX),
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA));
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX),
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA));
}

static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 1 wave data */
	dst[(*no_fields)++] = 1;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_0_select_se_sh,
	.read_wave_data = &gfx_v9_0_read_wave_data,
	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
};

static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;

	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8; //??
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		BUG();
		break;
	}

	adev->gfx.config.gb_addr_config = gb_addr_config;

	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_PIPES);
	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_BANKS);
	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_SHADER_ENGINES);
	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					PIPE_INTERLEAVE_SIZE));
}

static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
				   struct amdgpu_ngg_buf *ngg_buf,
				   int size_se,
				   int default_size_se)
{
	int r;

	if (size_se < 0) {
		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
		return -EINVAL;
	}
	size_se = size_se ? size_se : default_size_se;

	ngg_buf->size = size_se * GFX9_NUM_SE;
	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				    &ngg_buf->bo,
				    &ngg_buf->gpu_addr,
				    NULL);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
		return r;
	}
	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);

	return r;
}

static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < NGG_BUF_MAX; i++)
		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
				      &adev->gfx.ngg.buf[i].gpu_addr,
				      NULL);

	memset(&adev->gfx.ngg.buf[0], 0,
			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);

	adev->gfx.ngg.init = false;

	return 0;
}

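/*
 * NGG (next-generation geometry) uses per-shader-engine buffers in VRAM
 * (primitive, position, control sideband and an optional parameter cache)
 * plus a small reserved window at the end of the gfx GDS partition.
 */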
static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
		return 0;

	/* GDS reserve memory: 64 bytes alignment */
	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
	adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
	adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
	adev->gfx.ngg.gds_reserve_addr = amdgpu_gds_reg_offset[0].mem_base;
	adev->gfx.ngg.gds_reserve_addr += adev->gds.mem.gfx_partition_size;

	/* Primitive Buffer; the last argument is the default per-SE size used
	 * when the module parameter is zero */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PRIM],
				    amdgpu_prim_buf_per_se,
				    64 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
		goto err;
	}

	/* Position Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[POS],
				    amdgpu_pos_buf_per_se,
				    256 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Position Buffer\n");
		goto err;
	}

	/* Control Sideband */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[CNTL],
				    amdgpu_cntl_sb_buf_per_se,
				    16 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
		goto err;
	}

	/* Parameter Cache, not created by default */
	if (amdgpu_param_buf_per_se <= 0)
		goto out;

	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PARAM],
				    amdgpu_param_buf_per_se,
				    512 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Parameter Cache\n");
		goto err;
	}

out:
	adev->gfx.ngg.init = true;
	return 0;
err:
	gfx_v9_0_ngg_fini(adev);
	return r;
}

static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	int r;
	u32 data;
	u32 size;
	u32 base;

	if (!amdgpu_ngg)
		return 0;

	/* Program buffer size */
	data = 0;
	size = adev->gfx.ngg.buf[PRIM].size / 256;
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, size);

	size = adev->gfx.ngg.buf[POS].size / 256;
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, size);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_1), data);

	data = 0;
	size = adev->gfx.ngg.buf[CNTL].size / 256;
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, size);

	size = adev->gfx.ngg.buf[PARAM].size / 1024;
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, size);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_2), data);

	/* Program buffer base address */
	base = lower_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr);
	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE), data);

	base = upper_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr);
	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE_HI), data);

	base = lower_32_bits(adev->gfx.ngg.buf[POS].gpu_addr);
	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE), data);

	base = upper_32_bits(adev->gfx.ngg.buf[POS].gpu_addr);
	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE_HI), data);

	base = lower_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr);
	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE), data);

	base = upper_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr);
	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI), data);

	/* Clear GDS reserved memory */
	r = amdgpu_ring_alloc(ring, 17);
	if (r) {
		DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}

	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[0].mem_size,
				   (adev->gds.mem.total_size +
				    adev->gfx.ngg.gds_reserve_size) >>
				   AMDGPU_GDS_SHIFT);

	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
				PACKET3_DMA_DATA_SRC_SEL(2)));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_size);

	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[0].mem_size, 0);

	amdgpu_ring_commit(ring);

	return 0;
}

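/*
 * sw_init wires up the interrupt sources, loads microcode, creates the MEC
 * buffers and initializes one gfx ring plus the compute rings (and the KIQ
 * when running as an SR-IOV virtual function).
 */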
static int gfx_v9_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v9_0_scratch_init(adev);

	r = gfx_v9_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v9_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1;
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + i) << 1;
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->gfx.eop_irq, irq_type);
		if (r)
			return r;
	}

	if (amdgpu_sriov_vf(adev)) {
		r = gfx_v9_0_kiq_init(adev);
		if (r) {
			DRM_ERROR("Failed to init KIQ BOs!\n");
			return r;
		}

		kiq = &adev->gfx.kiq;
		r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
		if (r)
			return r;

		/* create MQD for all compute queues as well as KIQ for SRIOV case */
		r = gfx_v9_0_compute_mqd_sw_init(adev);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	gfx_v9_0_gpu_early_init(adev);

	r = gfx_v9_0_ngg_init(adev);
	if (r)
		return r;

	return 0;
}

static int gfx_v9_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	if (amdgpu_sriov_vf(adev)) {
		gfx_v9_0_compute_mqd_sw_fini(adev);
		gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
		gfx_v9_0_kiq_fini(adev);
	}

	gfx_v9_0_mec_fini(adev);
	gfx_v9_0_ngg_fini(adev);

	return 0;
}

static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	/* TODO */
}

static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
{
	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);

	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	} else if (se_num == 0xffffffff) {
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	} else if (sh_num == 0xffffffff) {
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
	} else {
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
	}
	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
}

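/* Build a mask with the 'bit_width' low bits set, e.g. 4 -> 0xf. */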
static u32 gfx_v9_0_create_bitmask(u32 bit_width)
{
	return (u32)((1ULL << bit_width) - 1);
}

static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_RB_BACKEND_DISABLE));
	data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_RB_BACKEND_DISABLE));

	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_backends_per_se /
				       adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}

static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v9_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}

#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
	}
	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);

	gfx_v9_0_tiling_mode_table_init(adev);

	gfx_v9_0_setup_rb(adev);
	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		tmp = 0;
		tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
				    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), tmp);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), 0);
	}
	soc15_grbm_select(adev, 0, 0, 0, 0);

	mutex_unlock(&adev->srbm_mutex);

	gfx_v9_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FIFO_SIZE),
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);
}

static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY)) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY)) & mask) == 0)
			break;
		udelay(1);
	}
}

static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), tmp);
}

void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL), tmp);

	gfx_v9_0_enable_gui_idle_interrupt(adev, false);

	gfx_v9_0_wait_for_rlc_serdes(adev);
}

static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
{
#ifdef AMDGPU_RLC_DEBUG_RETRY
	u32 rlc_ucode_ver;
#endif

	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v9_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);

#ifdef AMDGPU_RLC_DEBUG_RETRY
	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
	rlc_ucode_ver = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_6));
	if (rlc_ucode_ver == 0x108) {
		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
				rlc_ucode_ver, adev->gfx.rlc_fw_version);
		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
		 * default is 0x9C4 to create a 100us interval */
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_TIMER_INT_3), 0x9C4);
		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
		 * to disable the page fault retry interrupts, default is
		 * 0x100 (256) */
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_12), 0x100);
	}
#endif
}

static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR),
			RLCG_UCODE_LOADING_START_ADDRESS);
	for (i = 0; i < fw_size; i++)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA), le32_to_cpup(fw_data++));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR), adev->gfx.rlc_fw_version);

	return 0;
}

static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	gfx_v9_0_rlc_stop(adev);

	/* disable CG */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), 0);

	/* disable PG */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), 0);

	gfx_v9_0_rlc_reset(adev);

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		/* legacy rlc firmware loading */
		r = gfx_v9_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v9_0_rlc_start(adev);

	return 0;
}

static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL));

	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
	if (!enable) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL), tmp);
	udelay(50);
}

static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v9_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), 0);
	for (i = 0; i < fw_size; i++)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA), le32_to_cpup(fw_data++));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), 0);
	for (i = 0; i < fw_size; i++)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA), le32_to_cpup(fw_data++));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), 0);
	for (i = 0; i < fw_size; i++)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_DATA), le32_to_cpup(fw_data++));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), adev->gfx.me_fw_version);

	return 0;
}

static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

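/*
 * Emit the clear-state sequence on the gfx ring: the begin/end clear-state
 * preamble, the SECT_CONTEXT register extents from gfx9_cs_data, a
 * CLEAR_STATE packet, and the CE partition bases.
 */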
static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MAX_CONTEXT), adev->gfx.config.max_hw_contexts - 1);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_DEVICE_ID), 1);

	gfx_v9_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}

static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_DELAY), 0);

	/* set the RB to use vmid 0 */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_VMID), 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp);

	/* Initialize the ring buffer's write pointers */
	ring->wptr = 0;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR), lower_32_bits(rptr_addr));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR_HI), upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO), lower_32_bits(wptr_gpu_addr));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI), upper_32_bits(wptr_gpu_addr));

	mdelay(1);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE), rb_addr);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE_HI), upper_32_bits(rb_addr));

	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL));
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL), tmp);

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			DOORBELL_RANGE_LOWER, ring->doorbell_index);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER), tmp);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER),
		   CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);

	/* start the ring */
	gfx_v9_0_cp_gfx_start(adev);
	ring->ready = true;

	return 0;
}

static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL), 0);
	} else {
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL),
			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
	udelay(50);
}

static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev)
{
	gfx_v9_0_cp_compute_enable(adev, true);

	return 0;
}

static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i;
	u32 tmp;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v9_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	tmp = 0;
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_CNTL), tmp);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_LO),
		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_HI),
		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));

	/* MEC1 jump table */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR),
		mec_hdr->jt_offset);
	for (i = 0; i < mec_hdr->jt_size; i++)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA),
			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR),
		adev->gfx.mec_fw_version);
	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */

	return 0;
}

static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj) {
			r = amdgpu_bo_reserve(ring->mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);

			amdgpu_bo_unpin(ring->mqd_obj);
			amdgpu_bo_unreserve(ring->mqd_obj);

			amdgpu_bo_unref(&ring->mqd_obj);
			ring->mqd_obj = NULL;
		}
	}
}

static int gfx_v9_0_init_queue(struct amdgpu_ring *ring);

static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (gfx_v9_0_init_queue(ring))
			dev_warn(adev->dev, "compute queue %d init failed!\n", i);
	}

	r = gfx_v9_0_cp_compute_start(adev);
	if (r)
		return r;

	return 0;
}

static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), tmp);
	tmp |= 0x80;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), tmp);
}

1834 static void gfx_v9_0_kiq_enable(struct amdgpu_ring
*ring
)
1836 amdgpu_ring_alloc(ring
, 8);
1838 amdgpu_ring_write(ring
, PACKET3(PACKET3_SET_RESOURCES
, 6));
1839 amdgpu_ring_write(ring
, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
1840 amdgpu_ring_write(ring
, 0x000000FF); /* queue mask lo */
1841 amdgpu_ring_write(ring
, 0); /* queue mask hi */
1842 amdgpu_ring_write(ring
, 0); /* gws mask lo */
1843 amdgpu_ring_write(ring
, 0); /* gws mask hi */
1844 amdgpu_ring_write(ring
, 0); /* oac mask */
1845 amdgpu_ring_write(ring
, 0); /* gds heap base:0, gds heap size:0 */
1846 amdgpu_ring_commit(ring
);
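
/* Build a MAP_QUEUES packet on the KIQ ring for one compute ring: the
 * second dword encodes queue/pipe/ME selection, the remaining dwords
 * carry the doorbell offset, the MQD address and the wptr poll address.
 */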
static void gfx_v9_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr, wptr_addr;

	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	amdgpu_ring_alloc(kiq_ring, 8);

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  (0 << 4) | /* Queue_Sel */
			  (0 << 8) | /* VMID */
			  (ring->queue << 13) |
			  (ring->pipe << 16) |
			  ((ring->me == 1 ? 0 : 1) << 18) |
			  (0 << 21) | /* queue_type: normal compute queue */
			  (1 << 24) | /* alloc format: all_on_one_pipe */
			  (0 << 26) | /* engine_sel: compute */
			  (1 << 29)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	amdgpu_ring_commit(kiq_ring);
}
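
/* Fill the Memory Queue Descriptor (MQD) for a compute/KIQ ring.  The
 * CP reads this structure when the queue is mapped, so everything the
 * HQD registers would need (EOP buffer, ring base, doorbell, rptr/wptr
 * addresses) is staged here first.
 */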
static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL));
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			    (order_base_2(MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	ring->wptr = 0;
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL));
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
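
/* Mirror the staged MQD fields into the HQD registers of the currently
 * selected queue (the caller must hold srbm_mutex and have done a
 * soc15_grbm_select() for this ring).
 */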
static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	int j;

	/* disable wptr polling */
	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
	       mqd->cp_hqd_eop_base_addr_lo);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
	       mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL),
	       mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
	       mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active */
	if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) {
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1))
				break;
			udelay(1);
		}
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
		       mqd->cp_hqd_dequeue_request);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR),
		       mqd->cp_hqd_pq_rptr);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
		       mqd->cp_hqd_pq_wptr_lo);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
		       mqd->cp_hqd_pq_wptr_hi);
	}

	/* set the pointer to the MQD */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR),
	       mqd->cp_mqd_base_addr_lo);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI),
	       mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL),
	       mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE),
	       mqd->cp_hqd_pq_base_lo);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI),
	       mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL),
	       mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR),
	       mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI),
	       mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
	       mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER),
		       (AMDGPU_DOORBELL64_KIQ * 2) << 2);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER),
		       (AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2);
	}

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
	       mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
	       mqd->cp_hqd_pq_wptr_lo);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
	       mqd->cp_hqd_pq_wptr_hi);

	/* set the vmid for the queue */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE),
	       mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE),
	       mqd->cp_hqd_active);

	if (ring->use_doorbell)
		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}
static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct v9_mqd *mqd = ring->mqd_ptr;
	bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	if (is_kiq)
		gfx_v9_0_kiq_setting(&kiq->ring);
	else
		mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->gfx.in_reset) {
		memset((void *)mqd, 0, sizeof(*mqd));
		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v9_0_mqd_init(ring);
		if (is_kiq)
			gfx_v9_0_kiq_init_register(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else { /* for GPU_RESET case */
		/* reset MQD to a clean status */

		/* reset ring buffer */
		ring->wptr = 0;

		if (is_kiq) {
			mutex_lock(&adev->srbm_mutex);
			soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v9_0_kiq_init_register(ring);
			soc15_grbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
	}

	if (is_kiq)
		gfx_v9_0_kiq_enable(ring);
	else
		gfx_v9_0_map_queue_enable(&kiq->ring, ring);

	return 0;
}
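
/* Bring up the KIQ ring first, then use it to (re)initialize every
 * compute ring.  Each MQD BO is reserved and kmapped only for the
 * duration of its queue init.
 */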
static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v9_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
	if (!r) {
		r = gfx_v9_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
		if (!r) {
			r = gfx_v9_0_kiq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

done:
	return r;
}

static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v9_0_enable_gui_idle_interrupt(adev, false);

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		/* legacy firmware loading */
		r = gfx_v9_0_cp_gfx_load_microcode(adev);
		if (r)
			return r;

		r = gfx_v9_0_cp_compute_load_microcode(adev);
		if (r)
			return r;
	}

	r = gfx_v9_0_cp_gfx_resume(adev);
	if (r)
		return r;

	if (amdgpu_sriov_vf(adev))
		r = gfx_v9_0_kiq_resume(adev);
	else
		r = gfx_v9_0_cp_compute_resume(adev);
	if (r)
		return r;

	ring = &adev->gfx.gfx_ring[0];
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	if (amdgpu_sriov_vf(adev)) {
		ring = &adev->gfx.kiq.ring;
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	gfx_v9_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}

static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v9_0_cp_gfx_enable(adev, enable);
	gfx_v9_0_cp_compute_enable(adev, enable);
}

static int gfx_v9_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v9_0_init_golden_registers(adev);

	gfx_v9_0_gpu_init(adev);

	r = gfx_v9_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v9_0_cp_resume(adev);
	if (r)
		return r;

	r = gfx_v9_0_ngg_en(adev);

	return r;
}

static int gfx_v9_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v9_0_cp_enable(adev, false);
	gfx_v9_0_rlc_stop(adev);
	gfx_v9_0_cp_compute_fini(adev);

	return 0;
}

static int gfx_v9_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v9_0_hw_fini(adev);
}

static int gfx_v9_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v9_0_hw_init(adev);
}

static bool gfx_v9_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)),
			  GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}

static int gfx_v9_0_wait_for_idle(void *handle)
{
	unsigned i;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		/* read MC_STATUS */
		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)) &
			GRBM_STATUS__GUI_ACTIVE_MASK;

		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

static int gfx_v9_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS */
	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS));
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2));
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (grbm_soft_reset) {
		/* stop the rlc */
		gfx_v9_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v9_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v9_0_cp_compute_enable(adev, false);

		if (grbm_soft_reset) {
			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp);
			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp);
			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
		}

		/* Wait a little for things to settle down */
		udelay(50);
	}
	return 0;
}
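
/* Writing RLC_CAPTURE_GPU_CLOCK_COUNT latches the 64-bit GPU clock so
 * the LSB/MSB halves below are sampled coherently; the mutex keeps
 * concurrent readers from re-triggering the capture mid-read.
 */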
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT), 1);
	clock = (uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB)) |
		((uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB)) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}

static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[vmid].mem_base,
				   gds_base);

	/* GDS Size */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[vmid].mem_size,
				   gds_size);

	/* GWS */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[vmid].gws,
				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[vmid].oa,
				   (1 << (oa_size + oa_base)) - (1 << oa_base));
}

static int gfx_v9_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS;
	gfx_v9_0_set_ring_funcs(adev);
	gfx_v9_0_set_irq_funcs(adev);
	gfx_v9_0_set_gds_init(adev);
	gfx_v9_0_set_rlc_funcs(adev);

	return 0;
}

static int gfx_v9_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	return 0;
}
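
/* RLC safe mode handshake: clock-gating registers may only be touched
 * while the RLC is parked, so the enter/exit helpers below bracket the
 * update_*_clock_gating functions via adev->gfx.rlc.funcs.
 */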
static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	uint32_t rlc_setting, data;
	unsigned i;

	if (adev->gfx.rlc.in_safe_mode)
		return;

	/* if RLC is not enabled, do nothing */
	rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags &
	    (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
	     AMD_CG_SUPPORT_GFX_3D_CGCG)) {
		data = RLC_SAFE_MODE__CMD_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data);

		/* wait for RLC_SAFE_MODE */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE)),
					   RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}

static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	uint32_t rlc_setting, data;

	if (!adev->gfx.rlc.in_safe_mode)
		return;

	/* if RLC is not enabled, do nothing */
	rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags &
	    (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/*
		 * Try to exit safe mode only if it is already in safe
		 * mode.
		 */
		data = RLC_SAFE_MODE__CMD_MASK;
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data);
		adev->gfx.rlc.in_safe_mode = false;
	}
}

static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t data, def;

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);

		/* only for Vega10 & Raven1 */
		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;

		if (def != data)
			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);

		/* MGLS is a global flag to control all MGLS in GFX */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			/* 2 - RLC memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
				def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
				if (def != data)
					WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data);
			}
			/* 3 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (def != data)
					WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data);
			}
		}
	} else {
		/* 1 - MGCG_OVERRIDE */
		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
		if (def != data)
			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data);
		}
	}
}
static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	uint32_t data, def;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* Enable 3D CGCG/CGLS */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
		/* write cmd to clear cgcg/cgls ov */
		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
		/* unset CGCG override */
		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
		/* update CGCG and CGLS override bits */
		if (def != data)
			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
		/* enable 3Dcgcg FSM(0x0020003f) */
		def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
		data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
		if (def != data)
			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data);

		/* set IDLE_POLL_COUNT(0x00900100) */
		def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		if (def != data)
			WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
	} else {
		/* Disable CGCG/CGLS */
		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
		/* disable cgcg, cgls should be disabled */
		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
		/* disable cgcg and cgls in FSM */
		if (def != data)
			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t def, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
		/* unset CGCG override */
		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
		else
			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
		/* update CGCG and CGLS override bits */
		if (def != data)
			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);

		/* enable cgcg FSM(0x0020003F) */
		def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
		data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		if (def != data)
			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data);

		/* set IDLE_POLL_COUNT(0x00900100) */
		def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		if (def != data)
			WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
	} else {
		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
		/* reset CGCG/CGLS bits */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		/* disable cgcg and cgls in FSM */
		if (def != data)
			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS
		 * ===  MGCG + MGLS ===
		 */
		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
		/* ===  CGCG /CGLS for GFX 3D Only === */
		gfx_v9_0_update_3d_clock_gating(adev, enable);
		/* ===  CGCG + CGLS === */
		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS
		 * ===  CGCG + CGLS ===
		 */
		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
		/* ===  CGCG /CGLS for GFX 3D Only === */
		gfx_v9_0_update_3d_clock_gating(adev, enable);
		/* ===  MGCG + MGLS === */
		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}
static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
	.enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode
};

static int gfx_v9_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}

static int gfx_v9_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		gfx_v9_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE ? true : false);
		break;
	default:
		break;
	}
	return 0;
}

static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;

	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
}
static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
}

static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u64 wptr;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell) {
		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
	} else {
		wptr = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR));
		wptr += (u64)RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI)) << 32;
	}

	return wptr;
}

static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
		WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr));
	}
}

static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;
	struct nbio_hdp_flush_reg *nbio_hf_reg;

	if (ring->adev->asic_type == CHIP_VEGA10)
		nbio_hf_reg = &nbio_v6_1_hdp_flush_reg;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
			break;
		case 2:
			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
		reg_mem_engine = 1; /* pfp */
	}

	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
			      nbio_hf_reg->hdp_flush_req_offset,
			      nbio_hf_reg->hdp_flush_done_offset,
			      ref_and_mask, ref_and_mask, 0x20);
}

static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	gfx_v9_0_write_data_to_reg(ring, 0, true,
				   SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 1);
}
static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT))
		control |= INDIRECT_BUFFER_PRE_ENB(1);

	amdgpu_ring_write(ring, header);
	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, control);
}

#define	INDIRECT_BUFFER_VALID	(1 << 23)

static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, control);
}
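
/* Fences are emitted as RELEASE_MEM packets: flush/invalidate the TC
 * caches, write the sequence number to memory once the pipe drains, and
 * optionally raise an interrupt after the write has landed.
 */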
static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
				     u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EOP_TC_MD_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));

	/*
	 * the address should be Qword aligned if 64bit write, Dword
	 * aligned if only send 32bit data low (discard data high)
	 */
	if (write64bit)
		BUG_ON(addr & 0x7);
	else
		BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
			      lower_32_bits(addr), upper_32_bits(addr),
			      seq, 0xffffffff, 4);
}
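
/* VM flush on gfx9 walks every VM hub: write the new page-directory
 * address into CTX0, kick the per-engine invalidate request, then poll
 * the ack register before any new work may touch the VM.
 */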
static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
	unsigned eng = ring->idx;
	unsigned i;

	pd_addr = pd_addr | 0x1; /* valid bit */
	/* now only use physical base address of PDE and valid */
	BUG_ON(pd_addr & 0xFFFF00000000003EULL);

	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];

		gfx_v9_0_write_data_to_reg(ring, usepfp, true,
					   hub->ctx0_ptb_addr_lo32
					   + (2 * vm_id),
					   lower_32_bits(pd_addr));

		gfx_v9_0_write_data_to_reg(ring, usepfp, true,
					   hub->ctx0_ptb_addr_hi32
					   + (2 * vm_id),
					   upper_32_bits(pd_addr));

		gfx_v9_0_write_data_to_reg(ring, usepfp, true,
					   hub->vm_inv_eng0_req + eng, req);

		/* wait for the invalidate to complete */
		gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
				      eng, 0, 1 << vm_id, 1 << vm_id, 0x20);
	}

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
}

static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	u64 wptr;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell)
		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
	else
		BUG();
	return wptr;
}

static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell) {
		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
		WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		BUG(); /* only DOORBELL method supported on gfx9 now */
	}
}

static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
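
/* CE/DE metadata for SR-IOV world switch.  The layout assumed here
 * matches the code below: the CSA sits two pages below the top of the
 * reserved VA range and the GDS backup page directly follows it.
 */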
static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	static struct v9_ce_ib_state ce_payload = {0};
	uint64_t csa_addr;
	int cnt;

	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
}

static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	static struct v9_de_ib_state de_payload = {0};
	uint64_t csa_addr, gds_addr;
	int cnt;

	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
	gds_addr = csa_addr + 4096;
	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);

	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
}

static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v9_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v9_0_ring_emit_de_meta(ring);
}

static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */

	return ret;
}
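
/* Patch the dummy dword written by init_cond_exec above with the real
 * number of dwords to skip; the else branch handles the case where the
 * write pointer has wrapped around the ring since the COND_EXEC.
 */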
static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       TIME_STAMP_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
	 * handles the setting of interrupts for this specific pipe. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 0);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 1);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       PRIV_REG_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       PRIV_INSTR_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	uint32_t tmp, target;
	struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data;

	BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));

	if (ring->me == 1)
		target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
	else
		target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL);
	target += ring->pipe;

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		if (state == AMDGPU_IRQ_STATE_DISABLE) {
			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL));
			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
					    GENERIC2_INT_ENABLE, 0);
			WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), tmp);

			tmp = RREG32(target);
			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
					    GENERIC2_INT_ENABLE, 0);
			WREG32(target, tmp);
		} else {
			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL));
			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
					    GENERIC2_INT_ENABLE, 1);
			WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), tmp);

			tmp = RREG32(target);
			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
					    GENERIC2_INT_ENABLE, 1);
			WREG32(target, tmp);
		}
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}

static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data;

	BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}
const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		46 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
		       prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		64 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		64 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
};
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_kiq_irq_funcs = {
	.set = gfx_v9_0_kiq_set_interrupt_state,
	.process = gfx_v9_0_kiq_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v9_0_kiq_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}
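
/* GDS partitioning: total GDS memory size is read back from the
 * hardware and split into fixed gfx/CS partitions depending on whether
 * a 64KB or larger GDS is present.
 */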
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE));
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG));
	data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG));

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}
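
/* Walk every shader engine/array, record the active-CU bitmap per SH
 * and reserve up to two always-on CUs per shader array in ao_cu_mask.
 */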
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;

	if (!adev || !cu_info)
		return -EINVAL;

	memset(cu_info, 0, sizeof(*cu_info));

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < 16; k++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;

	return 0;
}
static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
{
	int r = 0, j;
	u64 mqd_gpu_addr, hqd_gpu_addr, eop_gpu_addr;
	bool use_doorbell = true;
	u64 wb_gpu_addr;
	u32 *buf;
	u32 tmp;
	struct v9_mqd *mqd;
	struct amdgpu_device *adev;

	adev = ring->adev;
	if (ring->mqd_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     sizeof(struct v9_mqd),
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
				     NULL, &ring->mqd_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
			return r;
		}
	}
	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0)) {
		gfx_v9_0_cp_compute_fini(adev);
		return r;
	}

	r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &mqd_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
		gfx_v9_0_cp_compute_fini(adev);
		return r;
	}

	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
	if (r) {
		dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
		gfx_v9_0_cp_compute_fini(adev);
		return r;
	}
	/* init the mqd struct */
	memset(buf, 0, sizeof(struct v9_mqd));

	mqd = (struct v9_mqd *)buf;
	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	mutex_lock(&adev->srbm_mutex);
	soc15_grbm_select(adev, ring->me,
			  ring->pipe,
			  ring->queue, 0);
	/* disable wptr polling */
	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
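
	/*
	 * Everything between the soc15_grbm_select() above and the
	 * restoring soc15_grbm_select(adev, 0, 0, 0, 0) at the end of this
	 * function programs the CP_HQD_* registers of this particular
	 * <me, pipe, queue> instance; srbm_mutex keeps a concurrent queue
	 * init from retargeting the register window mid-sequence.
	 */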
	/* write the EOP addr */
	BUG_ON(ring->me != 1 || ring->pipe != 0); /* only ME 1, pipe 0 EOP addresses are handled here */
	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE);
	eop_gpu_addr >>= 8;

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR), lower_32_bits(eop_gpu_addr));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), upper_32_bits(eop_gpu_addr));
	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr);
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL));
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			    (order_base_2(MEC_HPD_SIZE / 4) - 1));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL), tmp);
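
	/*
	 * Worked example, assuming MEC_HPD_SIZE = 2048 bytes: 2048 / 4 =
	 * 512 dwords, so EOP_SIZE = order_base_2(512) - 1 = 8 and the
	 * hardware decodes 2^(8+1) = 512 dwords -- exactly the allocated
	 * per-queue EOP buffer.
	 */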
	/* enable doorbell? */
	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
	if (use_doorbell)
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	else
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), tmp);
	mqd->cp_hqd_pq_doorbell_control = tmp;
	/* disable the queue if it's active */
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;
	if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) {
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1))
				break;
			udelay(1);
		}
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
		       mqd->cp_hqd_dequeue_request);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR),
		       mqd->cp_hqd_pq_rptr);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
		       mqd->cp_hqd_pq_wptr_lo);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
		       mqd->cp_hqd_pq_wptr_hi);
	}
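
	/*
	 * The poll loop above bounds the dequeue handshake at
	 * adev->usec_timeout microseconds; once HQD_ACTIVE drops (or the
	 * timeout lapses) the request and both ring pointers are cleared so
	 * the queue restarts from a known-empty state.
	 */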
	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR), mqd->cp_mqd_base_addr_lo);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI), mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL));
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL), tmp);
	mqd->cp_mqd_control = tmp;
	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE), mqd->cp_hqd_pq_base_lo);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI), mqd->cp_hqd_pq_base_hi);
	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL), tmp);
	mqd->cp_hqd_pq_control = tmp;
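
	/*
	 * Illustrative encoding, for instance with a 64 KiB ring: 65536 / 4
	 * = 16384 dwords, so QUEUE_SIZE = order_base_2(16384) - 1 = 13 --
	 * the size fields here are log2-based, like EOP_SIZE above.
	 * RPTR_BLOCK_SIZE is derived the same way from the 4 KiB GPU page.
	 */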
	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR),
	       mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI),
	       mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
	/* enable the doorbell if requested */
	if (use_doorbell) {
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER),
		       (AMDGPU_DOORBELL64_KIQ * 2) << 2);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER),
		       (AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2);
		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
		mqd->cp_hqd_pq_doorbell_control = tmp;
	} else {
		mqd->cp_hqd_pq_doorbell_control = 0;
	}
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
	       mqd->cp_hqd_pq_doorbell_control);
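
	/*
	 * The MEC doorbell aperture spans AMDGPU_DOORBELL64_KIQ through
	 * AMDGPU_DOORBELL64_MEC_RING7. Each 64-bit doorbell is two dwords
	 * wide and the range registers take byte offsets, hence the
	 * (index * 2) << 2 encoding, i.e. index * 8 bytes.
	 */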
	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid);

	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE), tmp);
	mqd->cp_hqd_persistent_state = tmp;

	/* activate the queue */
	mqd->cp_hqd_active = 1;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), mqd->cp_hqd_active);
	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	amdgpu_bo_kunmap(ring->mqd_obj);
	amdgpu_bo_unreserve(ring->mqd_obj);

	if (use_doorbell)
		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}
const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};