1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * The above copyright notice and this permission notice (including the
22 * next paragraph) shall be included in all copies or substantial portions
23 * of the Software.
24 *
25 */
26
27 #include <linux/firmware.h>
28 #include <drm/drmP.h>
29 #include "amdgpu.h"
30 #include "amdgpu_vce.h"
31 #include "soc15d.h"
32 #include "soc15_common.h"
33 #include "mmsch_v1_0.h"
34
35 #include "vce/vce_4_0_offset.h"
36 #include "vce/vce_4_0_default.h"
37 #include "vce/vce_4_0_sh_mask.h"
38 #include "mmhub/mmhub_1_0_offset.h"
39 #include "mmhub/mmhub_1_0_sh_mask.h"
40
41 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
42
43 #define VCE_V4_0_FW_SIZE (384 * 1024)
44 #define VCE_V4_0_STACK_SIZE (64 * 1024)
45 #define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
46
47 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
48 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
49 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
50
51 /**
52 * vce_v4_0_ring_get_rptr - get read pointer
53 *
54 * @ring: amdgpu_ring pointer
55 *
56 * Returns the current hardware read pointer
57 */
58 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
59 {
60 struct amdgpu_device *adev = ring->adev;
61
62 if (ring == &adev->vce.ring[0])
63 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
64 else if (ring == &adev->vce.ring[1])
65 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
66 else
67 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
68 }
69
70 /**
71 * vce_v4_0_ring_get_wptr - get write pointer
72 *
73 * @ring: amdgpu_ring pointer
74 *
75 * Returns the current hardware write pointer
76 */
77 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
78 {
79 struct amdgpu_device *adev = ring->adev;
80
81 if (ring->use_doorbell)
82 return adev->wb.wb[ring->wptr_offs];
83
84 if (ring == &adev->vce.ring[0])
85 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
86 else if (ring == &adev->vce.ring[1])
87 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
88 else
89 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
90 }
91
92 /**
93 * vce_v4_0_ring_set_wptr - set write pointer
94 *
95 * @ring: amdgpu_ring pointer
96 *
97 * Commits the write pointer to the hardware
98 */
99 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
100 {
101 struct amdgpu_device *adev = ring->adev;
102
103 if (ring->use_doorbell) {
104 /* XXX check if swapping is necessary on BE */
105 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
106 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
107 return;
108 }
109
110 if (ring == &adev->vce.ring[0])
111 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
112 lower_32_bits(ring->wptr));
113 else if (ring == &adev->vce.ring[1])
114 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
115 lower_32_bits(ring->wptr));
116 else
117 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
118 lower_32_bits(ring->wptr));
119 }
120
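/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to report ready
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS for the FW_LOADED bit, soft-resetting the ECPU between
 * retry rounds.  Returns 0 on success or -ETIMEDOUT if the firmware never
 * comes up.
 */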
121 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
122 {
123 int i, j;
124
125 for (i = 0; i < 10; ++i) {
126 for (j = 0; j < 100; ++j) {
127 uint32_t status =
128 RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
129
130 if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
131 return 0;
132 mdelay(10);
133 }
134
135 DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
136 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
137 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
138 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
139 mdelay(10);
140 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
141 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
142 mdelay(10);
143
144 }
145
146 return -ETIMEDOUT;
147 }
148
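/**
 * vce_v4_0_mmsch_start - hand the init table to the MMSCH
 *
 * @adev: amdgpu_device pointer
 * @table: memory descriptor table holding the init header
 *
 * Programs the VF context address, VMID and size registers, then kicks off
 * the MMSCH via the mailbox and waits for it to acknowledge.  Returns 0 on
 * success or -EBUSY if the MMSCH does not respond.
 */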
149 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
150 struct amdgpu_mm_table *table)
151 {
152 uint32_t data = 0, loop;
153 uint64_t addr = table->gpu_addr;
154 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
155 uint32_t size;
156
157 size = header->header_size + header->vce_table_size + header->uvd_table_size;
158
159 /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
160 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
161 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
162
163 /* 2, update vmid of descriptor */
164 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
165 data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
166 data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
167 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
168
169 /* 3, notify mmsch about the size of this descriptor */
170 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
171
172 /* 4, set resp to zero */
173 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
174
175 WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
176 adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
177 adev->vce.ring[0].wptr = 0;
178 adev->vce.ring[0].wptr_old = 0;
179
180 /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
181 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
182
183 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
184 loop = 1000;
185 while ((data & 0x10000002) != 0x10000002) {
186 udelay(10);
187 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
188 loop--;
189 if (!loop)
190 break;
191 }
192
193 if (!loop) {
194 dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
195 return -EBUSY;
196 }
197
198 return 0;
199 }
200
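/**
 * vce_v4_0_sriov_start - start VCE block under SRIOV
 *
 * @adev: amdgpu_device pointer
 *
 * Builds the MMSCH init table (ring setup plus the MC_RESUME register
 * programming) in the shared MM table and asks the MMSCH to apply it.
 */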
201 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
202 {
203 struct amdgpu_ring *ring;
204 uint32_t offset, size;
205 uint32_t table_size = 0;
206 struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
207 struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
208 struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
209 struct mmsch_v1_0_cmd_end end = { { 0 } };
210 uint32_t *init_table = adev->virt.mm_table.cpu_addr;
211 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
212
213 direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
214 direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
215 direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
216 end.cmd_header.command_type = MMSCH_COMMAND__END;
217
218 if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
219 header->version = MMSCH_VERSION;
220 header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
221
222 if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
223 header->vce_table_offset = header->header_size;
224 else
225 header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
226
227 init_table += header->vce_table_offset;
228
229 ring = &adev->vce.ring[0];
230 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
231 lower_32_bits(ring->gpu_addr));
232 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
233 upper_32_bits(ring->gpu_addr));
234 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
235 ring->ring_size / 4);
236
237 	/* begin of MC_RESUME */
238 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
239 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
240 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
241 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
242 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
243
244 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
245 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
246 mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
247 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
248 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
249 mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
250 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
251 } else {
252 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
253 mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
254 adev->vce.gpu_addr >> 8);
255 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
256 mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
257 (adev->vce.gpu_addr >> 40) & 0xff);
258 }
259 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
260 mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
261 adev->vce.gpu_addr >> 8);
262 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
263 mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
264 (adev->vce.gpu_addr >> 40) & 0xff);
265 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
266 mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
267 adev->vce.gpu_addr >> 8);
268 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
269 mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
270 (adev->vce.gpu_addr >> 40) & 0xff);
271
272 offset = AMDGPU_VCE_FIRMWARE_OFFSET;
273 size = VCE_V4_0_FW_SIZE;
274 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
275 offset & ~0x0f000000);
276 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
277
278 offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
279 size = VCE_V4_0_STACK_SIZE;
280 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
281 (offset & ~0x0f000000) | (1 << 24));
282 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
283
284 offset += size;
285 size = VCE_V4_0_DATA_SIZE;
286 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
287 (offset & ~0x0f000000) | (2 << 24));
288 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
289
290 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
291 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
292 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
293 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
294
295 /* end of MC_RESUME */
296 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
297 VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
298 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
299 ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
300 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
301 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
302
303 MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
304 VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
305 VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
306
307 /* clear BUSY flag */
308 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
309 ~VCE_STATUS__JOB_BUSY_MASK, 0);
310
311 /* add end packet */
312 memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
313 table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
314 header->vce_table_size = table_size;
315 }
316
317 return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
318 }
319
320 /**
321 * vce_v4_0_start - start VCE block
322 *
323 * @adev: amdgpu_device pointer
324 *
325 * Setup and start the VCE block
326 */
327 static int vce_v4_0_start(struct amdgpu_device *adev)
328 {
329 struct amdgpu_ring *ring;
330 int r;
331
332 ring = &adev->vce.ring[0];
333
334 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
335 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
336 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
337 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
338 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
339
340 ring = &adev->vce.ring[1];
341
342 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
343 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
344 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
345 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
346 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
347
348 ring = &adev->vce.ring[2];
349
350 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
351 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
352 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
353 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
354 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
355
356 vce_v4_0_mc_resume(adev);
357 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
358 ~VCE_STATUS__JOB_BUSY_MASK);
359
360 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
361
362 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
363 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
364 mdelay(100);
365
366 r = vce_v4_0_firmware_loaded(adev);
367
368 /* clear BUSY flag */
369 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
370
371 if (r) {
372 DRM_ERROR("VCE not responding, giving up!!!\n");
373 return r;
374 }
375
376 return 0;
377 }
378
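/**
 * vce_v4_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Gates the VCPU clock, holds the ECPU in soft reset and clears the
 * busy flag.
 */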
379 static int vce_v4_0_stop(struct amdgpu_device *adev)
380 {
381
382 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
383
384 /* hold on ECPU */
385 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
386 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
387 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
388
389 /* clear BUSY flag */
390 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
391
392 /* Set Clock-Gating off */
393 /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
394 vce_v4_0_set_vce_sw_clock_gating(adev, false);
395 */
396
397 return 0;
398 }
399
400 static int vce_v4_0_early_init(void *handle)
401 {
402 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
403
404 	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
405 adev->vce.num_rings = 1;
406 else
407 adev->vce.num_rings = 3;
408
409 vce_v4_0_set_ring_funcs(adev);
410 vce_v4_0_set_irq_funcs(adev);
411
412 return 0;
413 }
414
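/**
 * vce_v4_0_sw_init - sw init for VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Registers the VCE interrupt source, allocates the VCPU BO, initializes
 * the rings (doorbell based under SRIOV) and the MM table used for the
 * MMSCH init.
 */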
415 static int vce_v4_0_sw_init(void *handle)
416 {
417 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
418 struct amdgpu_ring *ring;
419 unsigned size;
420 int r, i;
421
422 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
423 if (r)
424 return r;
425
426 size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
427 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
428 size += VCE_V4_0_FW_SIZE;
429
430 r = amdgpu_vce_sw_init(adev, size);
431 if (r)
432 return r;
433
434 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
435 const struct common_firmware_header *hdr;
436 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
437
438 adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
439 if (!adev->vce.saved_bo)
440 return -ENOMEM;
441
442 hdr = (const struct common_firmware_header *)adev->vce.fw->data;
443 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
444 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
445 adev->firmware.fw_size +=
446 ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
447 DRM_INFO("PSP loading VCE firmware\n");
448 } else {
449 r = amdgpu_vce_resume(adev);
450 if (r)
451 return r;
452 }
453
454 for (i = 0; i < adev->vce.num_rings; i++) {
455 ring = &adev->vce.ring[i];
456 sprintf(ring->name, "vce%d", i);
457 if (amdgpu_sriov_vf(adev)) {
458 /* DOORBELL only works under SRIOV */
459 ring->use_doorbell = true;
460
461 			/* currently only the first encoding ring is used under SRIOV,
462 			 * so point the other rings at an unused doorbell location.
463 			 */
464 if (i == 0)
465 ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2;
466 else
467 ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1;
468 }
469 r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
470 if (r)
471 return r;
472 }
473
474 r = amdgpu_virt_alloc_mm_table(adev);
475 if (r)
476 return r;
477
478 return r;
479 }
480
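/**
 * vce_v4_0_sw_fini - sw fini for VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Frees the MM table, releases the saved firmware image (PSP load) and
 * tears down the common VCE state.
 */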
481 static int vce_v4_0_sw_fini(void *handle)
482 {
483 int r;
484 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
485
486 /* free MM table */
487 amdgpu_virt_free_mm_table(adev);
488
489 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
490 kfree(adev->vce.saved_bo);
491 adev->vce.saved_bo = NULL;
492 }
493
494 r = amdgpu_vce_suspend(adev);
495 if (r)
496 return r;
497
498 return amdgpu_vce_sw_fini(adev);
499 }
500
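/**
 * vce_v4_0_hw_init - start and test the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Starts VCE (via the MMSCH under SRIOV) and runs the ring tests.
 */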
501 static int vce_v4_0_hw_init(void *handle)
502 {
503 int r, i;
504 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
505
506 if (amdgpu_sriov_vf(adev))
507 r = vce_v4_0_sriov_start(adev);
508 else
509 r = vce_v4_0_start(adev);
510 if (r)
511 return r;
512
513 for (i = 0; i < adev->vce.num_rings; i++)
514 adev->vce.ring[i].ready = false;
515
516 for (i = 0; i < adev->vce.num_rings; i++) {
517 r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
518 if (r)
519 return r;
520 
521 		adev->vce.ring[i].ready = true;
522 }
523
524 DRM_INFO("VCE initialized successfully.\n");
525
526 return 0;
527 }
528
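/**
 * vce_v4_0_hw_fini - stop the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Stops VCE on bare metal and marks all rings as not ready.  Under SRIOV
 * the registers are left untouched.
 */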
529 static int vce_v4_0_hw_fini(void *handle)
530 {
531 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
532 int i;
533
534 if (!amdgpu_sriov_vf(adev)) {
535 /* vce_v4_0_wait_for_idle(handle); */
536 vce_v4_0_stop(adev);
537 } else {
538 /* full access mode, so don't touch any VCE register */
539 DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
540 }
541
542 for (i = 0; i < adev->vce.num_rings; i++)
543 adev->vce.ring[i].ready = false;
544
545 return 0;
546 }
547
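/**
 * vce_v4_0_suspend - suspend VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Saves the VCPU BO contents when the firmware was loaded by the PSP,
 * then shuts the block down.
 */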
548 static int vce_v4_0_suspend(void *handle)
549 {
550 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
551 int r;
552
553 if (adev->vce.vcpu_bo == NULL)
554 return 0;
555
556 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
557 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
558 void *ptr = adev->vce.cpu_addr;
559
560 memcpy_fromio(adev->vce.saved_bo, ptr, size);
561 }
562
563 r = vce_v4_0_hw_fini(adev);
564 if (r)
565 return r;
566
567 return amdgpu_vce_suspend(adev);
568 }
569
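/**
 * vce_v4_0_resume - resume VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Restores the VCPU BO contents (PSP load) or reloads the firmware,
 * then re-initializes the hardware.
 */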
570 static int vce_v4_0_resume(void *handle)
571 {
572 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
573 int r;
574
575 if (adev->vce.vcpu_bo == NULL)
576 return -EINVAL;
577
578 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
579 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
580 void *ptr = adev->vce.cpu_addr;
581
582 memcpy_toio(ptr, adev->vce.saved_bo, size);
583 } else {
584 r = amdgpu_vce_resume(adev);
585 if (r)
586 return r;
587 }
588
589 return vce_v4_0_hw_init(adev);
590 }
591
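/**
 * vce_v4_0_mc_resume - program the memory controller related registers
 *
 * @adev: amdgpu_device pointer
 *
 * Sets up clock gating and the LMI, then points the VCPU cache BARs at
 * the firmware, stack and data regions.
 */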
592 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
593 {
594 uint32_t offset, size;
595
596 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
597 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
598 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
599 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
600
601 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
602 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
603 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
604 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
605 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
606
607 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
608 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
609 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
610 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
611 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
612 } else {
613 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
614 (adev->vce.gpu_addr >> 8));
615 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
616 (adev->vce.gpu_addr >> 40) & 0xff);
617 }
618
619 offset = AMDGPU_VCE_FIRMWARE_OFFSET;
620 size = VCE_V4_0_FW_SIZE;
621 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
622 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
623
624 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
625 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
626 offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
627 size = VCE_V4_0_STACK_SIZE;
628 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
629 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
630
631 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
632 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
633 offset += size;
634 size = VCE_V4_0_DATA_SIZE;
635 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
636 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
637
638 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
639 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
640 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
641 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
642 }
643
644 static int vce_v4_0_set_clockgating_state(void *handle,
645 enum amd_clockgating_state state)
646 {
647 	/* needed for driver unload */
648 return 0;
649 }
650
651 #if 0
652 static bool vce_v4_0_is_idle(void *handle)
653 {
654 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
655 u32 mask = 0;
656
657 mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
658 mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
659
660 return !(RREG32(mmSRBM_STATUS2) & mask);
661 }
662
663 static int vce_v4_0_wait_for_idle(void *handle)
664 {
665 unsigned i;
666 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
667
668 for (i = 0; i < adev->usec_timeout; i++)
669 if (vce_v4_0_is_idle(handle))
670 return 0;
671
672 return -ETIMEDOUT;
673 }
674
675 #define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
676 #define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
677 #define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
678 #define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
679 VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
680
681 static bool vce_v4_0_check_soft_reset(void *handle)
682 {
683 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
684 u32 srbm_soft_reset = 0;
685
686 	/* According to the VCE team, we should use VCE_STATUS instead of
687 	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
688 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
689 	 * instance's registers are accessed
690 	 * (0 for the 1st instance, 0x10 for the 2nd instance).
691 	 *
692 	 * VCE_STATUS
693 	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
694 	 * |----+----+-----------+----+----+----+----------+---------+----|
695 	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |   bit1  |bit0|
696 	 *
697 	 * The VCE team suggests using bits 3-6 for the busy status check.
698 	 */
699 mutex_lock(&adev->grbm_idx_mutex);
700 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
701 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
702 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
703 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
704 }
705 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
706 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
707 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
708 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
709 }
710 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
711 mutex_unlock(&adev->grbm_idx_mutex);
712
713 if (srbm_soft_reset) {
714 adev->vce.srbm_soft_reset = srbm_soft_reset;
715 return true;
716 } else {
717 adev->vce.srbm_soft_reset = 0;
718 return false;
719 }
720 }
721
722 static int vce_v4_0_soft_reset(void *handle)
723 {
724 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
725 u32 srbm_soft_reset;
726
727 if (!adev->vce.srbm_soft_reset)
728 return 0;
729 srbm_soft_reset = adev->vce.srbm_soft_reset;
730
731 if (srbm_soft_reset) {
732 u32 tmp;
733
734 tmp = RREG32(mmSRBM_SOFT_RESET);
735 tmp |= srbm_soft_reset;
736 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
737 WREG32(mmSRBM_SOFT_RESET, tmp);
738 tmp = RREG32(mmSRBM_SOFT_RESET);
739
740 udelay(50);
741
742 tmp &= ~srbm_soft_reset;
743 WREG32(mmSRBM_SOFT_RESET, tmp);
744 tmp = RREG32(mmSRBM_SOFT_RESET);
745
746 /* Wait a little for things to settle down */
747 udelay(50);
748 }
749
750 return 0;
751 }
752
753 static int vce_v4_0_pre_soft_reset(void *handle)
754 {
755 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
756
757 if (!adev->vce.srbm_soft_reset)
758 return 0;
759
760 mdelay(5);
761
762 return vce_v4_0_suspend(adev);
763 }
764
765
766 static int vce_v4_0_post_soft_reset(void *handle)
767 {
768 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
769
770 if (!adev->vce.srbm_soft_reset)
771 return 0;
772
773 mdelay(5);
774
775 return vce_v4_0_resume(adev);
776 }
777
778 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
779 {
780 u32 tmp, data;
781
782 tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
783 if (override)
784 data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
785 else
786 data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
787
788 if (tmp != data)
789 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
790 }
791
792 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
793 bool gated)
794 {
795 u32 data;
796
797 /* Set Override to disable Clock Gating */
798 vce_v4_0_override_vce_clock_gating(adev, true);
799
800 	/* This function enables MGCG which is controlled by firmware.
801 	 * With the clocks in the gated state the core is still
802 	 * accessible but the firmware will throttle the clocks on the
803 	 * fly as necessary.
804 	 */
805 if (gated) {
806 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
807 data |= 0x1ff;
808 data &= ~0xef0000;
809 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
810
811 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
812 data |= 0x3ff000;
813 data &= ~0xffc00000;
814 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
815
816 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
817 data |= 0x2;
818 data &= ~0x00010000;
819 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
820
821 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
822 data |= 0x37f;
823 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
824
825 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
826 data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
827 VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
828 VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
829 0x8;
830 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
831 } else {
832 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
833 data &= ~0x80010;
834 data |= 0xe70008;
835 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
836
837 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
838 data |= 0xffc00000;
839 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
840
841 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
842 data |= 0x10000;
843 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
844
845 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
846 data &= ~0xffc00000;
847 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
848
849 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
850 data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
851 VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
852 VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
853 0x8);
854 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
855 }
856 vce_v4_0_override_vce_clock_gating(adev, false);
857 }
858
859 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
860 {
861 u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
862
863 if (enable)
864 tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
865 else
866 tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
867
868 WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
869 }
870
871 static int vce_v4_0_set_clockgating_state(void *handle,
872 enum amd_clockgating_state state)
873 {
874 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
875 	bool enable = (state == AMD_CG_STATE_GATE);
876 int i;
877
878 if ((adev->asic_type == CHIP_POLARIS10) ||
879 (adev->asic_type == CHIP_TONGA) ||
880 (adev->asic_type == CHIP_FIJI))
881 vce_v4_0_set_bypass_mode(adev, enable);
882
883 if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
884 return 0;
885
886 mutex_lock(&adev->grbm_idx_mutex);
887 for (i = 0; i < 2; i++) {
888 /* Program VCE Instance 0 or 1 if not harvested */
889 if (adev->vce.harvest_config & (1 << i))
890 continue;
891
892 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
893
894 if (enable) {
895 /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
896 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
897 			data &= ~(0xf | 0xff0);
898 			data |= ((0x0 << 0) | (0x04 << 4));
899 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);
900 
901 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
902 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
903 			data &= ~(0xf | 0xff0);
904 			data |= ((0x0 << 0) | (0x04 << 4));
905 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
906 }
907
908 vce_v4_0_set_vce_sw_clock_gating(adev, enable);
909 }
910
911 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
912 mutex_unlock(&adev->grbm_idx_mutex);
913
914 return 0;
915 }
916
917 static int vce_v4_0_set_powergating_state(void *handle,
918 enum amd_powergating_state state)
919 {
920 /* This doesn't actually powergate the VCE block.
921 * That's done in the dpm code via the SMC. This
922 * just re-inits the block as necessary. The actual
923 * gating still happens in the dpm code. We should
924 * revisit this when there is a cleaner line between
925 * the smc and the hw blocks
926 */
927 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
928
929 if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
930 return 0;
931
932 if (state == AMD_PG_STATE_GATE)
933 /* XXX do we need a vce_v4_0_stop()? */
934 return 0;
935 else
936 return vce_v4_0_start(adev);
937 }
938 #endif
939
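/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vmid: VM ID to use
 * @ctx_switch: unused
 *
 * Emits a VCE_CMD_IB_VM packet pointing at the indirect buffer.
 */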
940 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
941 struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
942 {
943 amdgpu_ring_write(ring, VCE_CMD_IB_VM);
944 amdgpu_ring_write(ring, vmid);
945 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
946 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
947 amdgpu_ring_write(ring, ib->length_dw);
948 }
949
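/**
 * vce_v4_0_ring_emit_fence - add a fence command to the ring
 *
 * @ring: amdgpu_ring pointer
 * @addr: address the fence value is written to
 * @seq: sequence number to write
 * @flags: fence flags (64 bit fences are not supported)
 *
 * Emits a fence followed by a trap command.
 */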
950 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
951 u64 seq, unsigned flags)
952 {
953 WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
954
955 amdgpu_ring_write(ring, VCE_CMD_FENCE);
956 amdgpu_ring_write(ring, addr);
957 amdgpu_ring_write(ring, upper_32_bits(addr));
958 amdgpu_ring_write(ring, seq);
959 amdgpu_ring_write(ring, VCE_CMD_TRAP);
960 }
961
962 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
963 {
964 amdgpu_ring_write(ring, VCE_CMD_END);
965 }
966
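/**
 * vce_v4_0_emit_vm_flush - emit a VM flush on the ring
 *
 * @ring: amdgpu_ring pointer
 * @vmid: VM ID to flush
 * @pd_addr: address of the page directory
 *
 * Writes the page directory address to the VM hub, then requests a TLB
 * flush on the ring's invalidation engine and waits for it to complete.
 */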
967 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
968 unsigned int vmid, uint64_t pd_addr)
969 {
970 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
971 uint32_t req = ring->adev->gmc.gmc_funcs->get_invalidate_req(vmid);
972 uint64_t flags = AMDGPU_PTE_VALID;
973 unsigned eng = ring->vm_inv_eng;
974
975 amdgpu_gmc_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
976 pd_addr |= flags;
977
978 amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
979 amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2);
980 amdgpu_ring_write(ring, upper_32_bits(pd_addr));
981
982 amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
983 amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
984 amdgpu_ring_write(ring, lower_32_bits(pd_addr));
985
986 amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
987 amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
988 amdgpu_ring_write(ring, 0xffffffff);
989 amdgpu_ring_write(ring, lower_32_bits(pd_addr));
990
991 /* flush TLB */
992 amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
993 amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
994 amdgpu_ring_write(ring, req);
995
996 /* wait for flush */
997 amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
998 amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
999 amdgpu_ring_write(ring, 1 << vmid);
1000 amdgpu_ring_write(ring, 1 << vmid);
1001 }
1002
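/**
 * vce_v4_0_set_interrupt_state - enable or disable VCE interrupts
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type (unused)
 * @state: requested interrupt state
 *
 * Toggles the system interrupt trap enable bit; left untouched under SRIOV.
 */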
1003 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1004 struct amdgpu_irq_src *source,
1005 unsigned type,
1006 enum amdgpu_interrupt_state state)
1007 {
1008 uint32_t val = 0;
1009
1010 if (!amdgpu_sriov_vf(adev)) {
1011 if (state == AMDGPU_IRQ_STATE_ENABLE)
1012 val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1013
1014 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1015 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1016 }
1017 return 0;
1018 }
1019
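/**
 * vce_v4_0_process_interrupt - handle a VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: IV ring entry
 *
 * Processes the fence for the ring that raised the trap interrupt.
 */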
1020 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1021 struct amdgpu_irq_src *source,
1022 struct amdgpu_iv_entry *entry)
1023 {
1024 DRM_DEBUG("IH: VCE\n");
1025
1026 switch (entry->src_data[0]) {
1027 case 0:
1028 case 1:
1029 case 2:
1030 amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1031 break;
1032 default:
1033 DRM_ERROR("Unhandled interrupt: %d %d\n",
1034 entry->src_id, entry->src_data[0]);
1035 break;
1036 }
1037
1038 return 0;
1039 }
1040
1041 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1042 .name = "vce_v4_0",
1043 .early_init = vce_v4_0_early_init,
1044 .late_init = NULL,
1045 .sw_init = vce_v4_0_sw_init,
1046 .sw_fini = vce_v4_0_sw_fini,
1047 .hw_init = vce_v4_0_hw_init,
1048 .hw_fini = vce_v4_0_hw_fini,
1049 .suspend = vce_v4_0_suspend,
1050 .resume = vce_v4_0_resume,
1051 .is_idle = NULL /* vce_v4_0_is_idle */,
1052 .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1053 .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1054 .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1055 .soft_reset = NULL /* vce_v4_0_soft_reset */,
1056 .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1057 .set_clockgating_state = vce_v4_0_set_clockgating_state,
1058 .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
1059 };
1060
1061 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1062 .type = AMDGPU_RING_TYPE_VCE,
1063 .align_mask = 0x3f,
1064 .nop = VCE_CMD_NO_OP,
1065 .support_64bit_ptrs = false,
1066 .vmhub = AMDGPU_MMHUB,
1067 .get_rptr = vce_v4_0_ring_get_rptr,
1068 .get_wptr = vce_v4_0_ring_get_wptr,
1069 .set_wptr = vce_v4_0_ring_set_wptr,
1070 .parse_cs = amdgpu_vce_ring_parse_cs_vm,
1071 .emit_frame_size =
1072 17 + /* vce_v4_0_emit_vm_flush */
1073 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1074 1, /* vce_v4_0_ring_insert_end */
1075 .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1076 .emit_ib = vce_v4_0_ring_emit_ib,
1077 .emit_vm_flush = vce_v4_0_emit_vm_flush,
1078 .emit_fence = vce_v4_0_ring_emit_fence,
1079 .test_ring = amdgpu_vce_ring_test_ring,
1080 .test_ib = amdgpu_vce_ring_test_ib,
1081 .insert_nop = amdgpu_ring_insert_nop,
1082 .insert_end = vce_v4_0_ring_insert_end,
1083 .pad_ib = amdgpu_ring_generic_pad_ib,
1084 .begin_use = amdgpu_vce_ring_begin_use,
1085 .end_use = amdgpu_vce_ring_end_use,
1086 };
1087
1088 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1089 {
1090 int i;
1091
1092 for (i = 0; i < adev->vce.num_rings; i++)
1093 adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1094 DRM_INFO("VCE enabled in VM mode\n");
1095 }
1096
1097 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1098 .set = vce_v4_0_set_interrupt_state,
1099 .process = vce_v4_0_process_interrupt,
1100 };
1101
1102 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1103 {
1104 adev->vce.irq.num_types = 1;
1105 adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1106 }
1107
1108 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1109 {
1110 .type = AMD_IP_BLOCK_TYPE_VCE,
1111 .major = 4,
1112 .minor = 0,
1113 .rev = 0,
1114 .funcs = &vce_v4_0_ip_funcs,
1115 };