/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/firmware.h>
#include <linux/pci.h>

#include <drm/drm_cache.h>

#include "amdgpu_atomfirmware.h"
#include "amdgpu_gem.h"

#include "hdp/hdp_4_0_offset.h"
#include "hdp/hdp_4_0_sh_mask.h"
#include "gc/gc_9_0_sh_mask.h"
#include "dce/dce_12_0_offset.h"
#include "dce/dce_12_0_sh_mask.h"
#include "vega10_enum.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "oss/osssys_4_0_offset.h"

#include "soc15_common.h"
#include "umc/umc_6_0_sh_mask.h"

#include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h"
#include "athub_v1_0.h"
#include "gfxhub_v1_1.h"
#include "mmhub_v9_4.h"

#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"

#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"
/* add these here since we already include dce12 headers and these are for DCN */
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION					0x055d
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX				2
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT		0x0
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT	0x10
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK		0x00003FFFL
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK		0x3FFF0000L
#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0						0x049d
#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX					2
static const char *gfxhub_client_ids[] = {
};

static const char *mmhub_client_ids_raven[][2] = {
};

static const char *mmhub_client_ids_renoir[][2] = {
};

static const char *mmhub_client_ids_vega10[][2] = {
	[32+14][0] = "SDMA0",
	[32+4][1] = "DCEDWB",
	[32+14][1] = "SDMA1",
};

static const char *mmhub_client_ids_vega12[][2] = {
	[32+15][0] = "SDMA0",
	[32+1][1] = "DCEDWB",
	[32+15][1] = "SDMA1",
};

static const char *mmhub_client_ids_vega20[][2] = {
	[32+12][0] = "UTCL2",
	[32+14][0] = "SDMA1",
	[32+14][1] = "SDMA1",
};

static const char *mmhub_client_ids_arcturus[][2] = {
	[32+15][0] = "SDMA1",
	[64+15][0] = "SDMA2",
	[96+15][0] = "SDMA3",
	[128+15][0] = "SDMA4",
	[160+11][0] = "JPEG",
	[160+13][0] = "VCNU",
	[160+15][0] = "SDMA5",
	[192+10][0] = "UTCL2",
	[192+11][0] = "JPEG1",
	[192+12][0] = "VCN1",
	[192+13][0] = "VCN1U",
	[192+15][0] = "SDMA6",
	[224+15][0] = "SDMA7",
	[32+15][1] = "SDMA1",
	[64+15][1] = "SDMA2",
	[96+15][1] = "SDMA3",
	[128+15][1] = "SDMA4",
	[160+11][1] = "JPEG",
	[160+13][1] = "VCNU",
	[160+15][1] = "SDMA5",
	[192+11][1] = "JPEG1",
	[192+12][1] = "VCN1",
	[192+13][1] = "VCN1U",
	[192+15][1] = "SDMA6",
	[224+15][1] = "SDMA7",
};
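
/*
 * The tables above are indexed as [client_id][rw] using the CID and RW fields
 * of VM_L2_PROTECTION_FAULT_STATUS; gmc_v9_0_process_interrupt() uses them to
 * turn a raw fault into a readable client name. Client IDs without an entry
 * resolve to NULL and are reported as "unknown".
 */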
static const u32 golden_settings_vega10_hdp[] =
{
	0xf64, 0x0fffffff, 0x00000000,
	0xf65, 0x0fffffff, 0x00000000,
	0xf66, 0x0fffffff, 0x00000000,
	0xf67, 0x0fffffff, 0x00000000,
	0xf68, 0x0fffffff, 0x00000000,
	0xf6a, 0x0fffffff, 0x00000000,
	0xf6b, 0x0fffffff, 0x00000000,
	0xf6c, 0x0fffffff, 0x00000000,
	0xf6d, 0x0fffffff, 0x00000000,
	0xf6e, 0x0fffffff, 0x00000000,
};
static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
{
	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
};
static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
{
	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
};
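
/*
 * Raw register addresses of the UMC ECC control, mask and status registers.
 * The address pattern below appears to cover eight UMC instances (bases
 * 0x40000 apart), each with four channel instances (0x800 apart).
 */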
static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = {
	(0x000143c0 + 0x00000000),
	(0x000143c0 + 0x00000800),
	(0x000143c0 + 0x00001000),
	(0x000143c0 + 0x00001800),
	(0x000543c0 + 0x00000000),
	(0x000543c0 + 0x00000800),
	(0x000543c0 + 0x00001000),
	(0x000543c0 + 0x00001800),
	(0x000943c0 + 0x00000000),
	(0x000943c0 + 0x00000800),
	(0x000943c0 + 0x00001000),
	(0x000943c0 + 0x00001800),
	(0x000d43c0 + 0x00000000),
	(0x000d43c0 + 0x00000800),
	(0x000d43c0 + 0x00001000),
	(0x000d43c0 + 0x00001800),
	(0x001143c0 + 0x00000000),
	(0x001143c0 + 0x00000800),
	(0x001143c0 + 0x00001000),
	(0x001143c0 + 0x00001800),
	(0x001543c0 + 0x00000000),
	(0x001543c0 + 0x00000800),
	(0x001543c0 + 0x00001000),
	(0x001543c0 + 0x00001800),
	(0x001943c0 + 0x00000000),
	(0x001943c0 + 0x00000800),
	(0x001943c0 + 0x00001000),
	(0x001943c0 + 0x00001800),
	(0x001d43c0 + 0x00000000),
	(0x001d43c0 + 0x00000800),
	(0x001d43c0 + 0x00001000),
	(0x001d43c0 + 0x00001800),
};
static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
	(0x000143e0 + 0x00000000),
	(0x000143e0 + 0x00000800),
	(0x000143e0 + 0x00001000),
	(0x000143e0 + 0x00001800),
	(0x000543e0 + 0x00000000),
	(0x000543e0 + 0x00000800),
	(0x000543e0 + 0x00001000),
	(0x000543e0 + 0x00001800),
	(0x000943e0 + 0x00000000),
	(0x000943e0 + 0x00000800),
	(0x000943e0 + 0x00001000),
	(0x000943e0 + 0x00001800),
	(0x000d43e0 + 0x00000000),
	(0x000d43e0 + 0x00000800),
	(0x000d43e0 + 0x00001000),
	(0x000d43e0 + 0x00001800),
	(0x001143e0 + 0x00000000),
	(0x001143e0 + 0x00000800),
	(0x001143e0 + 0x00001000),
	(0x001143e0 + 0x00001800),
	(0x001543e0 + 0x00000000),
	(0x001543e0 + 0x00000800),
	(0x001543e0 + 0x00001000),
	(0x001543e0 + 0x00001800),
	(0x001943e0 + 0x00000000),
	(0x001943e0 + 0x00000800),
	(0x001943e0 + 0x00001000),
	(0x001943e0 + 0x00001800),
	(0x001d43e0 + 0x00000000),
	(0x001d43e0 + 0x00000800),
	(0x001d43e0 + 0x00001000),
	(0x001d43e0 + 0x00001800),
};
static const uint32_t ecc_umc_mcumc_status_addrs[] = {
	(0x000143c2 + 0x00000000),
	(0x000143c2 + 0x00000800),
	(0x000143c2 + 0x00001000),
	(0x000143c2 + 0x00001800),
	(0x000543c2 + 0x00000000),
	(0x000543c2 + 0x00000800),
	(0x000543c2 + 0x00001000),
	(0x000543c2 + 0x00001800),
	(0x000943c2 + 0x00000000),
	(0x000943c2 + 0x00000800),
	(0x000943c2 + 0x00001000),
	(0x000943c2 + 0x00001800),
	(0x000d43c2 + 0x00000000),
	(0x000d43c2 + 0x00000800),
	(0x000d43c2 + 0x00001000),
	(0x000d43c2 + 0x00001800),
	(0x001143c2 + 0x00000000),
	(0x001143c2 + 0x00000800),
	(0x001143c2 + 0x00001000),
	(0x001143c2 + 0x00001800),
	(0x001543c2 + 0x00000000),
	(0x001543c2 + 0x00000800),
	(0x001543c2 + 0x00001000),
	(0x001543c2 + 0x00001800),
	(0x001943c2 + 0x00000000),
	(0x001943c2 + 0x00000800),
	(0x001943c2 + 0x00001000),
	(0x001943c2 + 0x00001800),
	(0x001d43c2 + 0x00000000),
	(0x001d43c2 + 0x00000800),
	(0x001d43c2 + 0x00001000),
	(0x001d43c2 + 0x00001800),
};
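
/*
 * gmc_v9_0_ecc_interrupt_state() enables or disables the ECC interrupt by
 * read-modify-writing the control and mask register of every UMC channel
 * listed in the address tables above.
 */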
static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
		struct amdgpu_irq_src *src,
		unsigned type,
		enum amdgpu_interrupt_state state)
{
	u32 bits, i, tmp, reg;

	/* Devices newer than VEGA10/12 shall have these programming
	 * sequences performed by PSP BL */
	if (adev->asic_type >= CHIP_VEGA20)
		return 0;

	bits = 0x7f;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_addrs[i];
			tmp = RREG32(reg);
			tmp &= ~bits;
			WREG32(reg, tmp);
		}
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
			tmp = RREG32(reg);
			tmp &= ~bits;
			WREG32(reg, tmp);
		}
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_addrs[i];
			tmp = RREG32(reg);
			tmp |= bits;
			WREG32(reg, tmp);
		}
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
			tmp = RREG32(reg);
			tmp |= bits;
			WREG32(reg, tmp);
		}
		break;
	default:
		break;
	}

	return 0;
}
static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *src,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	struct amdgpu_vmhub *hub;
	u32 tmp, reg, bits, i, j;

	bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		for (j = 0; j < adev->num_vmhubs; j++) {
			hub = &adev->vmhub[j];
			for (i = 0; i < 16; i++) {
				reg = hub->vm_context0_cntl + i;
				tmp = RREG32(reg);
				tmp &= ~bits;
				WREG32(reg, tmp);
			}
		}
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		for (j = 0; j < adev->num_vmhubs; j++) {
			hub = &adev->vmhub[j];
			for (i = 0; i < 16; i++) {
				reg = hub->vm_context0_cntl + i;
				tmp = RREG32(reg);
				tmp |= bits;
				WREG32(reg, tmp);
			}
		}
		break;
	default:
		break;
	}

	return 0;
}
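
/*
 * Page fault handling: the IH ring entry carries the page number of the
 * faulting address in src_data[0] and its top nibble, plus the retry bit
 * (0x80), in src_data[1]; gmc_v9_0_process_interrupt() reassembles the
 * 48-bit address from those two dwords before decoding the fault status.
 */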
static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
				struct amdgpu_irq_src *source,
				struct amdgpu_iv_entry *entry)
{
	bool retry_fault = !!(entry->src_data[1] & 0x80);
	uint32_t status = 0, cid = 0, rw = 0;
	struct amdgpu_task_info task_info;
	struct amdgpu_vmhub *hub;
	const char *mmhub_cid;
	const char *hub_name;
	u64 addr;

	addr = (u64)entry->src_data[0] << 12;
	addr |= ((u64)entry->src_data[1] & 0xf) << 44;

	if (retry_fault && amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
						    entry->timestamp))
		return 1; /* This also prevents sending it to KFD */

	/* If it's the first fault for this address, process it normally */
	if (retry_fault && !in_interrupt() &&
	    amdgpu_vm_handle_fault(adev, entry->pasid, addr))
		return 1; /* This also prevents sending it to KFD */

	if (!printk_ratelimit())
		return 0;

	if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
		hub_name = "mmhub0";
		hub = &adev->vmhub[AMDGPU_MMHUB_0];
	} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
		hub_name = "mmhub1";
		hub = &adev->vmhub[AMDGPU_MMHUB_1];
	} else {
		hub_name = "gfxhub0";
		hub = &adev->vmhub[AMDGPU_GFXHUB_0];
	}

	memset(&task_info, 0, sizeof(struct amdgpu_task_info));
	amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);

	dev_err(adev->dev,
		"[%s] %s page fault (src_id:%u ring:%u vmid:%u "
		"pasid:%u, for process %s pid %d thread %s pid %d)\n",
		hub_name, retry_fault ? "retry" : "no-retry",
		entry->src_id, entry->ring_id, entry->vmid,
		entry->pasid, task_info.process_name, task_info.tgid,
		task_info.task_name, task_info.pid);
	dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
		addr, entry->client_id);

	if (amdgpu_sriov_vf(adev))
		return 0;

	/*
	 * Issue a dummy read to wait for the status register to
	 * be updated to avoid reading an incorrect value due to
	 * the new fast GRBM interface.
	 */
	if (entry->vmid_src == AMDGPU_GFXHUB_0)
		RREG32(hub->vm_l2_pro_fault_status);

	status = RREG32(hub->vm_l2_pro_fault_status);
	cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID);
	rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
	WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);

	dev_err(adev->dev,
		"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
		status);
	if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) {
		dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
			cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" :
			gfxhub_client_ids[cid],
			cid);
	} else {
		switch (adev->asic_type) {
		case CHIP_VEGA10:
			mmhub_cid = mmhub_client_ids_vega10[cid][rw];
			break;
		case CHIP_VEGA12:
			mmhub_cid = mmhub_client_ids_vega12[cid][rw];
			break;
		case CHIP_VEGA20:
			mmhub_cid = mmhub_client_ids_vega20[cid][rw];
			break;
		case CHIP_ARCTURUS:
			mmhub_cid = mmhub_client_ids_arcturus[cid][rw];
			break;
		case CHIP_RAVEN:
			mmhub_cid = mmhub_client_ids_raven[cid][rw];
			break;
		case CHIP_RENOIR:
			mmhub_cid = mmhub_client_ids_renoir[cid][rw];
			break;
		default:
			mmhub_cid = NULL;
			break;
		}
		dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
			mmhub_cid ? mmhub_cid : "unknown", cid);
	}
	dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
		REG_GET_FIELD(status,
		VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
	dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
		REG_GET_FIELD(status,
		VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
	dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
		REG_GET_FIELD(status,
		VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
	dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
		REG_GET_FIELD(status,
		VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
	dev_err(adev->dev, "\t RW: 0x%x\n", rw);

	return 0;
}
static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
	.set = gmc_v9_0_vm_fault_interrupt_state,
	.process = gmc_v9_0_process_interrupt,
};

static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
	.set = gmc_v9_0_ecc_interrupt_state,
	.process = amdgpu_umc_process_ecc_irq,
};
static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gmc.vm_fault.num_types = 1;
	adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;

	if (!amdgpu_sriov_vf(adev)) {
		adev->gmc.ecc_irq.num_types = 1;
		adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
	}
}
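
/*
 * gmc_v9_0_get_invalidate_req() builds a VM_INVALIDATE_ENG0_REQ value for one
 * VMID: it selects the per-VMID invalidate bit and the flush type, and
 * requests invalidation of the L2 PTE/PDE0/PDE1/PDE2 and L1 PTE caches.
 */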
static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
					    uint32_t flush_type)
{
	u32 req = 0;

	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
			    CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);

	return req;
}
/**
 * gmc_v9_0_use_invalidate_semaphore - judge whether to use semaphore
 *
 * @adev: amdgpu_device pointer
 * @vmhub: vmhub type
 *
 */
static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
					      uint32_t vmhub)
{
	return ((vmhub == AMDGPU_MMHUB_0 ||
		 vmhub == AMDGPU_MMHUB_1) &&
		(!amdgpu_sriov_vf(adev)) &&
		(!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) &&
		   (adev->apu_flags & AMD_APU_IS_PICASSO))));
}
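
/*
 * Reading the condition above: the invalidation semaphore is only used for
 * the MMHUB(s), never under SR-IOV, and not on Picasso (APUs that set
 * AMD_APU_IS_PICASSO without AMD_APU_IS_RAVEN2).
 */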
static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
						     uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;

	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
/*
 * VMID 0 is for the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the amdgpu vm/hsa code.
 */

/**
 * gmc_v9_0_flush_gpu_tlb - tlb flush with certain type
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 * @flush_type: the flush type
 *
 * Flush the TLB for the requested page table using certain type.
 */
static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
				   uint32_t vmhub, uint32_t flush_type)
{
	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
	const unsigned eng = 17;
	u32 j, inv_req, inv_req2, tmp;
	struct amdgpu_vmhub *hub;

	BUG_ON(vmhub >= adev->num_vmhubs);

	hub = &adev->vmhub[vmhub];
	if (adev->gmc.xgmi.num_physical_nodes &&
	    adev->asic_type == CHIP_VEGA20) {
		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
		 * heavy-weight TLB flush (type 2), which flushes
		 * both. Due to a race condition with concurrent
		 * memory accesses using the same TLB cache line, we
		 * still need a second TLB flush after this.
		 */
		inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
		inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
	} else {
		inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
		inv_req2 = 0;
	}

	/* This is necessary for a HW workaround under SRIOV as well
	 * as GFXOFF under bare metal
	 */
	if (adev->gfx.kiq.ring.sched.ready &&
	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
	    down_read_trylock(&adev->reset_sem)) {
		uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
		uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;

		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
						   1 << vmid);
		up_read(&adev->reset_sem);
		return;
	}

	spin_lock(&adev->gmc.invalidate_lock);

	/*
	 * It may lose the gpuvm invalidate acknowledge state across
	 * power-gating off cycles, so add a semaphore acquire before the
	 * invalidation and a semaphore release after it to avoid entering a
	 * power gated state in between.
	 */

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore) {
		for (j = 0; j < adev->usec_timeout; j++) {
			/* a read return value of 1 means semaphore acquire */
			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
					    hub->eng_distance * eng);
			if (tmp & 0x1)
				break;
			udelay(1);
		}

		if (j >= adev->usec_timeout)
			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
	}

	do {
		WREG32_NO_KIQ(hub->vm_inv_eng0_req +
			      hub->eng_distance * eng, inv_req);

		/*
		 * Issue a dummy read to wait for the ACK register to
		 * be cleared to avoid a false ACK due to the new fast
		 * GRBM interface.
		 */
		if (vmhub == AMDGPU_GFXHUB_0)
			RREG32_NO_KIQ(hub->vm_inv_eng0_req +
				      hub->eng_distance * eng);

		for (j = 0; j < adev->usec_timeout; j++) {
			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
					    hub->eng_distance * eng);
			if (tmp & (1 << vmid))
				break;
			udelay(1);
		}

		inv_req = inv_req2;
		inv_req2 = 0;
	} while (inv_req);

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/*
		 * add semaphore release after invalidation,
		 * write with 0 means semaphore release
		 */
		WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
			      hub->eng_distance * eng, 0);

	spin_unlock(&adev->gmc.invalidate_lock);

	if (j < adev->usec_timeout)
		return;

	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}
/**
 * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
 *
 * @adev: amdgpu_device pointer
 * @pasid: pasid to be flushed
 *
 * Flush the TLB for the requested pasid.
 */
static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
					uint16_t pasid, uint32_t flush_type,
					bool all_hub)
{
	int vmid, i;
	signed long r;
	uint32_t seq;
	uint16_t queried_pasid;
	bool ret;
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	if (amdgpu_in_reset(adev))
		return -EIO;

	if (ring->sched.ready && down_read_trylock(&adev->reset_sem)) {
		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
		 * heavy-weight TLB flush (type 2), which flushes
		 * both. Due to a race condition with concurrent
		 * memory accesses using the same TLB cache line, we
		 * still need a second TLB flush after this.
		 */
		bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
				       adev->asic_type == CHIP_VEGA20);
		/* 2 dwords flush + 8 dwords fence */
		unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;

		if (vega20_xgmi_wa)
			ndw += kiq->pmf->invalidate_tlbs_size;

		spin_lock(&adev->gfx.kiq.ring_lock);
		/* 2 dwords flush + 8 dwords fence */
		amdgpu_ring_alloc(ring, ndw);
		if (vega20_xgmi_wa)
			kiq->pmf->kiq_invalidate_tlbs(ring,
						      pasid, 2, all_hub);
		kiq->pmf->kiq_invalidate_tlbs(ring,
					pasid, flush_type, all_hub);
		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
		if (r) {
			amdgpu_ring_undo(ring);
			spin_unlock(&adev->gfx.kiq.ring_lock);
			up_read(&adev->reset_sem);
			return -ETIME;
		}

		amdgpu_ring_commit(ring);
		spin_unlock(&adev->gfx.kiq.ring_lock);
		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
		if (r < 1) {
			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
			up_read(&adev->reset_sem);
			return -ETIME;
		}

		up_read(&adev->reset_sem);
		return 0;
	}

	for (vmid = 1; vmid < 16; vmid++) {

		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
							       &queried_pasid);
		if (ret && queried_pasid == pasid) {
			if (all_hub) {
				for (i = 0; i < adev->num_vmhubs; i++)
					gmc_v9_0_flush_gpu_tlb(adev, vmid,
							       i, flush_type);
			} else {
				gmc_v9_0_flush_gpu_tlb(adev, vmid,
						       AMDGPU_GFXHUB_0, flush_type);
			}
			break;
		}
	}

	return 0;
}
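
/*
 * gmc_v9_0_emit_flush_gpu_tlb() is the ring-packet variant of the MMIO flush
 * above: instead of writing registers directly, it emits register write/wait
 * packets so the flush executes in order with the submitted job.
 */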
static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
					    unsigned vmid, uint64_t pd_addr)
{
	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
	uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
	unsigned eng = ring->vm_inv_eng;

	/*
	 * It may lose the gpuvm invalidate acknowledge state across
	 * power-gating off cycles, so add a semaphore acquire before the
	 * invalidation and a semaphore release after it to avoid entering a
	 * power gated state in between.
	 */

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/* a read return value of 1 means semaphore acquire */
		amdgpu_ring_emit_reg_wait(ring,
					  hub->vm_inv_eng0_sem +
					  hub->eng_distance * eng, 0x1, 0x1);

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
			      (hub->ctx_addr_distance * vmid),
			      lower_32_bits(pd_addr));

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
			      (hub->ctx_addr_distance * vmid),
			      upper_32_bits(pd_addr));

	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
					    hub->eng_distance * eng,
					    hub->vm_inv_eng0_ack +
					    hub->eng_distance * eng,
					    req, 1 << vmid);

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/*
		 * add semaphore release after invalidation,
		 * write with 0 means semaphore release
		 */
		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
				      hub->eng_distance * eng, 0);

	return pd_addr;
}
static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
					unsigned pasid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reg;

	/* Do nothing because there's no lut register for mmhub1. */
	if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
		return;

	if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
	else
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;

	amdgpu_ring_emit_wreg(ring, reg, pasid);
}
/*
 * PTE format on VEGA 10:
 * 47:12 4k physical page base address
 *
 * PDE format on VEGA 10:
 * 63:59 block fragment size
 * 47:6 physical base address of PD or PTE
 */
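
/*
 * AMDGPU_PTE_MTYPE_VG10() used below places the memory type in the PTE MTYPE
 * field (bits 58:57 on this generation), selecting NC/WC/RW/CC/UC cache
 * behavior for the mapping.
 */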
static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
{
	switch (flags) {
	case AMDGPU_VM_MTYPE_DEFAULT:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	case AMDGPU_VM_MTYPE_NC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	case AMDGPU_VM_MTYPE_WC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
	case AMDGPU_VM_MTYPE_RW:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
	case AMDGPU_VM_MTYPE_CC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
	case AMDGPU_VM_MTYPE_UC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
	default:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	}
}
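
/*
 * In gmc_v9_0_get_vm_pde() below, AMDGPU_PDE_BFS(0x9) sets a block fragment
 * size of 9, i.e. 2^9 contiguous 4K pages = 2MB blocks, for PDB1 entries
 * when further translation is enabled.
 */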
static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
				uint64_t *addr, uint64_t *flags)
{
	if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
		*addr = adev->vm_manager.vram_base_offset + *addr -
			adev->gmc.vram_start;
	BUG_ON(*addr & 0xFFFF00000000003FULL);

	if (!adev->gmc.translate_further)
		return;

	if (level == AMDGPU_VM_PDB1) {
		/* Set the block fragment size */
		if (!(*flags & AMDGPU_PDE_PTE))
			*flags |= AMDGPU_PDE_BFS(0x9);

	} else if (level == AMDGPU_VM_PDB0) {
		if (*flags & AMDGPU_PDE_PTE)
			*flags &= ~AMDGPU_PDE_PTE;
		else
			*flags |= AMDGPU_PTE_TF;
	}
}
static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
				struct amdgpu_bo_va_mapping *mapping,
				uint64_t *flags)
{
	*flags &= ~AMDGPU_PTE_EXECUTABLE;
	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;

	*flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
	*flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;

	if (mapping->flags & AMDGPU_PTE_PRT) {
		*flags |= AMDGPU_PTE_PRT;
		*flags &= ~AMDGPU_PTE_VALID;
	}

	if (adev->asic_type == CHIP_ARCTURUS &&
	    !(*flags & AMDGPU_PTE_SYSTEM) &&
	    mapping->bo_va->is_xgmi)
		*flags |= AMDGPU_PTE_SNOOPED;
}
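
/*
 * Note the PRT handling above: PRT (partially resident texture) mappings set
 * the PRT bit but clear AMDGPU_PTE_VALID, so the entry is not treated as a
 * normal valid translation.
 */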
static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
	u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
	unsigned size;

	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
		size = AMDGPU_VBIOS_VGA_ALLOCATION;
	} else {
		u32 viewport;

		switch (adev->asic_type) {
		case CHIP_RAVEN:
		case CHIP_RENOIR:
			viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
			size = (REG_GET_FIELD(viewport,
					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
				REG_GET_FIELD(viewport,
					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
				4);
			break;
		default:
			viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
			size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
				REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
				4);
			break;
		}
	}

	return size;
}
static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
	.map_mtype = gmc_v9_0_map_mtype,
	.get_vm_pde = gmc_v9_0_get_vm_pde,
	.get_vm_pte = gmc_v9_0_get_vm_pte,
	.get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
};

static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
{
	adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
}
static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->umc.funcs = &umc_v6_0_funcs;
		break;
	case CHIP_VEGA20:
		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
		adev->umc.funcs = &umc_v6_1_funcs;
		break;
	case CHIP_ARCTURUS:
		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
		adev->umc.funcs = &umc_v6_1_funcs;
		break;
	default:
		break;
	}
}
static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_ARCTURUS:
		adev->mmhub.funcs = &mmhub_v9_4_funcs;
		break;
	default:
		adev->mmhub.funcs = &mmhub_v1_0_funcs;
		break;
	}
}

static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
{
	adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
}
static int gmc_v9_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gmc_v9_0_set_gmc_funcs(adev);
	gmc_v9_0_set_irq_funcs(adev);
	gmc_v9_0_set_umc_funcs(adev);
	gmc_v9_0_set_mmhub_funcs(adev);
	gmc_v9_0_set_gfxhub_funcs(adev);

	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
	adev->gmc.shared_aperture_end =
		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
	adev->gmc.private_aperture_start = 0x1000000000000000ULL;
	adev->gmc.private_aperture_end =
		adev->gmc.private_aperture_start + (4ULL << 30) - 1;

	return 0;
}
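
/*
 * The shared and private apertures set up above are fixed 4GB windows
 * (4ULL << 30) in the 64-bit GPU virtual address space; they presumably
 * correspond to the HSA shared and private memory apertures exposed to
 * compute processes.
 */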
static int gmc_v9_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	amdgpu_bo_late_init(adev);

	r = amdgpu_gmc_allocate_vm_inv_eng(adev);
	if (r)
		return r;

	/*
	 * Workaround for a performance drop when the VBIOS enables partial
	 * writes but disables HBM ECC on vega10.
	 */
	if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
		if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) {
			if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
				adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
		}
	}

	if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
		adev->mmhub.funcs->reset_ras_error_count(adev);

	r = amdgpu_gmc_ras_late_init(adev);
	if (r)
		return r;

	return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
}
static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
				       struct amdgpu_gmc *mc)
{
	u64 base = 0;

	if (!amdgpu_sriov_vf(adev))
		base = adev->mmhub.funcs->get_fb_location(adev);

	/* add the xgmi offset of the physical node */
	base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
	amdgpu_gmc_vram_location(adev, mc, base);
	amdgpu_gmc_gart_location(adev, mc);
	amdgpu_gmc_agp_location(adev, mc);
	/* base offset of vram pages */
	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);

	/* XXX: add the xgmi offset of the physical node? */
	adev->vm_manager.vram_base_offset +=
		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
}
/**
 * gmc_v9_0_mc_init - initialize the memory controller driver params
 *
 * @adev: amdgpu_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space.
 * Returns 0 for success.
 */
static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
{
	int r;

	/* size in MB on si */
	adev->gmc.mc_vram_size =
		adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;

	if (!(adev->flags & AMD_IS_APU)) {
		r = amdgpu_device_resize_fb_bar(adev);
		if (r)
			return r;
	}
	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);

#ifdef CONFIG_X86_64
	if (adev->flags & AMD_IS_APU) {
		adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev);
		adev->gmc.aper_size = adev->gmc.real_vram_size;
	}
#endif
	/* In case the PCI BAR is larger than the actual amount of vram */
	adev->gmc.visible_vram_size = adev->gmc.aper_size;
	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;

	/* set the gart size */
	if (amdgpu_gart_size == -1) {
		switch (adev->asic_type) {
		case CHIP_VEGA10:  /* all engines support GPUVM */
		case CHIP_VEGA12:  /* all engines support GPUVM */
		case CHIP_VEGA20:
		case CHIP_ARCTURUS:
		default:
			adev->gmc.gart_size = 512ULL << 20;
			break;
		case CHIP_RAVEN:   /* DCE SG support */
		case CHIP_RENOIR:
			adev->gmc.gart_size = 1024ULL << 20;
			break;
		}
	} else {
		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
	}

	gmc_v9_0_vram_gtt_location(adev, &adev->gmc);

	return 0;
}
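
/*
 * In gmc_v9_0_gart_init() below, the table size is num_gpu_pages * 8 because
 * each GART entry is a single 64-bit (8-byte) PTE.
 */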
static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->gart.bo) {
		WARN(1, "VEGA10 PCIE GART already initialized\n");
		return 0;
	}
	/* Initialize common gart structure */
	r = amdgpu_gart_init(adev);
	if (r)
		return r;
	adev->gart.table_size = adev->gart.num_gpu_pages * 8;
	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
				 AMDGPU_PTE_EXECUTABLE;
	return amdgpu_gart_table_vram_alloc(adev);
}
/**
 * gmc_v9_0_save_registers - saves regs
 *
 * @adev: amdgpu_device pointer
 *
 * This saves potential register values that should be
 * restored upon resume.
 */
static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_RAVEN)
		adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
}
static int gmc_v9_0_sw_init(void *handle)
{
	int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfxhub.funcs->init(adev);

	adev->mmhub.funcs->init(adev);

	spin_lock_init(&adev->gmc.invalidate_lock);

	r = amdgpu_atomfirmware_get_vram_info(adev,
		&vram_width, &vram_type, &vram_vendor);
	if (amdgpu_sriov_vf(adev))
		/* For Vega10 SR-IOV, vram_width can't be read from ATOM as on
		 * RAVEN, and the DF related registers are not readable;
		 * hardcoding seems to be the only way to set the correct
		 * vram_width.
		 */
		adev->gmc.vram_width = 2048;
	else if (amdgpu_emu_mode != 1)
		adev->gmc.vram_width = vram_width;

	if (!adev->gmc.vram_width) {
		int chansize, numchan;

		/* hbm memory channel size */
		if (adev->flags & AMD_IS_APU)
			chansize = 64;
		else
			chansize = 128;

		numchan = adev->df.funcs->get_hbm_channel_number(adev);
		adev->gmc.vram_width = numchan * chansize;
	}

	adev->gmc.vram_type = vram_type;
	adev->gmc.vram_vendor = vram_vendor;
	switch (adev->asic_type) {
	case CHIP_RAVEN:
		adev->num_vmhubs = 2;

		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		} else {
			/* vm_size is 128TB + 512GB for legacy 3-level page support */
			amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
			adev->gmc.translate_further =
				adev->vm_manager.num_level > 1;
		}
		break;
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RENOIR:
		adev->num_vmhubs = 2;

		/*
		 * To fulfill 4-level page support,
		 * vm size is 256TB (48bit), maximum size of Vega10,
		 * block size 512 (9bit)
		 */
		/* sriov restrict max_pfn below AMDGPU_GMC_HOLE */
		if (amdgpu_sriov_vf(adev))
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
		else
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		break;
	case CHIP_ARCTURUS:
		adev->num_vmhubs = 3;

		/* Keep the vm size same with Vega20 */
		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		break;
	default:
		break;
	}

	/* This interrupt is VMC page fault.*/
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
				&adev->gmc.vm_fault);
	if (r)
		return r;

	if (adev->asic_type == CHIP_ARCTURUS) {
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
					&adev->gmc.vm_fault);
		if (r)
			return r;
	}

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
				&adev->gmc.vm_fault);

	if (r)
		return r;

	if (!amdgpu_sriov_vf(adev)) {
		/* interrupt sent to DF. */
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
				      &adev->gmc.ecc_irq);
		if (r)
			return r;
	}

	/* Set the internal MC address mask
	 * This is the max address of the GPU's
	 * internal address space.
	 */
	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */

	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
	if (r) {
		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
		return r;
	}
	adev->need_swiotlb = drm_need_swiotlb(44);

	if (adev->gmc.xgmi.supported) {
		r = adev->gfxhub.funcs->get_xgmi_info(adev);
		if (r)
			return r;
	}

	r = gmc_v9_0_mc_init(adev);
	if (r)
		return r;

	amdgpu_gmc_get_vbios_allocations(adev);

	/* Memory manager */
	r = amdgpu_bo_init(adev);
	if (r)
		return r;

	r = gmc_v9_0_gart_init(adev);
	if (r)
		return r;

	/*
	 * number of VMs
	 * VMID 0 is reserved for System
	 * amdgpu graphics/compute will use VMIDs 1..n-1
	 * amdkfd will use VMIDs n..15
	 *
	 * The first KFD VMID is 8 for GPUs with graphics, 3 for
	 * compute-only GPUs. On compute-only GPUs that leaves 2 VMIDs
	 * for video processing.
	 */
	adev->vm_manager.first_kfd_vmid =
		adev->asic_type == CHIP_ARCTURUS ? 3 : 8;

	amdgpu_vm_manager_init(adev);

	gmc_v9_0_save_registers(adev);

	return 0;
}
static int gmc_v9_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_gmc_ras_fini(adev);
	amdgpu_gem_force_release(adev);
	amdgpu_vm_manager_fini(adev);
	amdgpu_gart_table_vram_free(adev);
	amdgpu_bo_fini(adev);
	amdgpu_gart_fini(adev);

	return 0;
}
static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if (amdgpu_sriov_vf(adev))
			break;
		fallthrough;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_mmhub_1_0_0,
						ARRAY_SIZE(golden_settings_mmhub_1_0_0));
		soc15_program_register_sequence(adev,
						golden_settings_athub_1_0_0,
						ARRAY_SIZE(golden_settings_athub_1_0_0));
		break;
	case CHIP_RAVEN:
		/* TODO for renoir */
		soc15_program_register_sequence(adev,
						golden_settings_athub_1_0_0,
						ARRAY_SIZE(golden_settings_athub_1_0_0));
		break;
	default:
		break;
	}
}
/**
 * gmc_v9_0_restore_registers - restores regs
 *
 * @adev: amdgpu_device pointer
 *
 * This restores register values, saved at suspend.
 */
void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_RAVEN) {
		WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
		WARN_ON(adev->gmc.sdpif_register !=
			RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0));
	}
}
/**
 * gmc_v9_0_gart_enable - gart enable
 *
 * @adev: amdgpu_device pointer
 */
static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
{
	int r;

	if (adev->gart.bo == NULL) {
		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = amdgpu_gart_table_vram_pin(adev);
	if (r)
		return r;

	r = adev->gfxhub.funcs->gart_enable(adev);
	if (r)
		return r;

	r = adev->mmhub.funcs->gart_enable(adev);
	if (r)
		return r;

	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(adev->gmc.gart_size >> 20),
		 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
	adev->gart.ready = true;
	return 0;
}
static int gmc_v9_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool value;
	int r, i;
	u32 tmp;

	/* The sequence of these two function calls matters.*/
	gmc_v9_0_init_golden_registers(adev);

	if (adev->mode_info.num_crtc) {
		if (adev->asic_type != CHIP_ARCTURUS) {
			/* Lockout access through VGA aperture*/
			WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);

			/* disable VGA render */
			WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
		}
	}

	amdgpu_device_program_register_sequence(adev,
						golden_settings_vega10_hdp,
						ARRAY_SIZE(golden_settings_vega10_hdp));

	if (adev->mmhub.funcs->update_power_gating)
		adev->mmhub.funcs->update_power_gating(adev, true);

	switch (adev->asic_type) {
	case CHIP_ARCTURUS:
		WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1);
		break;
	default:
		break;
	}

	WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);

	tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);

	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));

	/* After HDP is initialized, flush HDP.*/
	adev->nbio.funcs->hdp_flush(adev, NULL);

	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
		value = false;
	else
		value = true;

	if (!amdgpu_sriov_vf(adev)) {
		adev->gfxhub.funcs->set_fault_enable_default(adev, value);
		adev->mmhub.funcs->set_fault_enable_default(adev, value);
	}
	for (i = 0; i < adev->num_vmhubs; ++i)
		gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);

	if (adev->umc.funcs && adev->umc.funcs->init_registers)
		adev->umc.funcs->init_registers(adev);

	r = gmc_v9_0_gart_enable(adev);

	return r;
}
/**
 * gmc_v9_0_gart_disable - gart disable
 *
 * @adev: amdgpu_device pointer
 *
 * This disables all VM page tables.
 */
static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
{
	adev->gfxhub.funcs->gart_disable(adev);
	adev->mmhub.funcs->gart_disable(adev);
	amdgpu_gart_table_vram_unpin(adev);
}
static int gmc_v9_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* full access mode, so don't touch any GMC register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}

	amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
	gmc_v9_0_gart_disable(adev);

	return 0;
}
static int gmc_v9_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gmc_v9_0_hw_fini(adev);
}

static int gmc_v9_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gmc_v9_0_hw_init(adev);
	if (r)
		return r;

	amdgpu_vmid_reset_all(adev);

	return 0;
}
static bool gmc_v9_0_is_idle(void *handle)
{
	/* MC is always ready in GMC v9.*/
	return true;
}

static int gmc_v9_0_wait_for_idle(void *handle)
{
	/* There is no need to wait for MC idle in GMC v9.*/
	return 0;
}

static int gmc_v9_0_soft_reset(void *handle)
{
	/* XXX for emulation.*/
	return 0;
}

static int gmc_v9_0_set_clockgating_state(void *handle,
					enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->mmhub.funcs->set_clockgating(adev, state);

	athub_v1_0_set_clockgating(adev, state);

	return 0;
}

static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->mmhub.funcs->get_clockgating(adev, flags);

	athub_v1_0_get_clockgating(adev, flags);
}

static int gmc_v9_0_set_powergating_state(void *handle,
					enum amd_powergating_state state)
{
	return 0;
}
const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
	.name = "gmc_v9_0",
	.early_init = gmc_v9_0_early_init,
	.late_init = gmc_v9_0_late_init,
	.sw_init = gmc_v9_0_sw_init,
	.sw_fini = gmc_v9_0_sw_fini,
	.hw_init = gmc_v9_0_hw_init,
	.hw_fini = gmc_v9_0_hw_fini,
	.suspend = gmc_v9_0_suspend,
	.resume = gmc_v9_0_resume,
	.is_idle = gmc_v9_0_is_idle,
	.wait_for_idle = gmc_v9_0_wait_for_idle,
	.soft_reset = gmc_v9_0_soft_reset,
	.set_clockgating_state = gmc_v9_0_set_clockgating_state,
	.set_powergating_state = gmc_v9_0_set_powergating_state,
	.get_clockgating_state = gmc_v9_0_get_clockgating_state,
};

const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GMC,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gmc_v9_0_ip_funcs,
};