From 0cc9e952e6efa1f6f2597a305ea20d4b6ecc7573 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Wed, 20 Dec 2023 16:07:22 +0800 Subject: [PATCH] drm/amd/pm: Use separate metric table for APU Use separate metric table for APU and Non APU systems for smu_v_13_0_6 to get metric data Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- .../pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h | 90 ++++++++++++- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 124 ++++++++++-------- 2 files changed, 156 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h index fef2d290f3f2..8f166aa3043c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h @@ -219,7 +219,95 @@ typedef struct __attribute__((packed, aligned(4))) { uint32_t PCIenReplayARolloverCountAcc; // The Pcie counter itself is accumulated uint32_t PCIeNAKSentCountAcc; // The Pcie counter itself is accumulated uint32_t PCIeNAKReceivedCountAcc; // The Pcie counter itself is accumulated -} MetricsTable_t; +} MetricsTableX_t; + +typedef struct __attribute__((packed, aligned(4))) { + uint32_t AccumulationCounter; + + //TEMPERATURE + uint32_t MaxSocketTemperature; + uint32_t MaxVrTemperature; + uint32_t MaxHbmTemperature; + uint64_t MaxSocketTemperatureAcc; + uint64_t MaxVrTemperatureAcc; + uint64_t MaxHbmTemperatureAcc; + + //POWER + uint32_t SocketPowerLimit; + uint32_t MaxSocketPowerLimit; + uint32_t SocketPower; + + //ENERGY + uint64_t Timestamp; + uint64_t SocketEnergyAcc; + uint64_t CcdEnergyAcc; + uint64_t XcdEnergyAcc; + uint64_t AidEnergyAcc; + uint64_t HbmEnergyAcc; + + //FREQUENCY + uint32_t CclkFrequencyLimit; + uint32_t GfxclkFrequencyLimit; + uint32_t FclkFrequency; + uint32_t UclkFrequency; + uint32_t SocclkFrequency[4]; + uint32_t VclkFrequency[4]; + uint32_t DclkFrequency[4]; + uint32_t LclkFrequency[4]; + uint64_t GfxclkFrequencyAcc[8]; + uint64_t CclkFrequencyAcc[96]; + + //FREQUENCY RANGE + uint32_t MaxCclkFrequency; + uint32_t MinCclkFrequency; + uint32_t MaxGfxclkFrequency; + uint32_t MinGfxclkFrequency; + uint32_t FclkFrequencyTable[4]; + uint32_t UclkFrequencyTable[4]; + uint32_t SocclkFrequencyTable[4]; + uint32_t VclkFrequencyTable[4]; + uint32_t DclkFrequencyTable[4]; + uint32_t LclkFrequencyTable[4]; + uint32_t MaxLclkDpmRange; + uint32_t MinLclkDpmRange; + + //XGMI + uint32_t XgmiWidth; + uint32_t XgmiBitrate; + uint64_t XgmiReadBandwidthAcc[8]; + uint64_t XgmiWriteBandwidthAcc[8]; + + //ACTIVITY + uint32_t SocketC0Residency; + uint32_t SocketGfxBusy; + uint32_t DramBandwidthUtilization; + uint64_t SocketC0ResidencyAcc; + uint64_t SocketGfxBusyAcc; + uint64_t DramBandwidthAcc; + uint32_t MaxDramBandwidth; + uint64_t DramBandwidthUtilizationAcc; + uint64_t PcieBandwidthAcc[4]; + + //THROTTLERS + uint32_t ProchotResidencyAcc; + uint32_t PptResidencyAcc; + uint32_t SocketThmResidencyAcc; + uint32_t VrThmResidencyAcc; + uint32_t HbmThmResidencyAcc; + uint32_t GfxLockXCDMak; + + // New Items at end to maintain driver compatibility + uint32_t GfxclkFrequency[8]; + + //PSNs + uint64_t PublicSerialNumber_AID[4]; + uint64_t PublicSerialNumber_XCD[8]; + uint64_t PublicSerialNumber_CCD[12]; + + //XGMI Data tranfser size + uint64_t XgmiReadDataSizeAcc[8];//in KByte + uint64_t XgmiWriteDataSizeAcc[8];//in KByte +} MetricsTableA_t; #define SMU_VF_METRICS_TABLE_VERSION 0x3 diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 900a2d9e6d85..17bc5ffbfd97 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -245,6 +245,8 @@ struct PPTable_t { #define SMUQ10_TO_UINT(x) ((x) >> 10) #define SMUQ10_FRAC(x) ((x) & 0x3ff) #define SMUQ10_ROUND(x) ((SMUQ10_TO_UINT(x)) + ((SMUQ10_FRAC(x)) >= 0x200)) +#define GET_METRIC_FIELD(field) ((adev->flags & AMD_IS_APU) ?\ + (metrics_a->field) : (metrics_x->field)) struct smu_v13_0_6_dpm_map { enum smu_clk_type clk_type; @@ -327,7 +329,8 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU13_TOOL_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(MetricsTable_t), + SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, + max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); @@ -335,7 +338,8 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); - smu_table->metrics_table = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL); + smu_table->metrics_table = kzalloc(max(sizeof(MetricsTableX_t), + sizeof(MetricsTableA_t)), GFP_KERNEL); if (!smu_table->metrics_table) return -ENOMEM; smu_table->metrics_time = 0; @@ -431,9 +435,11 @@ static int smu_v13_0_6_get_metrics_table(struct smu_context *smu, static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; - MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table; + MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table; + MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; + struct amdgpu_device *adev = smu->adev; int ret, i, retry = 100; /* Store one-time values in driver PPTable */ @@ -444,7 +450,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) return ret; /* Ensure that metrics have been updated */ - if (metrics->AccumulationCounter) + if (GET_METRIC_FIELD(AccumulationCounter)) break; usleep_range(1000, 1100); @@ -454,29 +460,29 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) return -ETIME; pptable->MaxSocketPowerLimit = - SMUQ10_ROUND(metrics->MaxSocketPowerLimit); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketPowerLimit)); pptable->MaxGfxclkFrequency = - SMUQ10_ROUND(metrics->MaxGfxclkFrequency); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxGfxclkFrequency)); pptable->MinGfxclkFrequency = - SMUQ10_ROUND(metrics->MinGfxclkFrequency); + SMUQ10_ROUND(GET_METRIC_FIELD(MinGfxclkFrequency)); for (i = 0; i < 4; ++i) { pptable->FclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->FclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequencyTable)[i]); pptable->UclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->UclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequencyTable)[i]); pptable->SocclkFrequencyTable[i] = SMUQ10_ROUND( - metrics->SocclkFrequencyTable[i]); + GET_METRIC_FIELD(SocclkFrequencyTable)[i]); pptable->VclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->VclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequencyTable)[i]); pptable->DclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->DclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequencyTable)[i]); pptable->LclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->LclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(LclkFrequencyTable)[i]); } /* use AID0 serial number by default */ - pptable->PublicSerialNumber_AID = metrics->PublicSerialNumber_AID[0]; + pptable->PublicSerialNumber_AID = GET_METRIC_FIELD(PublicSerialNumber_AID)[0]; pptable->Init = true; } @@ -778,7 +784,8 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, uint32_t *value) { struct smu_table_context *smu_table = &smu->smu_table; - MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table; + MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table; + MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; struct amdgpu_device *adev = smu->adev; int ret = 0; int xcc_id; @@ -793,50 +800,50 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, case METRICS_AVERAGE_GFXCLK: if (smu->smc_fw_version >= 0x552F00) { xcc_id = GET_INST(GC, 0); - *value = SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]); } else { *value = 0; } break; case METRICS_CURR_SOCCLK: case METRICS_AVERAGE_SOCCLK: - *value = SMUQ10_ROUND(metrics->SocclkFrequency[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[0]); break; case METRICS_CURR_UCLK: case METRICS_AVERAGE_UCLK: - *value = SMUQ10_ROUND(metrics->UclkFrequency); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency)); break; case METRICS_CURR_VCLK: - *value = SMUQ10_ROUND(metrics->VclkFrequency[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[0]); break; case METRICS_CURR_DCLK: - *value = SMUQ10_ROUND(metrics->DclkFrequency[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[0]); break; case METRICS_CURR_FCLK: - *value = SMUQ10_ROUND(metrics->FclkFrequency); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequency)); break; case METRICS_AVERAGE_GFXACTIVITY: - *value = SMUQ10_ROUND(metrics->SocketGfxBusy); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy)); break; case METRICS_AVERAGE_MEMACTIVITY: - *value = SMUQ10_ROUND(metrics->DramBandwidthUtilization); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization)); break; case METRICS_CURR_SOCKETPOWER: - *value = SMUQ10_ROUND(metrics->SocketPower) << 8; + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower)) << 8; break; case METRICS_TEMPERATURE_HOTSPOT: - *value = SMUQ10_ROUND(metrics->MaxSocketTemperature) * + *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_TEMPERATURE_MEM: - *value = SMUQ10_ROUND(metrics->MaxHbmTemperature) * + *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; /* This is the max of all VRs and not just SOC VR. * No need to define another data type for the same. */ case METRICS_TEMPERATURE_VRSOC: - *value = SMUQ10_ROUND(metrics->MaxVrTemperature) * + *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; default: @@ -2026,63 +2033,66 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table (struct gpu_metrics_v1_4 *)smu_table->gpu_metrics_table; struct amdgpu_device *adev = smu->adev; int ret = 0, xcc_id, inst, i; - MetricsTable_t *metrics; + MetricsTableX_t *metrics_x; + MetricsTableA_t *metrics_a; u16 link_width_level; - metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL); - ret = smu_v13_0_6_get_metrics_table(smu, metrics, true); + metrics_x = kzalloc(max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), GFP_KERNEL); + ret = smu_v13_0_6_get_metrics_table(smu, metrics_x, true); if (ret) { - kfree(metrics); + kfree(metrics_x); return ret; } + metrics_a = (MetricsTableA_t *)metrics_x; + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 4); gpu_metrics->temperature_hotspot = - SMUQ10_ROUND(metrics->MaxSocketTemperature); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)); /* Individual HBM stack temperature is not reported */ gpu_metrics->temperature_mem = - SMUQ10_ROUND(metrics->MaxHbmTemperature); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature)); /* Reports max temperature of all voltage rails */ gpu_metrics->temperature_vrsoc = - SMUQ10_ROUND(metrics->MaxVrTemperature); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature)); gpu_metrics->average_gfx_activity = - SMUQ10_ROUND(metrics->SocketGfxBusy); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy)); gpu_metrics->average_umc_activity = - SMUQ10_ROUND(metrics->DramBandwidthUtilization); + SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization)); gpu_metrics->curr_socket_power = - SMUQ10_ROUND(metrics->SocketPower); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower)); /* Energy counter reported in 15.259uJ (2^-16) units */ - gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc; + gpu_metrics->energy_accumulator = GET_METRIC_FIELD(SocketEnergyAcc); for (i = 0; i < MAX_GFX_CLKS; i++) { xcc_id = GET_INST(GC, i); if (xcc_id >= 0) gpu_metrics->current_gfxclk[i] = - SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]); + SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]); if (i < MAX_CLKS) { gpu_metrics->current_socclk[i] = - SMUQ10_ROUND(metrics->SocclkFrequency[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[i]); inst = GET_INST(VCN, i); if (inst >= 0) { gpu_metrics->current_vclk0[i] = - SMUQ10_ROUND(metrics->VclkFrequency[inst]); + SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[inst]); gpu_metrics->current_dclk0[i] = - SMUQ10_ROUND(metrics->DclkFrequency[inst]); + SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[inst]); } } } - gpu_metrics->current_uclk = SMUQ10_ROUND(metrics->UclkFrequency); + gpu_metrics->current_uclk = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency)); /* Throttle status is not reported through metrics now */ gpu_metrics->throttle_status = 0; /* Clock Lock Status. Each bit corresponds to each GFXCLK instance */ - gpu_metrics->gfxclk_lock_status = metrics->GfxLockXCDMak >> GET_INST(GC, 0); + gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak) >> GET_INST(GC, 0); if (!(adev->flags & AMD_IS_APU)) { link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu); @@ -2094,38 +2104,38 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->pcie_link_speed = smu_v13_0_6_get_current_pcie_link_speed(smu); gpu_metrics->pcie_bandwidth_acc = - SMUQ10_ROUND(metrics->PcieBandwidthAcc[0]); + SMUQ10_ROUND(metrics_x->PcieBandwidthAcc[0]); gpu_metrics->pcie_bandwidth_inst = - SMUQ10_ROUND(metrics->PcieBandwidth[0]); + SMUQ10_ROUND(metrics_x->PcieBandwidth[0]); gpu_metrics->pcie_l0_to_recov_count_acc = - metrics->PCIeL0ToRecoveryCountAcc; + metrics_x->PCIeL0ToRecoveryCountAcc; gpu_metrics->pcie_replay_count_acc = - metrics->PCIenReplayAAcc; + metrics_x->PCIenReplayAAcc; gpu_metrics->pcie_replay_rover_count_acc = - metrics->PCIenReplayARolloverCountAcc; + metrics_x->PCIenReplayARolloverCountAcc; } gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); gpu_metrics->gfx_activity_acc = - SMUQ10_ROUND(metrics->SocketGfxBusyAcc); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusyAcc)); gpu_metrics->mem_activity_acc = - SMUQ10_ROUND(metrics->DramBandwidthUtilizationAcc); + SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilizationAcc)); for (i = 0; i < NUM_XGMI_LINKS; i++) { gpu_metrics->xgmi_read_data_acc[i] = - SMUQ10_ROUND(metrics->XgmiReadDataSizeAcc[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(XgmiReadDataSizeAcc)[i]); gpu_metrics->xgmi_write_data_acc[i] = - SMUQ10_ROUND(metrics->XgmiWriteDataSizeAcc[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc)[i]); } - gpu_metrics->xgmi_link_width = SMUQ10_ROUND(metrics->XgmiWidth); - gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(metrics->XgmiBitrate); + gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth)); + gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate)); - gpu_metrics->firmware_timestamp = metrics->Timestamp; + gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp); *table = (void *)gpu_metrics; - kfree(metrics); + kfree(metrics_x); return sizeof(*gpu_metrics); } -- 2.39.5