git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/commitdiff
habanalabs: set max power according to card type
author: Oded Gabbay <oded.gabbay@gmail.com>
Sat, 8 Aug 2020 20:34:47 +0000 (23:34 +0300)
committer: Oded Gabbay <oded.gabbay@gmail.com>
Sat, 22 Aug 2020 09:47:57 +0000 (12:47 +0300)
In Gaudi, the default max power setting is different between PCI and PMC
cards. Therefore, the driver needs to set the default after it knows the
card type.

The current code has a bug where it limits the maximum power of the PMC
card to 200W after a reset occurs.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/sysfs.c
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/habanalabs/gaudi/gaudiP.h

index be16b75bdfdb5bbd0ebb54b550a2a4fcfe36e76e..8e34c39380a9ba481c98ef91d59a78e12fd92bc4 100644 (file)
@@ -1069,7 +1069,7 @@ again:
                        goto out_err;
                }
 
-               hl_set_max_power(hdev, hdev->max_power);
+               hl_set_max_power(hdev);
        } else {
                rc = hdev->asic_funcs->soft_reset_late_init(hdev);
                if (rc) {
@@ -1318,6 +1318,11 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
                goto out_disabled;
        }
 
+       /* Need to call this again because the max power might change,
+        * depending on card type for certain ASICs
+        */
+       hl_set_max_power(hdev);
+
        /*
         * hl_hwmon_init() must be called after device_late_init(), because only
         * there we get the information from the device about which
index 13c18f3d9a9b27b095f5cb65b18cee857a3520c8..edbd627b29d25749e6024223afbb1874c6ee69f6 100644 (file)
@@ -1462,6 +1462,8 @@ struct hl_device_idle_busy_ts {
  *                     details.
  * @in_reset: is device in reset flow.
  * @curr_pll_profile: current PLL profile.
+ * @card_type: Various ASICs have several card types. This indicates the card
+ *             type of the current device.
  * @cs_active_cnt: number of active command submissions on this device (active
  *                 means already in H/W queues)
  * @major: habanalabs kernel driver major.
@@ -1566,6 +1568,7 @@ struct hl_device {
        u64                             clock_gating_mask;
        atomic_t                        in_reset;
        enum hl_pll_frequency           curr_pll_profile;
+       enum armcp_card_types           card_type;
        int                             cs_active_cnt;
        u32                             major;
        u32                             high_pll;
@@ -1858,7 +1861,7 @@ int hl_get_pwm_info(struct hl_device *hdev,
 void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
                        long value);
 u64 hl_get_max_power(struct hl_device *hdev);
-void hl_set_max_power(struct hl_device *hdev, u64 value);
+void hl_set_max_power(struct hl_device *hdev);
 int hl_set_voltage(struct hl_device *hdev,
                        int sensor_index, u32 attr, long value);
 int hl_set_current(struct hl_device *hdev,
index b3cb0ac4721c5bb3d74b7d24d9371e97de8df03c..5ae484cc84cd4fa1087d349ffdd99af00c2cd1f0 100644 (file)
@@ -81,7 +81,7 @@ u64 hl_get_max_power(struct hl_device *hdev)
        return result;
 }
 
-void hl_set_max_power(struct hl_device *hdev, u64 value)
+void hl_set_max_power(struct hl_device *hdev)
 {
        struct armcp_packet pkt;
        int rc;
@@ -90,7 +90,7 @@ void hl_set_max_power(struct hl_device *hdev, u64 value)
 
        pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET <<
                                ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.value = cpu_to_le64(value);
+       pkt.value = cpu_to_le64(hdev->max_power);
 
        rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
                                                0, NULL);
@@ -316,7 +316,7 @@ static ssize_t max_power_store(struct device *dev,
        }
 
        hdev->max_power = value;
-       hl_set_max_power(hdev, value);
+       hl_set_max_power(hdev);
 
 out:
        return count;
@@ -422,6 +422,7 @@ int hl_sysfs_init(struct hl_device *hdev)
                hdev->pm_mng_profile = PM_AUTO;
        else
                hdev->pm_mng_profile = PM_MANUAL;
+
        hdev->max_power = hdev->asic_prop.max_power_default;
 
        hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group);
index 7e0f9f64ffcbe4a5e9690a58542a97f682bd8149..4009b7df4cafec33ae1dc8bdb7090e18def1224a 100644 (file)
@@ -456,7 +456,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
        prop->num_of_events = GAUDI_EVENT_SIZE;
        prop->tpc_enabled_mask = TPC_ENABLED_MASK;
 
-       prop->max_power_default = MAX_POWER_DEFAULT;
+       prop->max_power_default = MAX_POWER_DEFAULT_PCI;
 
        prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
        prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
@@ -6055,6 +6055,15 @@ static int gaudi_armcp_info_get(struct hl_device *hdev)
                strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
                                CARD_NAME_MAX_LEN);
 
+       hdev->card_type = le32_to_cpu(hdev->asic_prop.armcp_info.card_type);
+
+       if (hdev->card_type == armcp_card_type_pci)
+               prop->max_power_default = MAX_POWER_DEFAULT_PCI;
+       else if (hdev->card_type == armcp_card_type_pmc)
+               prop->max_power_default = MAX_POWER_DEFAULT_PMC;
+
+       hdev->max_power = prop->max_power_default;
+
        return 0;
 }
 
index 5dc99f6f02963495252676d51c55d0af90f7cc43..82137c3f3e2e6415343dc2aa8ccc7f17d7e404df 100644 (file)
@@ -41,7 +41,8 @@
 
 #define GAUDI_MAX_CLK_FREQ             2200000000ull   /* 2200 MHz */
 
-#define MAX_POWER_DEFAULT              200000          /* 200W */
+#define MAX_POWER_DEFAULT_PCI          200000          /* 200W */
+#define MAX_POWER_DEFAULT_PMC          350000          /* 350W */
 
 #define GAUDI_CPU_TIMEOUT_USEC         15000000        /* 15s */