From 0b5db99406f3351352ddd9bdb8760cbf75da9ebe Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 23 May 2019 09:27:35 -0600 Subject: [PATCH] nvme-pci: Use host managed power state for suspend BugLink: https://bugs.launchpad.net/bugs/1808957 The nvme pci driver prepares its devices for power loss during suspend by shutting down the controllers. The power setting is deferred to pci driver's power management before the platform removes power. The suspend-to-idle mode, however, does not remove power. NVMe devices that implement host managed power settings can achieve lower power and better transition latencies than using generic PCI power settings. Try to use this feature if the platform is not involved with the suspend. If successful, restore the previous power state on resume. Cc: Mario Limonciello Cc: Kai Heng Feng Tested-by: Kai-Heng Feng Tested-by: Mario Limonciello Reviewed-by: Rafael J. Wysocki Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Sagi Grimberg (backported from commit a0805317252ad9cf09d4a32b0435e165580adf8a git://git.infradead.org/nvme nvme-5.3) Signed-off-by: Kai-Heng Feng Acked-by: Timo Aaltonen Acked-by: Stefan Bader Signed-off-by: Kleber Sacilotto de Souza --- drivers/nvme/host/pci.c | 95 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c108e603c66c..427239f3ad36 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -98,6 +99,7 @@ struct nvme_dev { u32 cmbloc; struct nvme_ctrl ctrl; struct completion ioq_wait; + u32 last_ps; /* shadow doorbell buffer support: */ u32 *dbbuf_dbs; @@ -2615,16 +2617,93 @@ static int nvme_pci_sriov_configure(struct pci_dev *pdev, int numvfs) } #ifdef CONFIG_PM_SLEEP +static int nvme_get_power_state(struct nvme_ctrl *ctrl, u32 *ps) +{ + return nvme_get_features(ctrl, NVME_FEAT_POWER_MGMT, 0, NULL, 0, ps); +} + +static int nvme_set_power_state(struct nvme_ctrl *ctrl, u32 ps) +{ + return nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, ps, NULL, 0, NULL); +} + +static int nvme_resume(struct device *dev) +{ + struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev)); + struct nvme_ctrl *ctrl = &ndev->ctrl; + + if (pm_resume_via_firmware() || !ctrl->npss || + nvme_set_power_state(ctrl, ndev->last_ps) != 0) + nvme_reset_ctrl(ctrl); + return 0; +} + static int nvme_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); + struct nvme_ctrl *ctrl = &ndev->ctrl; + int ret = -EBUSY; + + /* + * The platform does not remove power for a kernel managed suspend so + * use host managed nvme power settings for lowest idle power if + * possible. This should have quicker resume latency than a full device + * shutdown. But if the firmware is involved after the suspend or the + * device does not support any non-default power states, shut down the + * device fully. + */ + if (pm_suspend_via_firmware() || !ctrl->npss) { + nvme_dev_disable(ndev, true); + return 0; + } + + nvme_start_freeze(ctrl); + nvme_wait_freeze(ctrl); + nvme_sync_queues(ctrl); + + if (ctrl->state != NVME_CTRL_LIVE) + goto unfreeze; + + ndev->last_ps = 0; + ret = nvme_get_power_state(ctrl, &ndev->last_ps); + if (ret < 0) + goto unfreeze; + + ret = nvme_set_power_state(ctrl, ctrl->npss); + if (ret < 0) + goto unfreeze; + + if (ret) { + /* + * Clearing npss forces a controller reset on resume. The + * correct value will be resdicovered then. + */ + nvme_dev_disable(ndev, true); + ctrl->npss = 0; + ret = 0; + goto unfreeze; + } + /* + * A saved state prevents pci pm from generically controlling the + * device's power. If we're using protocol specific settings, we don't + * want pci interfering. + */ + pci_save_state(pdev); +unfreeze: + nvme_unfreeze(ctrl); + return ret; +} + +static int nvme_simple_suspend(struct device *dev) +{ + struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev)); nvme_dev_disable(ndev, true); return 0; } -static int nvme_resume(struct device *dev) +static int nvme_simple_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); @@ -2632,9 +2711,19 @@ static int nvme_resume(struct device *dev) nvme_reset_ctrl(&ndev->ctrl); return 0; } -#endif -static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume); +const struct dev_pm_ops nvme_dev_pm_ops = { + .suspend = nvme_suspend, + .resume = nvme_resume, + .freeze = nvme_simple_suspend, + .thaw = nvme_simple_resume, + .poweroff = nvme_simple_suspend, + .restore = nvme_simple_resume, +}; + +#else +#define nvme_dev_pm_ops NULL +#endif static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev, pci_channel_state_t state) -- 2.39.2