git.proxmox.com Git - pve-kernel-jessie.git/commitdiff
revert buggy NVME setup commit
author Fabian Grünbichler <f.gruenbichler@proxmox.com>
Thu, 19 Jan 2017 14:15:04 +0000 (15:15 +0100)
committer Fabian Grünbichler <f.gruenbichler@proxmox.com>
Fri, 20 Jan 2017 12:44:25 +0000 (13:44 +0100)
see https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1626894

Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Makefile
nvme-revert-NVMe-only-setup-MSIX-once.patch [new file with mode: 0644]

index dfb2060a2ce444c401cea680b5943155151e0228..bbf92e3e78b0614c10ead5c36834048aaf989a2e 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -268,6 +268,7 @@ ${KERNEL_SRC}/README ${KERNEL_CFG_ORG}: ${KERNELSRCTAR}
        cd ${KERNEL_SRC}; patch -p1 < ../cgroup-cpuset-add-cpuset.remap_cpus.patch
        cd ${KERNEL_SRC}; patch -p1 < ../0001-Revert-mm-throttle-on-IO-only-when-there-are-too-man.patch
        cd ${KERNEL_SRC}; patch -p1 < ../0002-Revert-mm-oom-rework-oom-detection.patch
+       cd ${KERNEL_SRC}; patch -p1 < ../nvme-revert-NVMe-only-setup-MSIX-once.patch 
        sed -i ${KERNEL_SRC}/Makefile -e 's/^EXTRAVERSION.*$$/EXTRAVERSION=${EXTRAVERSION}/'
        touch $@
 
diff --git a/nvme-revert-NVMe-only-setup-MSIX-once.patch b/nvme-revert-NVMe-only-setup-MSIX-once.patch
new file mode 100644 (file)
index 0000000..b46221e
--- /dev/null
@@ -0,0 +1,128 @@
+From af220b3adff164d1b8b89d7d5c8bb741d6195012 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
+Date: Thu, 19 Jan 2017 15:19:46 +0100
+Subject: [PATCH] Revert "UBUNTU: SAUCE: (no-up) NVMe: only setup MSIX once"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This reverts commit 96fce9e4025b96b08bfe5196d3380ab9215cb64b.
+
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ drivers/nvme/host/pci.c | 73 ++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 51 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
+index ae1f695..b9cf5aa 100644
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -1613,7 +1613,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
+       struct pci_dev *pdev = to_pci_dev(dev->dev);
+       int result, i, vecs, nr_io_queues, size;
+-      nr_io_queues = dev->max_qid + 1;
++      nr_io_queues = num_possible_cpus();
+       result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
+       if (result < 0)
+               return result;
+@@ -1653,7 +1653,45 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
+               adminq->q_db = dev->dbs;
+       }
+-      dev->max_qid = nr_io_queues - 1;
++      /* Deregister the admin queue's interrupt */
++      free_irq(dev->entry[0].vector, adminq);
++
++      /*
++       * If we enable msix early due to not intx, disable it again before
++       * setting up the full range we need.
++       */
++      if (pdev->msi_enabled)
++              pci_disable_msi(pdev);
++      else if (pdev->msix_enabled)
++              pci_disable_msix(pdev);
++
++      for (i = 0; i < nr_io_queues; i++)
++              dev->entry[i].entry = i;
++      vecs = pci_enable_msix_range(pdev, dev->entry, 1, nr_io_queues);
++      if (vecs < 0) {
++              vecs = pci_enable_msi_range(pdev, 1, min(nr_io_queues, 32));
++              if (vecs < 0) {
++                      vecs = 1;
++              } else {
++                      for (i = 0; i < vecs; i++)
++                              dev->entry[i].vector = i + pdev->irq;
++              }
++      }
++
++      /*
++       * Should investigate if there's a performance win from allocating
++       * more queues than interrupt vectors; it might allow the submission
++       * path to scale better, even if the receive path is limited by the
++       * number of interrupts.
++       */
++      nr_io_queues = vecs;
++      dev->max_qid = nr_io_queues;
++
++      result = queue_request_irq(dev, adminq, adminq->irqname);
++      if (result) {
++              adminq->cq_vector = -1;
++              goto free_queues;
++      }
+       /* Free previously allocated queues that are no longer usable */
+       nvme_free_queues(dev, nr_io_queues + 1);
+@@ -1806,7 +1844,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
+ static int nvme_pci_enable(struct nvme_dev *dev)
+ {
+       u64 cap;
+-      int result = -ENOMEM, nr_io_queues, i, vecs;
++      int result = -ENOMEM;
+       struct pci_dev *pdev = to_pci_dev(dev->dev);
+       if (pci_enable_device_mem(pdev))
+@@ -1823,30 +1861,21 @@ static int nvme_pci_enable(struct nvme_dev *dev)
+               goto disable;
+       }
+-      nr_io_queues = num_possible_cpus();
+-
+-      for (i = 0; i < nr_io_queues; i++)
+-              dev->entry[i].entry = i;
+-      vecs = pci_enable_msix_range(pdev, dev->entry, 1, nr_io_queues);
+-      if (vecs < 0) {
+-              vecs = pci_enable_msi_range(pdev, 1, min(nr_io_queues, 32));
+-              if (vecs < 0) {
+-                      result = vecs;
+-                      goto disable;
+-              } else {
+-                      for (i = 0; i < vecs; i++)
+-                              dev->entry[i].vector = i + pdev->irq;
+-              }
++      /*
++       * Some devices and/or platforms don't advertise or work with INTx
++       * interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll
++       * adjust this later.
++       */
++      if (pci_enable_msix(pdev, dev->entry, 1)) {
++              pci_enable_msi(pdev);
++              dev->entry[0].vector = pdev->irq;
+       }
+-      if (vecs < 2) {
+-              dev_err(dev->ctrl.device, "Failed to get enough MSI/MSIX interrupts\n");
+-              result = -ENOSPC;
++      if (!dev->entry[0].vector) {
++              result = -ENODEV;
+               goto disable;
+       }
+-      dev->max_qid = vecs - 1;
+-
+       cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+       dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
+-- 
+2.1.4
+