nvme/pci: Don't set reserved SQ create flags

This blobdiff folds several upstream nvme changes to pci.c into one view;
the reserved-SQ-create-flags fix named in the subject is the
adapter_alloc_sq() hunk below.

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index ddc51adb594d0ba3df2e800b9247e2b3cb161847..af783a33e93a8016c508dad9cb5625c74662197f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -326,10 +326,6 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
        iod->nents = 0;
        iod->length = size;
 
-       if (!(rq->rq_flags & RQF_DONTPREP)) {
-               rq->retries = 0;
-               rq->rq_flags |= RQF_DONTPREP;
-       }
        return BLK_MQ_RQ_QUEUE_OK;
 }
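
The RQF_DONTPREP/retries setup dropped here moves into the
transport-independent core rather than disappearing; upstream placed it in
nvme_setup_cmd() in core.c (placement assumed from the upstream series,
since this blobdiff only shows pci.c).  Roughly:

	/* core.c, nvme_setup_cmd() -- sketch, placement assumed */
	if (!(req->rq_flags & RQF_DONTPREP)) {
		nvme_req(req)->retries = 0;
		nvme_req(req)->flags = 0;
		req->rq_flags |= RQF_DONTPREP;
	}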
 
@@ -613,10 +609,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 
        spin_lock_irq(&nvmeq->q_lock);
        if (unlikely(nvmeq->cq_vector < 0)) {
-               if (ns && !test_bit(NVME_NS_DEAD, &ns->flags))
-                       ret = BLK_MQ_RQ_QUEUE_BUSY;
-               else
-                       ret = BLK_MQ_RQ_QUEUE_ERROR;
+               ret = BLK_MQ_RQ_QUEUE_ERROR;
                spin_unlock_irq(&nvmeq->q_lock);
                goto out_cleanup_iod;
        }
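
With this hunk a dead queue (cq_vector < 0) fails requests immediately
instead of returning BLK_MQ_RQ_QUEUE_BUSY for live namespaces.  That is
presumably safe because of the shutdown rework in nvme_dev_disable()
further down: entered requests are now frozen and then flushed to
completion, so there is nothing to be gained by holding requests on a
queue that will not come back during this teardown.
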
@@ -631,34 +624,12 @@ out_free_cmd:
        return ret;
 }
 
-static void nvme_complete_rq(struct request *req)
+static void nvme_pci_complete_rq(struct request *req)
 {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-       struct nvme_dev *dev = iod->nvmeq->dev;
-       int error = 0;
-
-       nvme_unmap_data(dev, req);
-
-       if (unlikely(req->errors)) {
-               if (nvme_req_needs_retry(req, req->errors)) {
-                       req->retries++;
-                       nvme_requeue_req(req);
-                       return;
-               }
-
-               if (blk_rq_is_passthrough(req))
-                       error = req->errors;
-               else
-                       error = nvme_error_status(req->errors);
-       }
-
-       if (unlikely(iod->aborted)) {
-               dev_warn(dev->ctrl.device,
-                       "completing aborted command with status: %04x\n",
-                       req->errors);
-       }
 
-       blk_mq_end_request(req, error);
+       nvme_unmap_data(iod->nvmeq->dev, req);
+       nvme_complete_rq(req);
 }
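
Generic completion handling (retry accounting, passthrough vs. block error
translation, and the final blk_mq_end_request()) is hoisted into a shared
nvme_complete_rq() in core.c, leaving the PCI driver with just the DMA
unmap.  A sketch of the core-side function as it looked in this era --
details assumed, since core.c is not part of this blobdiff:

	void nvme_complete_rq(struct request *req)
	{
		if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
			nvme_req(req)->retries++;
			blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
			return;
		}
		blk_mq_end_request(req, nvme_error_status(req));
	}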
 
 /* We read the CQE phase first to check if the rest of the entry is valid */
@@ -708,8 +679,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
                }
 
                req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
-               nvme_req(req)->result = cqe.result;
-               blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
+               nvme_end_request(req, cqe.status, cqe.result);
        }
 
        if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
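
nvme_end_request() is a small inline helper in nvme.h replacing the
open-coded result/status handling above; it performs the le16_to_cpu()
swap and strips the phase bit itself, which is why the caller no longer
does.  Sketched from memory, exact body assumed:

	static inline void nvme_end_request(struct request *req, __le16 status,
			union nvme_result result)
	{
		struct nvme_request *rq = nvme_req(req);

		rq->status = le16_to_cpu(status) >> 1;
		rq->result = result;
		/* second (error) argument still existed at this kernel vintage */
		blk_mq_complete_request(req, 0);
	}
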
@@ -815,7 +785,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
                                                struct nvme_queue *nvmeq)
 {
        struct nvme_command c;
-       int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
+       int flags = NVME_QUEUE_PHYS_CONTIG;
 
        /*
         * Note: we (ab)use the fact that the prp fields survive if no data
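
This hunk is the fix named in the commit subject.  In the NVMe spec the
QPRIO field of Create I/O SQ (CDW11 bits 02:01) is only defined when
weighted round robin arbitration was selected at controller enable time;
this driver never enables WRR, so those bits are reserved and must be
zero.  NVME_SQ_PRIO_MEDIUM encodes a nonzero value there, which a
controller that validates reserved bits may reject.  From the flag
definitions in include/linux/nvme.h (excerpt):

	enum {
		NVME_QUEUE_PHYS_CONTIG	= (1 << 0),
		NVME_SQ_PRIO_URGENT	= (0 << 1),
		NVME_SQ_PRIO_HIGH	= (1 << 1),
		NVME_SQ_PRIO_MEDIUM	= (2 << 1),
		NVME_SQ_PRIO_LOW	= (3 << 1),
	};
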
@@ -846,9 +816,9 @@ static void abort_endio(struct request *req, int error)
 {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct nvme_queue *nvmeq = iod->nvmeq;
-       u16 status = req->errors;
 
-       dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", status);
+       dev_warn(nvmeq->dev->ctrl.device,
+                "Abort status: 0x%x", nvme_req(req)->status);
        atomic_inc(&nvmeq->dev->ctrl.abort_limit);
        blk_mq_free_request(req);
 }
@@ -872,7 +842,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
                         "I/O %d QID %d timeout, disable controller\n",
                         req->tag, nvmeq->qid);
                nvme_dev_disable(dev, false);
-               req->errors = NVME_SC_CANCELLED;
+               nvme_req(req)->flags |= NVME_REQ_CANCELLED;
                return BLK_EH_HANDLED;
        }
 
@@ -892,7 +862,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
                 * Mark the request as handled, since the inline shutdown
                 * forces all outstanding requests to complete.
                 */
-               req->errors = NVME_SC_CANCELLED;
+               nvme_req(req)->flags |= NVME_REQ_CANCELLED;
                return BLK_EH_HANDLED;
        }
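
Instead of faking a status code in req->errors, the two timeout paths now
set a driver-private NVME_REQ_CANCELLED flag on the per-request
nvme_request, and the core translates that at completion time.  The
supporting definitions live in nvme.h; roughly:

	struct nvme_request {
		struct nvme_command	*cmd;
		union nvme_result	result;
		u8			retries;
		u8			flags;
		u16			status;
	};

	enum {
		NVME_REQ_CANCELLED	= (1 << 0),
	};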
 
@@ -1041,9 +1011,10 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
 }
 
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
-                                                       int depth)
+                                                       int depth, int node)
 {
-       struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
+       struct nvme_queue *nvmeq = kzalloc_node(sizeof(*nvmeq), GFP_KERNEL,
+                                                       node);
        if (!nvmeq)
                return NULL;
 
@@ -1131,18 +1102,18 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
        return result;
 }
 
-static struct blk_mq_ops nvme_mq_admin_ops = {
+static const struct blk_mq_ops nvme_mq_admin_ops = {
        .queue_rq       = nvme_queue_rq,
-       .complete       = nvme_complete_rq,
+       .complete       = nvme_pci_complete_rq,
        .init_hctx      = nvme_admin_init_hctx,
        .exit_hctx      = nvme_admin_exit_hctx,
        .init_request   = nvme_admin_init_request,
        .timeout        = nvme_timeout,
 };
 
-static struct blk_mq_ops nvme_mq_ops = {
+static const struct blk_mq_ops nvme_mq_ops = {
        .queue_rq       = nvme_queue_rq,
-       .complete       = nvme_complete_rq,
+       .complete       = nvme_pci_complete_rq,
        .init_hctx      = nvme_init_hctx,
        .init_request   = nvme_init_request,
        .map_queues     = nvme_pci_map_queues,
@@ -1220,7 +1191,8 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 
        nvmeq = dev->queues[0];
        if (!nvmeq) {
-               nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
+               nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH,
+                                       dev_to_node(dev->dev));
                if (!nvmeq)
                        return -ENOMEM;
        }
@@ -1312,7 +1284,9 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
        int ret = 0;
 
        for (i = dev->queue_count; i <= dev->max_qid; i++) {
-               if (!nvme_alloc_queue(dev, i, dev->q_depth)) {
+               /* vector == qid - 1, match nvme_create_queue */
+               if (!nvme_alloc_queue(dev, i, dev->q_depth,
+                    pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
                        ret = -ENOMEM;
                        break;
                }
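
The extra parameter threaded through the three allocation hunks above
makes each nvme_queue structure NUMA-local: the admin queue is allocated
on the device's node, and each I/O queue on the node its interrupt vector
is affine to (vector qid - 1, since the admin queue owns vector 0).
pci_irq_get_node() is the PCI core helper doing the lookup; roughly:

	int pci_irq_get_node(struct pci_dev *pdev, int vec)
	{
		const struct cpumask *mask;

		mask = pci_irq_get_affinity(pdev, vec);
		if (mask)
			return local_memory_node(cpu_to_node(cpumask_first(mask)));
		return dev_to_node(&pdev->dev);
	}
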
@@ -1674,21 +1648,34 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
        int i, queues;
-       u32 csts = -1;
+       bool dead = true;
+       struct pci_dev *pdev = to_pci_dev(dev->dev);
 
        del_timer_sync(&dev->watchdog_timer);
 
        mutex_lock(&dev->shutdown_lock);
-       if (pci_is_enabled(to_pci_dev(dev->dev))) {
-               nvme_stop_queues(&dev->ctrl);
-               csts = readl(dev->bar + NVME_REG_CSTS);
+       if (pci_is_enabled(pdev)) {
+               u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+               if (dev->ctrl.state == NVME_CTRL_LIVE)
+                       nvme_start_freeze(&dev->ctrl);
+               dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
+			pdev->error_state != pci_channel_io_normal);
        }
 
+       /*
+        * Give the controller a chance to complete all entered requests if
+        * doing a safe shutdown.
+        */
+       if (!dead && shutdown)
+               nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+       nvme_stop_queues(&dev->ctrl);
+
        queues = dev->online_queues - 1;
        for (i = dev->queue_count - 1; i > 0; i--)
                nvme_suspend_queue(dev->queues[i]);
 
-       if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
+       if (dead) {
                /* A device might become IO incapable very soon during
                 * probe, before the admin queue is configured. Thus,
                 * queue_count can be 0 here.
@@ -1703,6 +1690,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 
        blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
        blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
+
+       /*
+        * The driver will not be starting up queues again if shutting down so
+        * must flush all entered requests to their failed completion to avoid
+        * deadlocking blk-mq hot-cpu notifier.
+        */
+       if (shutdown)
+               nvme_start_queues(&dev->ctrl);
        mutex_unlock(&dev->shutdown_lock);
 }
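
The nvme_dev_disable() rework changes the teardown ordering: sample CSTS
to decide whether the controller is already dead, freeze the namespace
queues while still live so no new requests can enter, optionally wait out
a bounded drain for a safe shutdown, and only then quiesce and reap.  The
final nvme_start_queues() on the shutdown path lets entered requests run
into nvme_queue_rq() and fail fast (see the dead-queue hunk near the top)
rather than sit forever and deadlock the blk-mq hot-cpu notifier.  The
freeze helpers are core.c additions that fan the block-layer freeze API
out across all namespaces; a sketch, with the body assumed:

	void nvme_start_freeze(struct nvme_ctrl *ctrl)
	{
		struct nvme_ns *ns;

		mutex_lock(&ctrl->namespaces_mutex);
		list_for_each_entry(ns, &ctrl->namespaces, list)
			blk_freeze_queue_start(ns->queue);	/* blk_mq_freeze_queue_start() in older kernels */
		mutex_unlock(&ctrl->namespaces_mutex);
	}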
 
@@ -1739,7 +1734,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
        if (dev->ctrl.admin_q)
                blk_put_queue(dev->ctrl.admin_q);
        kfree(dev->queues);
-       kfree(dev->ctrl.opal_dev);
+       free_opal_dev(dev->ctrl.opal_dev);
        kfree(dev);
 }
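
free_opal_dev() is the sed-opal API's destructor; unlike the bare kfree()
it replaces, it is NULL-safe and also releases the state the OPAL code
keeps for unlock-from-suspend.  Roughly, from block/sed-opal.c (body
assumed):

	void free_opal_dev(struct opal_dev *dev)
	{
		if (!dev)
			return;
		clean_opal_dev(dev);
		kfree(dev);
	}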
 
@@ -1789,14 +1784,17 @@ static void nvme_reset_work(struct work_struct *work)
        if (result)
                goto out;
 
-       if ((dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) && !dev->ctrl.opal_dev) {
-               dev->ctrl.opal_dev =
-                       init_opal_dev(&dev->ctrl, &nvme_sec_submit);
+       if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) {
+               if (!dev->ctrl.opal_dev)
+                       dev->ctrl.opal_dev =
+                               init_opal_dev(&dev->ctrl, &nvme_sec_submit);
+               else if (was_suspend)
+                       opal_unlock_from_suspend(dev->ctrl.opal_dev);
+       } else {
+               free_opal_dev(dev->ctrl.opal_dev);
+               dev->ctrl.opal_dev = NULL;
        }
 
-       if (was_suspend)
-               opal_unlock_from_suspend(dev->ctrl.opal_dev);
-
        result = nvme_setup_io_queues(dev);
        if (result)
                goto out;
@@ -1822,7 +1820,9 @@ static void nvme_reset_work(struct work_struct *work)
                nvme_remove_namespaces(&dev->ctrl);
        } else {
                nvme_start_queues(&dev->ctrl);
+               nvme_wait_freeze(&dev->ctrl);
                nvme_dev_add(dev);
+               nvme_unfreeze(&dev->ctrl);
        }
 
        if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
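
These two added lines pair with the freeze begun in nvme_dev_disable():
after a successful reset the queues are restarted, nvme_wait_freeze()
blocks until every namespace queue has drained and fully frozen,
nvme_dev_add() can then safely update the tag set, and nvme_unfreeze()
drops the freeze counts again.  The unfreeze side is the mirror image of
the sketch above:

	void nvme_unfreeze(struct nvme_ctrl *ctrl)
	{
		struct nvme_ns *ns;

		mutex_lock(&ctrl->namespaces_mutex);
		list_for_each_entry(ns, &ctrl->namespaces, list)
			blk_mq_unfreeze_queue(ns->queue);
		mutex_unlock(&ctrl->namespaces_mutex);
	}
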
@@ -2001,8 +2001,10 @@ static void nvme_remove(struct pci_dev *pdev)
 
        pci_set_drvdata(pdev, NULL);
 
-       if (!pci_device_is_present(pdev))
+       if (!pci_device_is_present(pdev)) {
                nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
+               nvme_dev_disable(dev, false);
+       }
 
        flush_work(&dev->reset_work);
        nvme_uninit_ctrl(&dev->ctrl);
@@ -2106,13 +2108,13 @@ static const struct pci_error_handlers nvme_err_handler = {
 static const struct pci_device_id nvme_id_table[] = {
        { PCI_VDEVICE(INTEL, 0x0953),
                .driver_data = NVME_QUIRK_STRIPE_SIZE |
-                               NVME_QUIRK_DISCARD_ZEROES, },
+                               NVME_QUIRK_DEALLOCATE_ZEROES, },
        { PCI_VDEVICE(INTEL, 0x0a53),
                .driver_data = NVME_QUIRK_STRIPE_SIZE |
-                               NVME_QUIRK_DISCARD_ZEROES, },
+                               NVME_QUIRK_DEALLOCATE_ZEROES, },
        { PCI_VDEVICE(INTEL, 0x0a54),
                .driver_data = NVME_QUIRK_STRIPE_SIZE |
-                               NVME_QUIRK_DISCARD_ZEROES, },
+                               NVME_QUIRK_DEALLOCATE_ZEROES, },
        { PCI_VDEVICE(INTEL, 0x5845),   /* Qemu emulated controller */
                .driver_data = NVME_QUIRK_IDENTIFY_CNS, },
        { PCI_DEVICE(0x1c58, 0x0003),   /* HGST adapter */
@@ -2121,6 +2123,7 @@ static const struct pci_device_id nvme_id_table[] = {
                .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
        { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
+       { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
        { 0, }
 };
 MODULE_DEVICE_TABLE(pci, nvme_id_table);