nvme/pci: Don't set reserved SQ create flags

This blobdiff folds several upstream nvme changes to pci.c into one view;
the reserved-SQ-create-flags fix named in the subject is the
adapter_alloc_sq() hunk below.

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index ddc51adb594d0ba3df2e800b9247e2b3cb161847..af783a33e93a8016c508dad9cb5625c74662197f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -326,10 +326,6 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
        iod->nents = 0;
        iod->length = size;
 
-       if (!(rq->rq_flags & RQF_DONTPREP)) {
-               rq->retries = 0;
-               rq->rq_flags |= RQF_DONTPREP;
-       }
        return BLK_MQ_RQ_QUEUE_OK;
 }
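
The RQF_DONTPREP/retries setup dropped here moves into the
transport-independent core rather than disappearing; upstream placed it in
nvme_setup_cmd() in core.c (placement assumed from the upstream series,
since this blobdiff only shows pci.c).  Roughly:

	/* core.c, nvme_setup_cmd() -- sketch, placement assumed */
	if (!(req->rq_flags & RQF_DONTPREP)) {
		nvme_req(req)->retries = 0;
		nvme_req(req)->flags = 0;
		req->rq_flags |= RQF_DONTPREP;
	}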
 
@@ -613,10 +609,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 
        spin_lock_irq(&nvmeq->q_lock);
        if (unlikely(nvmeq->cq_vector < 0)) {
-               if (ns && !test_bit(NVME_NS_DEAD, &ns->flags))
-                       ret = BLK_MQ_RQ_QUEUE_BUSY;
-               else
-                       ret = BLK_MQ_RQ_QUEUE_ERROR;
+               ret = BLK_MQ_RQ_QUEUE_ERROR;
                spin_unlock_irq(&nvmeq->q_lock);
                goto out_cleanup_iod;
        }
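
With this hunk a dead queue (cq_vector < 0) fails requests immediately
instead of returning BLK_MQ_RQ_QUEUE_BUSY for live namespaces.  That is
presumably safe because of the shutdown rework in nvme_dev_disable()
further down: entered requests are now frozen and then flushed to
completion, so there is nothing to be gained by holding requests on a
queue that will not come back during this teardown.
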
@@ -631,34 +624,12 @@ out_free_cmd:
        return ret;
 }
 
-static void nvme_complete_rq(struct request *req)
+static void nvme_pci_complete_rq(struct request *req)
 {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-       struct nvme_dev *dev = iod->nvmeq->dev;
-       int error = 0;
-
-       nvme_unmap_data(dev, req);
-
-       if (unlikely(req->errors)) {
-               if (nvme_req_needs_retry(req, req->errors)) {
-                       req->retries++;
-                       nvme_requeue_req(req);
-                       return;
-               }
-
-               if (blk_rq_is_passthrough(req))
-                       error = req->errors;
-               else
-                       error = nvme_error_status(req->errors);
-       }
-
-       if (unlikely(iod->aborted)) {
-               dev_warn(dev->ctrl.device,
-                       "completing aborted command with status: %04x\n",
-                       req->errors);
-       }
 
-       blk_mq_end_request(req, error);
+       nvme_unmap_data(iod->nvmeq->dev, req);
+       nvme_complete_rq(req);
 }
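
Generic completion handling (retry accounting, passthrough vs. block error
translation, and the final blk_mq_end_request()) is hoisted into a shared
nvme_complete_rq() in core.c, leaving the PCI driver with just the DMA
unmap.  A sketch of the core-side function as it looked in this era --
details assumed, since core.c is not part of this blobdiff:

	void nvme_complete_rq(struct request *req)
	{
		if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
			nvme_req(req)->retries++;
			blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
			return;
		}
		blk_mq_end_request(req, nvme_error_status(req));
	}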
 
 /* We read the CQE phase first to check if the rest of the entry is valid */
@@ -708,8 +679,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
                }
 
                req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
-               nvme_req(req)->result = cqe.result;
-               blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
+               nvme_end_request(req, cqe.status, cqe.result);
        }
 
        if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
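
nvme_end_request() is a small inline helper in nvme.h replacing the
open-coded result/status handling above; it performs the le16_to_cpu()
swap and strips the phase bit itself, which is why the caller no longer
does.  Sketched from memory, exact body assumed:

	static inline void nvme_end_request(struct request *req, __le16 status,
			union nvme_result result)
	{
		struct nvme_request *rq = nvme_req(req);

		rq->status = le16_to_cpu(status) >> 1;
		rq->result = result;
		/* second (error) argument still existed at this kernel vintage */
		blk_mq_complete_request(req, 0);
	}
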
@@ -815,7 +785,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
                                                struct nvme_queue *nvmeq)
 {
        struct nvme_command c;
-       int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
+       int flags = NVME_QUEUE_PHYS_CONTIG;
 
        /*
         * Note: we (ab)use the fact that the prp fields survive if no data
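
This hunk is the fix named in the commit subject.  In the NVMe spec the
QPRIO field of Create I/O SQ (CDW11 bits 02:01) is only defined when
weighted round robin arbitration was selected at controller enable time;
this driver never enables WRR, so those bits are reserved and must be
zero.  NVME_SQ_PRIO_MEDIUM encodes a nonzero value there, which a
controller that validates reserved bits may reject.  From the flag
definitions in include/linux/nvme.h (excerpt):

	enum {
		NVME_QUEUE_PHYS_CONTIG	= (1 << 0),
		NVME_SQ_PRIO_URGENT	= (0 << 1),
		NVME_SQ_PRIO_HIGH	= (1 << 1),
		NVME_SQ_PRIO_MEDIUM	= (2 << 1),
		NVME_SQ_PRIO_LOW	= (3 << 1),
	};
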
@@ -846,9 +816,9 @@ static void abort_endio(struct request *req, int error)
 {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct nvme_queue *nvmeq = iod->nvmeq;
-       u16 status = req->errors;
 
-       dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", status);
+       dev_warn(nvmeq->dev->ctrl.device,
+                "Abort status: 0x%x", nvme_req(req)->status);
        atomic_inc(&nvmeq->dev->ctrl.abort_limit);
        blk_mq_free_request(req);
 }
@@ -872,7 +842,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
                         "I/O %d QID %d timeout, disable controller\n",
                         req->tag, nvmeq->qid);
                nvme_dev_disable(dev, false);
-               req->errors = NVME_SC_CANCELLED;
+               nvme_req(req)->flags |= NVME_REQ_CANCELLED;
                return BLK_EH_HANDLED;
        }
 
@@ -892,7 +862,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
                 * Mark the request as handled, since the inline shutdown
                 * forces all outstanding requests to complete.
                 */
-               req->errors = NVME_SC_CANCELLED;
+               nvme_req(req)->flags |= NVME_REQ_CANCELLED;
                return BLK_EH_HANDLED;
        }
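
Instead of faking a status code in req->errors, the two timeout paths now
set a driver-private NVME_REQ_CANCELLED flag on the per-request
nvme_request, and the core translates that at completion time.  The
supporting definitions live in nvme.h; roughly:

	struct nvme_request {
		struct nvme_command	*cmd;
		union nvme_result	result;
		u8			retries;
		u8			flags;
		u16			status;
	};

	enum {
		NVME_REQ_CANCELLED	= (1 << 0),
	};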
 
@@ -1041,9 +1011,10 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
 }
 
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
-                                                       int depth)
+                                                       int depth, int node)
 {
-       struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
+       struct nvme_queue *nvmeq = kzalloc_node(sizeof(*nvmeq), GFP_KERNEL,
+                                                       node);
        if (!nvmeq)
                return NULL;
 
@@ -1131,18 +1102,18 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
        return result;
 }
 
-static struct blk_mq_ops nvme_mq_admin_ops = {
+static const struct blk_mq_ops nvme_mq_admin_ops = {
        .queue_rq       = nvme_queue_rq,
-       .complete       = nvme_complete_rq,
+       .complete       = nvme_pci_complete_rq,
        .init_hctx      = nvme_admin_init_hctx,
        .exit_hctx      = nvme_admin_exit_hctx,
        .init_request   = nvme_admin_init_request,
        .timeout        = nvme_timeout,
 };
 
-static struct blk_mq_ops nvme_mq_ops = {
+static const struct blk_mq_ops nvme_mq_ops = {
        .queue_rq       = nvme_queue_rq,
-       .complete       = nvme_complete_rq,
+       .complete       = nvme_pci_complete_rq,
        .init_hctx      = nvme_init_hctx,
        .init_request   = nvme_init_request,
        .map_queues     = nvme_pci_map_queues,
@@ -1220,7 +1191,8 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 
        nvmeq = dev->queues[0];
        if (!nvmeq) {
-               nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
+               nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH,
+                                       dev_to_node(dev->dev));
                if (!nvmeq)
                        return -ENOMEM;
        }
@@ -1312,7 +1284,9 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
        int ret = 0;
 
        for (i = dev->queue_count; i <= dev->max_qid; i++) {
-               if (!nvme_alloc_queue(dev, i, dev->q_depth)) {
+               /* vector == qid - 1, match nvme_create_queue */
+               if (!nvme_alloc_queue(dev, i, dev->q_depth,
+                    pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
                        ret = -ENOMEM;
                        break;
                }
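
The extra parameter threaded through the three allocation hunks above
makes each nvme_queue structure NUMA-local: the admin queue is allocated
on the device's node, and each I/O queue on the node its interrupt vector
is affine to (vector qid - 1, since the admin queue owns vector 0).
pci_irq_get_node() is the PCI core helper doing the lookup; roughly:

	int pci_irq_get_node(struct pci_dev *pdev, int vec)
	{
		const struct cpumask *mask;

		mask = pci_irq_get_affinity(pdev, vec);
		if (mask)
			return local_memory_node(cpu_to_node(cpumask_first(mask)));
		return dev_to_node(&pdev->dev);
	}
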
@@ -1674,21 +1648,34 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
        int i, queues;
-       u32 csts = -1;
+       bool dead = true;
+       struct pci_dev *pdev = to_pci_dev(dev->dev);
 
        del_timer_sync(&dev->watchdog_timer);
 
        mutex_lock(&dev->shutdown_lock);
-       if (pci_is_enabled(to_pci_dev(dev->dev))) {
-               nvme_stop_queues(&dev->ctrl);
-               csts = readl(dev->bar + NVME_REG_CSTS);
+       if (pci_is_enabled(pdev)) {
+               u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+               if (dev->ctrl.state == NVME_CTRL_LIVE)
+                       nvme_start_freeze(&dev->ctrl);
+               dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
+			pdev->error_state != pci_channel_io_normal);
        }
 
+       /*
+        * Give the controller a chance to complete all entered requests if
+        * doing a safe shutdown.
+        */
+       if (!dead && shutdown)
+               nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+       nvme_stop_queues(&dev->ctrl);
+
        queues = dev->online_queues - 1;
        for (i = dev->queue_count - 1; i > 0; i--)
                nvme_suspend_queue(dev->queues[i]);
 
-       if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
+       if (dead) {
                /* A device might become IO incapable very soon during
                 * probe, before the admin queue is configured. Thus,
                 * queue_count can be 0 here.
@@ -1703,6 +1690,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 
        blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
        blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
+
+       /*
+        * The driver will not be starting up queues again if shutting down so
+        * must flush all entered requests to their failed completion to avoid
+        * deadlocking blk-mq hot-cpu notifier.
+        */
+       if (shutdown)
+               nvme_start_queues(&dev->ctrl);
        mutex_unlock(&dev->shutdown_lock);
 }
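
The nvme_dev_disable() rework changes the teardown ordering: sample CSTS
to decide whether the controller is already dead, freeze the namespace
queues while still live so no new requests can enter, optionally wait out
a bounded drain for a safe shutdown, and only then quiesce and reap.  The
final nvme_start_queues() on the shutdown path lets entered requests run
into nvme_queue_rq() and fail fast (see the dead-queue hunk near the top)
rather than sit forever and deadlock the blk-mq hot-cpu notifier.  The
freeze helpers are core.c additions that fan the block-layer freeze API
out across all namespaces; a sketch, with the body assumed:

	void nvme_start_freeze(struct nvme_ctrl *ctrl)
	{
		struct nvme_ns *ns;

		mutex_lock(&ctrl->namespaces_mutex);
		list_for_each_entry(ns, &ctrl->namespaces, list)
			blk_freeze_queue_start(ns->queue);	/* blk_mq_freeze_queue_start() in older kernels */
		mutex_unlock(&ctrl->namespaces_mutex);
	}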
 
@@ -1739,7 +1734,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
        if (dev->ctrl.admin_q)
                blk_put_queue(dev->ctrl.admin_q);
        kfree(dev->queues);
-       kfree(dev->ctrl.opal_dev);
+       free_opal_dev(dev->ctrl.opal_dev);
        kfree(dev);
 }
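
free_opal_dev() is the sed-opal API's destructor; unlike the bare kfree()
it replaces, it is NULL-safe and also releases the state the OPAL code
keeps for unlock-from-suspend.  Roughly, from block/sed-opal.c (body
assumed):

	void free_opal_dev(struct opal_dev *dev)
	{
		if (!dev)
			return;
		clean_opal_dev(dev);
		kfree(dev);
	}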
 
@@ -1789,14 +1784,17 @@ static void nvme_reset_work(struct work_struct *work)
        if (result)
                goto out;
 
-       if ((dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) && !dev->ctrl.opal_dev) {
-               dev->ctrl.opal_dev =
-                       init_opal_dev(&dev->ctrl, &nvme_sec_submit);
+       if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) {
+               if (!dev->ctrl.opal_dev)
+                       dev->ctrl.opal_dev =
+                               init_opal_dev(&dev->ctrl, &nvme_sec_submit);
+               else if (was_suspend)
+                       opal_unlock_from_suspend(dev->ctrl.opal_dev);
+       } else {
+               free_opal_dev(dev->ctrl.opal_dev);
+               dev->ctrl.opal_dev = NULL;
        }
 
-       if (was_suspend)
-               opal_unlock_from_suspend(dev->ctrl.opal_dev);
-
        result = nvme_setup_io_queues(dev);
        if (result)
                goto out;
@@ -1822,7 +1820,9 @@ static void nvme_reset_work(struct work_struct *work)
                nvme_remove_namespaces(&dev->ctrl);
        } else {
                nvme_start_queues(&dev->ctrl);
+               nvme_wait_freeze(&dev->ctrl);
                nvme_dev_add(dev);
+               nvme_unfreeze(&dev->ctrl);
        }
 
        if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
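
These two added lines pair with the freeze begun in nvme_dev_disable():
after a successful reset the queues are restarted, nvme_wait_freeze()
blocks until every namespace queue has drained and fully frozen,
nvme_dev_add() can then safely update the tag set, and nvme_unfreeze()
drops the freeze counts again.  The unfreeze side is the mirror image of
the sketch above:

	void nvme_unfreeze(struct nvme_ctrl *ctrl)
	{
		struct nvme_ns *ns;

		mutex_lock(&ctrl->namespaces_mutex);
		list_for_each_entry(ns, &ctrl->namespaces, list)
			blk_mq_unfreeze_queue(ns->queue);
		mutex_unlock(&ctrl->namespaces_mutex);
	}
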
@@ -2001,8 +2001,10 @@ static void nvme_remove(struct pci_dev *pdev)
 
        pci_set_drvdata(pdev, NULL);
 
-       if (!pci_device_is_present(pdev))
+       if (!pci_device_is_present(pdev)) {
                nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
+               nvme_dev_disable(dev, false);
+       }
 
        flush_work(&dev->reset_work);
        nvme_uninit_ctrl(&dev->ctrl);
@@ -2106,13 +2108,13 @@ static const struct pci_error_handlers nvme_err_handler = {
 static const struct pci_device_id nvme_id_table[] = {
        { PCI_VDEVICE(INTEL, 0x0953),
                .driver_data = NVME_QUIRK_STRIPE_SIZE |
-                               NVME_QUIRK_DISCARD_ZEROES, },
+                               NVME_QUIRK_DEALLOCATE_ZEROES, },
        { PCI_VDEVICE(INTEL, 0x0a53),
                .driver_data = NVME_QUIRK_STRIPE_SIZE |
-                               NVME_QUIRK_DISCARD_ZEROES, },
+                               NVME_QUIRK_DEALLOCATE_ZEROES, },
        { PCI_VDEVICE(INTEL, 0x0a54),
                .driver_data = NVME_QUIRK_STRIPE_SIZE |
-                               NVME_QUIRK_DISCARD_ZEROES, },
+                               NVME_QUIRK_DEALLOCATE_ZEROES, },
        { PCI_VDEVICE(INTEL, 0x5845),   /* Qemu emulated controller */
                .driver_data = NVME_QUIRK_IDENTIFY_CNS, },
        { PCI_DEVICE(0x1c58, 0x0003),   /* HGST adapter */
@@ -2121,6 +2123,7 @@ static const struct pci_device_id nvme_id_table[] = {
                .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
        { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
+       { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
        { 0, }
 };
 MODULE_DEVICE_TABLE(pci, nvme_id_table);