iod->nents = 0;
iod->length = size;
- if (!(rq->rq_flags & RQF_DONTPREP)) {
- rq->retries = 0;
- rq->rq_flags |= RQF_DONTPREP;
- }
return BLK_MQ_RQ_QUEUE_OK;
}
spin_lock_irq(&nvmeq->q_lock);
if (unlikely(nvmeq->cq_vector < 0)) {
- if (ns && !test_bit(NVME_NS_DEAD, &ns->flags))
- ret = BLK_MQ_RQ_QUEUE_BUSY;
- else
- ret = BLK_MQ_RQ_QUEUE_ERROR;
+ ret = BLK_MQ_RQ_QUEUE_ERROR;
spin_unlock_irq(&nvmeq->q_lock);
goto out_cleanup_iod;
}
return ret;
}
/*
 * blk-mq completion callback for a request.
 *
 * This hunk renames nvme_complete_rq() to nvme_pci_complete_rq() and
 * shrinks it to two steps: unmap the request's data via nvme_unmap_data()
 * and then call nvme_complete_rq().
 *
 * The removed lines did the following inline: requeue the request (bumping
 * req->retries) when nvme_req_needs_retry() said so, pick the error value
 * (raw req->errors for passthrough requests, nvme_error_status() otherwise),
 * warn when iod->aborted was set, and finish with blk_mq_end_request().
 * NOTE(review): presumably nvme_complete_rq() now covers that logic; its
 * definition is not visible here -- confirm.
 */
-static void nvme_complete_rq(struct request *req)
+static void nvme_pci_complete_rq(struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- struct nvme_dev *dev = iod->nvmeq->dev;
- int error = 0;
-
- nvme_unmap_data(dev, req);
-
- if (unlikely(req->errors)) {
- if (nvme_req_needs_retry(req, req->errors)) {
- req->retries++;
- nvme_requeue_req(req);
- return;
- }
-
- if (blk_rq_is_passthrough(req))
- error = req->errors;
- else
- error = nvme_error_status(req->errors);
- }
-
- if (unlikely(iod->aborted)) {
- dev_warn(dev->ctrl.device,
- "completing aborted command with status: %04x\n",
- req->errors);
- }
- blk_mq_end_request(req, error);
/* The device is now reached through the iod; the local 'dev' was removed. */
+ nvme_unmap_data(iod->nvmeq->dev, req);
+ nvme_complete_rq(req);
}
/* We read the CQE phase first to check if the rest of the entry is valid */
}
req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
- nvme_req(req)->result = cqe.result;
- blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
+ nvme_end_request(req, cqe.status, cqe.result);
}
if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
struct nvme_queue *nvmeq)
{
struct nvme_command c;
- int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
+ int flags = NVME_QUEUE_PHYS_CONTIG;
/*
* Note: we (ab)use the fact that the prp fields survive if no data
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = iod->nvmeq;
- u16 status = req->errors;
- dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", status);
+ dev_warn(nvmeq->dev->ctrl.device,
+ "Abort status: 0x%x", nvme_req(req)->status);
atomic_inc(&nvmeq->dev->ctrl.abort_limit);
blk_mq_free_request(req);
}
"I/O %d QID %d timeout, disable controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
- req->errors = NVME_SC_CANCELLED;
+ nvme_req(req)->flags |= NVME_REQ_CANCELLED;
return BLK_EH_HANDLED;
}
* Mark the request as handled, since the inline shutdown
* forces all outstanding requests to complete.
*/
- req->errors = NVME_SC_CANCELLED;
+ nvme_req(req)->flags |= NVME_REQ_CANCELLED;
return BLK_EH_HANDLED;
}
}
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
- int depth)
+ int depth, int node)
{
- struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
+ struct nvme_queue *nvmeq = kzalloc_node(sizeof(*nvmeq), GFP_KERNEL,
+ node);
if (!nvmeq)
return NULL;
return result;
}
/*
 * blk_mq_ops callback table (by its name and its nvme_admin_* hooks, the one
 * used for the admin queue -- the registration site is not visible here).
 *
 * This hunk makes the table const and points .complete at
 * nvme_pci_complete_rq instead of nvme_complete_rq; the other callbacks
 * are unchanged.
 */
-static struct blk_mq_ops nvme_mq_admin_ops = {
+static const struct blk_mq_ops nvme_mq_admin_ops = {
.queue_rq = nvme_queue_rq,
- .complete = nvme_complete_rq,
+ .complete = nvme_pci_complete_rq,
.init_hctx = nvme_admin_init_hctx,
.exit_hctx = nvme_admin_exit_hctx,
.init_request = nvme_admin_init_request,
.timeout = nvme_timeout,
};
-static struct blk_mq_ops nvme_mq_ops = {
+static const struct blk_mq_ops nvme_mq_ops = {
.queue_rq = nvme_queue_rq,
- .complete = nvme_complete_rq,
+ .complete = nvme_pci_complete_rq,
.init_hctx = nvme_init_hctx,
.init_request = nvme_init_request,
.map_queues = nvme_pci_map_queues,
nvmeq = dev->queues[0];
if (!nvmeq) {
- nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
+ nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH,
+ dev_to_node(dev->dev));
if (!nvmeq)
return -ENOMEM;
}
int ret = 0;
for (i = dev->queue_count; i <= dev->max_qid; i++) {
- if (!nvme_alloc_queue(dev, i, dev->q_depth)) {
+ /* vector == qid - 1, match nvme_create_queue */
+ if (!nvme_alloc_queue(dev, i, dev->q_depth,
+ pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
ret = -ENOMEM;
break;
}
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
int i, queues;
- u32 csts = -1;
+ bool dead = true;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
del_timer_sync(&dev->watchdog_timer);
mutex_lock(&dev->shutdown_lock);
- if (pci_is_enabled(to_pci_dev(dev->dev))) {
- nvme_stop_queues(&dev->ctrl);
- csts = readl(dev->bar + NVME_REG_CSTS);
+ if (pci_is_enabled(pdev)) {
+ u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+ if (dev->ctrl.state == NVME_CTRL_LIVE)
+ nvme_start_freeze(&dev->ctrl);
+ dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
+ pdev->error_state != pci_channel_io_normal);
}
+ /*
+ * Give the controller a chance to complete all entered requests if
+ * doing a safe shutdown.
+ */
+ if (!dead && shutdown)
+ nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+ nvme_stop_queues(&dev->ctrl);
+
queues = dev->online_queues - 1;
for (i = dev->queue_count - 1; i > 0; i--)
nvme_suspend_queue(dev->queues[i]);
- if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
+ if (dead) {
/* A device might become IO incapable very soon during
* probe, before the admin queue is configured. Thus,
* queue_count can be 0 here.
blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
+
+ /*
+ * The driver will not be starting up queues again if shutting down so
+ * must flush all entered requests to their failed completion to avoid
+ * deadlocking blk-mq hot-cpu notifier.
+ */
+ if (shutdown)
+ nvme_start_queues(&dev->ctrl);
mutex_unlock(&dev->shutdown_lock);
}
if (dev->ctrl.admin_q)
blk_put_queue(dev->ctrl.admin_q);
kfree(dev->queues);
- kfree(dev->ctrl.opal_dev);
+ free_opal_dev(dev->ctrl.opal_dev);
kfree(dev);
}
if (result)
goto out;
- if ((dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) && !dev->ctrl.opal_dev) {
- dev->ctrl.opal_dev =
- init_opal_dev(&dev->ctrl, &nvme_sec_submit);
+ if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) {
+ if (!dev->ctrl.opal_dev)
+ dev->ctrl.opal_dev =
+ init_opal_dev(&dev->ctrl, &nvme_sec_submit);
+ else if (was_suspend)
+ opal_unlock_from_suspend(dev->ctrl.opal_dev);
+ } else {
+ free_opal_dev(dev->ctrl.opal_dev);
+ dev->ctrl.opal_dev = NULL;
}
- if (was_suspend)
- opal_unlock_from_suspend(dev->ctrl.opal_dev);
-
result = nvme_setup_io_queues(dev);
if (result)
goto out;
nvme_remove_namespaces(&dev->ctrl);
} else {
nvme_start_queues(&dev->ctrl);
+ nvme_wait_freeze(&dev->ctrl);
nvme_dev_add(dev);
+ nvme_unfreeze(&dev->ctrl);
}
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
pci_set_drvdata(pdev, NULL);
- if (!pci_device_is_present(pdev))
+ if (!pci_device_is_present(pdev)) {
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
+ nvme_dev_disable(dev, false);
+ }
flush_work(&dev->reset_work);
nvme_uninit_ctrl(&dev->ctrl);
/*
 * PCI device ID table, published with MODULE_DEVICE_TABLE(pci, ...) below.
 * Entries carry optional per-device quirk flags in .driver_data; the list
 * is terminated by the all-zero entry.
 *
 * This hunk replaces NVME_QUIRK_DISCARD_ZEROES with
 * NVME_QUIRK_DEALLOCATE_ZEROES on the three Intel entries (0x0953, 0x0a53,
 * 0x0a54) and adds an entry for Apple device 0x2003.
 */
static const struct pci_device_id nvme_id_table[] = {
{ PCI_VDEVICE(INTEL, 0x0953),
.driver_data = NVME_QUIRK_STRIPE_SIZE |
- NVME_QUIRK_DISCARD_ZEROES, },
+ NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0x0a53),
.driver_data = NVME_QUIRK_STRIPE_SIZE |
- NVME_QUIRK_DISCARD_ZEROES, },
+ NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0x0a54),
.driver_data = NVME_QUIRK_STRIPE_SIZE |
- NVME_QUIRK_DISCARD_ZEROES, },
+ NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
{ PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
+ { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, nvme_id_table);