nvme: use the block layer for userspace passthrough metadata
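
Instead of special-casing the userspace metadata buffer in
nvme_submit_io() with a dma_alloc_coherent() bounce buffer and manual
copy_from_user()/copy_to_user() calls, hand the user pointer to
__nvme_submit_user_cmd() and let the block layer map it.  The
per-request DMA unmapping moves into a new nvme_unmap_data() helper, so
the requeue path in req_completion() can release its mappings and
return early, and the nvme_error_status(), nvme_setup_flush() and
nvme_setup_rw() helpers are dropped from pci.c.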
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index e5f53f15906915c0842f867677ca5470bf91f68c..aa033f047aaf2564ad5e21b32e5809e8e118f872 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -89,10 +89,12 @@ static struct class *nvme_class;
 
 struct nvme_dev;
 struct nvme_queue;
+struct nvme_iod;
 
 static int __nvme_reset(struct nvme_dev *dev);
 static int nvme_reset(struct nvme_dev *dev);
 static void nvme_process_cq(struct nvme_queue *nvmeq);
+static void nvme_unmap_data(struct nvme_dev *dev, struct nvme_iod *iod);
 static void nvme_dead_ctrl(struct nvme_dev *dev);
 
 struct async_cmd_info {
@@ -545,18 +547,6 @@ static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
                kfree(iod);
 }
 
-static int nvme_error_status(u16 status)
-{
-       switch (status & 0x7ff) {
-       case NVME_SC_SUCCESS:
-               return 0;
-       case NVME_SC_CAP_EXCEEDED:
-               return -ENOSPC;
-       default:
-               return -EIO;
-       }
-}
-
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
 {
@@ -655,7 +645,6 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
        struct request *req = iod_get_private(iod);
        struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
        u16 status = le16_to_cpup(&cqe->status) >> 1;
-       bool requeue = false;
        int error = 0;
 
        if (unlikely(status)) {
@@ -663,13 +652,14 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
                    && (jiffies - req->start_time) < req->timeout) {
                        unsigned long flags;
 
-                       requeue = true;
+                       nvme_unmap_data(nvmeq->dev, iod);
+
                        blk_mq_requeue_request(req);
                        spin_lock_irqsave(req->q->queue_lock, flags);
                        if (!blk_queue_stopped(req->q))
                                blk_mq_kick_requeue_list(req->q);
                        spin_unlock_irqrestore(req->q->queue_lock, flags);
-                       goto release_iod;
+                       return;
                }
 
                if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
@@ -692,21 +682,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
                        "completing aborted command with status:%04x\n",
                        error);
 
-release_iod:
-       if (iod->nents) {
-               dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents,
-                       rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-               if (blk_integrity_rq(req)) {
-                       if (!rq_data_dir(req))
-                               nvme_dif_remap(req, nvme_dif_complete);
-                       dma_unmap_sg(nvmeq->dev->dev, iod->meta_sg, 1,
-                               rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-               }
-       }
-       nvme_free_iod(nvmeq->dev, iod);
-
-       if (likely(!requeue))
-               blk_mq_complete_request(req, error);
+       nvme_unmap_data(nvmeq->dev, iod);
+       blk_mq_complete_request(req, error);
 }
 
 static bool nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
@@ -837,6 +814,24 @@ out:
        return ret;
 }
 
+static void nvme_unmap_data(struct nvme_dev *dev, struct nvme_iod *iod)
+{
+       struct request *req = iod_get_private(iod);
+       enum dma_data_direction dma_dir = rq_data_dir(req) ?
+                       DMA_TO_DEVICE : DMA_FROM_DEVICE;
+
+       if (iod->nents) {
+               dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+               if (blk_integrity_rq(req)) {
+                       if (!rq_data_dir(req))
+                               nvme_dif_remap(req, nvme_dif_complete);
+                       dma_unmap_sg(dev->dev, iod->meta_sg, 1, dma_dir);
+               }
+       }
+
+       nvme_free_iod(dev, iod);
+}
+
 /*
  * We reuse the small pool to allocate the 16-byte range here as it is not
  * worth having a special pool for these or additional cases to handle freeing
@@ -868,55 +863,6 @@ static int nvme_setup_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
        return BLK_MQ_RQ_QUEUE_OK;
 }
 
-static void nvme_setup_flush(struct nvme_ns *ns, struct nvme_command *cmnd)
-{
-       memset(cmnd, 0, sizeof(*cmnd));
-       cmnd->common.opcode = nvme_cmd_flush;
-       cmnd->common.nsid = cpu_to_le32(ns->ns_id);
-}
-
-static void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
-               struct nvme_command *cmnd)
-{
-       u16 control = 0;
-       u32 dsmgmt = 0;
-
-       if (req->cmd_flags & REQ_FUA)
-               control |= NVME_RW_FUA;
-       if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
-               control |= NVME_RW_LR;
-
-       if (req->cmd_flags & REQ_RAHEAD)
-               dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
-
-       memset(cmnd, 0, sizeof(*cmnd));
-       cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
-       cmnd->rw.command_id = req->tag;
-       cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
-       cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
-       cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
-
-       if (ns->ms) {
-               switch (ns->pi_type) {
-               case NVME_NS_DPS_PI_TYPE3:
-                       control |= NVME_RW_PRINFO_PRCHK_GUARD;
-                       break;
-               case NVME_NS_DPS_PI_TYPE1:
-               case NVME_NS_DPS_PI_TYPE2:
-                       control |= NVME_RW_PRINFO_PRCHK_GUARD |
-                                       NVME_RW_PRINFO_PRCHK_REF;
-                       cmnd->rw.reftag = cpu_to_le32(
-                                       nvme_block_nr(ns, blk_rq_pos(req)));
-                       break;
-               }
-               if (!blk_integrity_rq(req))
-                       control |= NVME_RW_PRINFO_PRACT;
-       }
-
-       cmnd->rw.control = cpu_to_le16(control);
-       cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
-}
-
 /*
  * NOTE: ns is NULL when called on the admin queue.
  */
@@ -1689,13 +1635,9 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
-       struct nvme_dev *dev = to_nvme_dev(ns->ctrl);
        struct nvme_user_io io;
        struct nvme_command c;
        unsigned length, meta_len;
-       int status, write;
-       dma_addr_t meta_dma = 0;
-       void *meta = NULL;
        void __user *metadata;
 
        if (copy_from_user(&io, uio, sizeof(io)))
@@ -1713,29 +1655,13 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
        length = (io.nblocks + 1) << ns->lba_shift;
        meta_len = (io.nblocks + 1) * ns->ms;
        metadata = (void __user *)(uintptr_t)io.metadata;
-       write = io.opcode & 1;
 
        if (ns->ext) {
                length += meta_len;
                meta_len = 0;
-       }
-       if (meta_len) {
-               if (((io.metadata & 3) || !io.metadata) && !ns->ext)
+       } else if (meta_len) {
+               if ((io.metadata & 3) || !io.metadata)
                        return -EINVAL;
-
-               meta = dma_alloc_coherent(dev->dev, meta_len,
-                                               &meta_dma, GFP_KERNEL);
-
-               if (!meta) {
-                       status = -ENOMEM;
-                       goto unmap;
-               }
-               if (write) {
-                       if (copy_from_user(meta, metadata, meta_len)) {
-                               status = -EFAULT;
-                               goto unmap;
-                       }
-               }
        }
 
        memset(&c, 0, sizeof(c));
@@ -1749,19 +1675,10 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
        c.rw.reftag = cpu_to_le32(io.reftag);
        c.rw.apptag = cpu_to_le16(io.apptag);
        c.rw.appmask = cpu_to_le16(io.appmask);
-       c.rw.metadata = cpu_to_le64(meta_dma);
 
-       status = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
-                       (void __user *)(uintptr_t)io.addr, length, NULL, 0);
- unmap:
-       if (meta) {
-               if (status == NVME_SC_SUCCESS && !write) {
-                       if (copy_to_user(metadata, meta, meta_len))
-                               status = -EFAULT;
-               }
-               dma_free_coherent(dev->dev, meta_len, meta, meta_dma);
-       }
-       return status;
+       return __nvme_submit_user_cmd(ns->queue, &c,
+                       (void __user *)(uintptr_t)io.addr, length,
+                       metadata, meta_len, io.slba, NULL, 0);
 }
 
 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
@@ -1793,8 +1710,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
        if (cmd.timeout_ms)
                timeout = msecs_to_jiffies(cmd.timeout_ms);
 
-       status = __nvme_submit_sync_cmd(ns ? ns->queue : ctrl->admin_q, &c,
-                       NULL, (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
+       status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
+                       (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
                        &cmd.result, timeout);
        if (status >= 0) {
                if (put_user(cmd.result, &ucmd->result))