Merge branch 'for-linus' of git://git.kernel.dk/linux-block
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 11 Jul 2017 22:36:52 +0000 (15:36 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 11 Jul 2017 22:36:52 +0000 (15:36 -0700)
Pull more block updates from Jens Axboe:
 "This is a followup for block changes, that didn't make the initial
  pull request. It's a bit of a mixed bag, this contains:

   - A followup pull request from Sagi for NVMe. Outside of fixups for
     NVMe, it also includes a series for ensuring that we properly
     quiesce hardware queues when browsing live tags.

   - Set of integrity fixes from Dmitry (mostly), fixing various issues
     for folks using DIF/DIX.

   - Fix for a bug introduced in cciss with the req init changes. From
     Christoph.

   - Fix for a bug in BFQ, from Paolo.

   - Two followup fixes for lightnvm/pblk from Javier.

   - Depth fix from Ming for blk-mq-sched.

   - Also from Ming, a fix for an mtip32xx performance regression
     introduced with the dynamic initialization of commands"

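Several entries in this pull converge on the same pattern: quiesce a
queue so that no ->queue_rq() invocations are in flight before walking
live tags. A minimal sketch of that pattern, modeled on the nbd and
mtip32xx hunks further down (the example_* names are hypothetical; the
blk-mq calls are the real API):

	static void example_clear_queue(struct example_dev *dev)
	{
		/* Stop dispatch and wait out in-flight ->queue_rq()
		 * calls so that iterating the live tags is safe.
		 */
		blk_mq_quiesce_queue(dev->queue);
		blk_mq_tagset_busy_iter(&dev->tag_set,
					example_cancel_rq, dev);
		blk_mq_unquiesce_queue(dev->queue);
	}
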
* 'for-linus' of git://git.kernel.dk/linux-block: (44 commits)
  block: call bio_uninit in bio_endio
  nvmet: avoid unneeded assignment of submit_bio return value
  nvme-pci: add module parameter for io queue depth
  nvme-pci: compile warnings in nvme_alloc_host_mem()
  nvmet_fc: Accept variable pad lengths on Create Association LS
  nvme_fc/nvmet_fc: revise Create Association descriptor length
  lightnvm: pblk: remove unnecessary checks
  lightnvm: pblk: control I/O flow also on tear down
  cciss: initialize struct scsi_req
  null_blk: fix error flow for shared tags during module_init
  block: Fix __blkdev_issue_zeroout loop
  nvme-rdma: unconditionally recycle the request mr
  nvme: split nvme_uninit_ctrl into stop and uninit
  virtio_blk: quiesce/unquiesce live IO when entering PM states
  mtip32xx: quiesce request queues to make sure no submissions are inflight
  nbd: quiesce request queues to make sure no submissions are inflight
  nvme: kick requeue list when requeueing a request instead of when starting the queues
  nvme-pci: quiesce/unquiesce admin_q instead of start/stop its hw queues
  nvme-loop: quiesce/unquiesce admin_q instead of start/stop its hw queues
  nvme-fc: quiesce/unquiesce admin_q instead of start/stop its hw queues
  ...

40 files changed:
Documentation/block/data-integrity.txt
block/bfq-iosched.c
block/bfq-iosched.h
block/bfq-wf2q.c
block/bio-integrity.c
block/bio.c
block/blk-core.c
block/blk-lib.c
block/blk-mq-sched.c
block/blk-mq.c
block/blk.h
block/t10-pi.c
drivers/block/cciss.c
drivers/block/mtip32xx/mtip32xx.c
drivers/block/mtip32xx/mtip32xx.h
drivers/block/nbd.c
drivers/block/null_blk.c
drivers/block/virtio_blk.c
drivers/lightnvm/pblk-core.c
drivers/lightnvm/pblk-recovery.c
drivers/lightnvm/pblk-write.c
drivers/lightnvm/pblk.h
drivers/md/dm.c
drivers/nvdimm/blk.c
drivers/nvdimm/btt.c
drivers/nvme/host/core.c
drivers/nvme/host/fc.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/target/fc.c
drivers/nvme/target/io-cmd.c
drivers/nvme/target/loop.c
drivers/scsi/lpfc/lpfc_scsi.c
drivers/scsi/qla2xxx/qla_isr.c
drivers/target/target_core_sbc.c
include/linux/bio.h
include/linux/bvec.h
include/linux/nvme-fc.h
include/linux/t10-pi.h

index f56ec97f0d1492c3b5b5ea57bae974cb087b2e7d..934c44ea0c57feca0e4172912083b02cb985a326 100644 (file)
@@ -192,7 +192,7 @@ will require extra work due to the application tag.
     supported by the block device.
 
 
-    int bio_integrity_prep(bio);
+    bool bio_integrity_prep(bio);
 
       To generate IMD for WRITE and to set up buffers for READ, the
       filesystem must call bio_integrity_prep(bio).
@@ -201,9 +201,7 @@ will require extra work due to the application tag.
       sector must be set, and the bio should have all data pages
       added.  It is up to the caller to ensure that the bio does not
       change while I/O is in progress.
-
-      bio_integrity_prep() should only be called if
-      bio_integrity_enabled() returned 1.
+      Complete bio with error if prepare failed for some reason.
 
 
 5.3 PASSING EXISTING INTEGRITY METADATA
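
With the convention documented above, a false return from
bio_integrity_prep() means the bio has already been completed with an
error, so a submission path simply bails out. A minimal sketch (the
example_* name is hypothetical; compare the blk-core.c and blk-mq.c
hunks below):

	static blk_qc_t example_make_request(struct request_queue *q,
					     struct bio *bio)
	{
		if (!bio_integrity_prep(bio))
			return BLK_QC_T_NONE;	/* bio already ended */
		/* ... normal submission path ... */
		return BLK_QC_T_NONE;
	}
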
index 12bbc6b8657dd686b918f27a3d5f2d7db133d778..60a6835265fc386a229ec96338eb735c7dbea65b 100644 (file)
@@ -3483,11 +3483,17 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
                        }
                }
        }
-       /* Update weight both if it must be raised and if it must be lowered */
+       /*
+        * To improve latency (for this or other queues), immediately
+        * update weight both if it must be raised and if it must be
+        * lowered. Since the entity may be on some active tree here, and
+        * might have a pending change of its ioprio class, invoke the
+        * next function with the last parameter unset (see the
+        * comments on the function).
+        */
        if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1))
-               __bfq_entity_update_weight_prio(
-                       bfq_entity_service_tree(entity),
-                       entity);
+               __bfq_entity_update_weight_prio(bfq_entity_service_tree(entity),
+                                               entity, false);
 }
 
 /*
index 5c3bf986149215b3d98f753548cba9b4880f6e62..8fd83b885774392fc1368b728621a430ffcec519 100644 (file)
@@ -892,7 +892,8 @@ void bfq_put_idle_entity(struct bfq_service_tree *st,
                         struct bfq_entity *entity);
 struct bfq_service_tree *
 __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
-                               struct bfq_entity *entity);
+                               struct bfq_entity *entity,
+                               bool update_class_too);
 void bfq_bfqq_served(struct bfq_queue *bfqq, int served);
 void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
                          unsigned long time_ms);
index 8726ede19eef2c632a8e53d2bc340a7cae832e6e..5ec05cd42b80725cb297129739da2edf1372a70a 100644 (file)
@@ -694,10 +694,28 @@ struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity)
        return sched_data->service_tree + idx;
 }
 
-
+/*
+ * Update weight and priority of entity. If update_class_too is true,
+ * then update the ioprio_class of entity too.
+ *
+ * The reason why the update of ioprio_class is controlled through the
+ * last parameter is as follows. Changing the ioprio class of an
+ * entity implies changing the destination service trees for that
+ * entity. If such a change occurred when the entity is already on one
+ * of the service trees for its previous class, then the state of the
+ * entity would become more complex: none of the new possible service
+ * trees for the entity, according to bfq_entity_service_tree(), would
+ * match any of the possible service trees on which the entity
+ * is. Complex operations involving these trees, such as entity
+ * activations and deactivations, should take into account this
+ * additional complexity.  To avoid this issue, this function is
+ * invoked with update_class_too unset at the points in the code where
+ * the entity may happen to be on some tree.
+ */
 struct bfq_service_tree *
 __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
-                               struct bfq_entity *entity)
+                               struct bfq_entity *entity,
+                               bool update_class_too)
 {
        struct bfq_service_tree *new_st = old_st;
 
@@ -739,9 +757,15 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
                                  bfq_weight_to_ioprio(entity->orig_weight);
                }
 
-               if (bfqq)
+               if (bfqq && update_class_too)
                        bfqq->ioprio_class = bfqq->new_ioprio_class;
-               entity->prio_changed = 0;
+
+               /*
+                * Reset prio_changed only if the ioprio_class change
+                * is not pending any longer.
+                */
+               if (!bfqq || bfqq->ioprio_class == bfqq->new_ioprio_class)
+                       entity->prio_changed = 0;
 
                /*
                 * NOTE: here we may be changing the weight too early,
@@ -867,7 +891,12 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
 {
        struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
 
-       st = __bfq_entity_update_weight_prio(st, entity);
+       /*
+        * When this function is invoked, the entity is not in any
+        * service tree, so it is safe to invoke the next function with
+        * the last parameter set (see the comments on the function).
+        */
+       st = __bfq_entity_update_weight_prio(st, entity, true);
        bfq_calc_finish(entity, entity->budget);
 
        /*
index b8a3a65f73641a591ab7918b56c3a003594bee89..83e92beb3c9feb25f3be917e2b675f90b1544b5f 100644 (file)
@@ -102,7 +102,7 @@ EXPORT_SYMBOL(bio_integrity_alloc);
  * Description: Used to free the integrity portion of a bio. Usually
  * called from bio_free().
  */
-void bio_integrity_free(struct bio *bio)
+static void bio_integrity_free(struct bio *bio)
 {
        struct bio_integrity_payload *bip = bio_integrity(bio);
        struct bio_set *bs = bio->bi_pool;
@@ -120,8 +120,8 @@ void bio_integrity_free(struct bio *bio)
        }
 
        bio->bi_integrity = NULL;
+       bio->bi_opf &= ~REQ_INTEGRITY;
 }
-EXPORT_SYMBOL(bio_integrity_free);
 
 /**
  * bio_integrity_add_page - Attach integrity metadata
@@ -159,44 +159,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
 }
 EXPORT_SYMBOL(bio_integrity_add_page);
 
-/**
- * bio_integrity_enabled - Check whether integrity can be passed
- * @bio:       bio to check
- *
- * Description: Determines whether bio_integrity_prep() can be called
- * on this bio or not. bio data direction and target device must be
- * set prior to calling.  The functions honors the write_generate and
- * read_verify flags in sysfs.
- */
-bool bio_integrity_enabled(struct bio *bio)
-{
-       struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
-
-       if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
-               return false;
-
-       if (!bio_sectors(bio))
-               return false;
-
-       /* Already protected? */
-       if (bio_integrity(bio))
-               return false;
-
-       if (bi == NULL)
-               return false;
-
-       if (bio_data_dir(bio) == READ && bi->profile->verify_fn != NULL &&
-           (bi->flags & BLK_INTEGRITY_VERIFY))
-               return true;
-
-       if (bio_data_dir(bio) == WRITE && bi->profile->generate_fn != NULL &&
-           (bi->flags & BLK_INTEGRITY_GENERATE))
-               return true;
-
-       return false;
-}
-EXPORT_SYMBOL(bio_integrity_enabled);
-
 /**
  * bio_integrity_intervals - Return number of integrity intervals for a bio
  * @bi:                blk_integrity profile for device
@@ -222,10 +184,11 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
 /**
  * bio_integrity_process - Process integrity metadata for a bio
  * @bio:       bio to generate/verify integrity metadata for
+ * @proc_iter:  iterator to process
  * @proc_fn:   Pointer to the relevant processing function
  */
 static blk_status_t bio_integrity_process(struct bio *bio,
-                                integrity_processing_fn *proc_fn)
+               struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn)
 {
        struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
        struct blk_integrity_iter iter;
@@ -238,10 +201,10 @@ static blk_status_t bio_integrity_process(struct bio *bio,
 
        iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
        iter.interval = 1 << bi->interval_exp;
-       iter.seed = bip_get_seed(bip);
+       iter.seed = proc_iter->bi_sector;
        iter.prot_buf = prot_buf;
 
-       bio_for_each_segment(bv, bio, bviter) {
+       __bio_for_each_segment(bv, bio, bviter, *proc_iter) {
                void *kaddr = kmap_atomic(bv.bv_page);
 
                iter.data_buf = kaddr + bv.bv_offset;
@@ -262,14 +225,15 @@ static blk_status_t bio_integrity_process(struct bio *bio,
  * bio_integrity_prep - Prepare bio for integrity I/O
  * @bio:       bio to prepare
  *
- * Description: Allocates a buffer for integrity metadata, maps the
- * pages and attaches them to a bio.  The bio must have data
- * direction, target device and start sector set priot to calling.  In
- * the WRITE case, integrity metadata will be generated using the
- * block device's integrity function.  In the READ case, the buffer
+ * Description:  Checks if the bio already has an integrity payload attached.
+ * If it does, the payload has been generated by another kernel subsystem,
+ * and we just pass it through. Otherwise allocates integrity payload.
+ * The bio must have data direction, target device and start sector set prior
+ * to calling.  In the WRITE case, integrity metadata will be generated using
+ * the block device's integrity function.  In the READ case, the buffer
  * will be prepared for DMA and a suitable end_io handler set up.
  */
-int bio_integrity_prep(struct bio *bio)
+bool bio_integrity_prep(struct bio *bio)
 {
        struct bio_integrity_payload *bip;
        struct blk_integrity *bi;
@@ -279,20 +243,41 @@ int bio_integrity_prep(struct bio *bio)
        unsigned int len, nr_pages;
        unsigned int bytes, offset, i;
        unsigned int intervals;
+       blk_status_t status;
 
        bi = bdev_get_integrity(bio->bi_bdev);
        q = bdev_get_queue(bio->bi_bdev);
-       BUG_ON(bi == NULL);
-       BUG_ON(bio_integrity(bio));
+       if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
+               return true;
 
+       if (!bio_sectors(bio))
+               return true;
+
+       /* Already protected? */
+       if (bio_integrity(bio))
+               return true;
+
+       if (bi == NULL)
+               return true;
+
+       if (bio_data_dir(bio) == READ) {
+               if (!bi->profile->verify_fn ||
+                   !(bi->flags & BLK_INTEGRITY_VERIFY))
+                       return true;
+       } else {
+               if (!bi->profile->generate_fn ||
+                   !(bi->flags & BLK_INTEGRITY_GENERATE))
+                       return true;
+       }
        intervals = bio_integrity_intervals(bi, bio_sectors(bio));
 
        /* Allocate kernel buffer for protection data */
        len = intervals * bi->tuple_size;
        buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
+       status = BLK_STS_RESOURCE;
        if (unlikely(buf == NULL)) {
                printk(KERN_ERR "could not allocate integrity buffer\n");
-               return -ENOMEM;
+               goto err_end_io;
        }
 
        end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -304,7 +289,8 @@ int bio_integrity_prep(struct bio *bio)
        if (IS_ERR(bip)) {
                printk(KERN_ERR "could not allocate data integrity bioset\n");
                kfree(buf);
-               return PTR_ERR(bip);
+               status = BLK_STS_RESOURCE;
+               goto err_end_io;
        }
 
        bip->bip_flags |= BIP_BLOCK_INTEGRITY;
@@ -330,7 +316,7 @@ int bio_integrity_prep(struct bio *bio)
                                             bytes, offset);
 
                if (ret == 0)
-                       return 0;
+                       return false;
 
                if (ret < bytes)
                        break;
@@ -340,17 +326,18 @@ int bio_integrity_prep(struct bio *bio)
                offset = 0;
        }
 
-       /* Install custom I/O completion handler if read verify is enabled */
-       if (bio_data_dir(bio) == READ) {
-               bip->bip_end_io = bio->bi_end_io;
-               bio->bi_end_io = bio_integrity_endio;
+       /* Auto-generate integrity metadata if this is a write */
+       if (bio_data_dir(bio) == WRITE) {
+               bio_integrity_process(bio, &bio->bi_iter,
+                                     bi->profile->generate_fn);
        }
+       return true;
 
-       /* Auto-generate integrity metadata if this is a write */
-       if (bio_data_dir(bio) == WRITE)
-               bio_integrity_process(bio, bi->profile->generate_fn);
+err_end_io:
+       bio->bi_status = status;
+       bio_endio(bio);
+       return false;
 
-       return 0;
 }
 EXPORT_SYMBOL(bio_integrity_prep);
 
@@ -368,16 +355,26 @@ static void bio_integrity_verify_fn(struct work_struct *work)
                container_of(work, struct bio_integrity_payload, bip_work);
        struct bio *bio = bip->bip_bio;
        struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+       struct bvec_iter iter = bio->bi_iter;
 
-       bio->bi_status = bio_integrity_process(bio, bi->profile->verify_fn);
+       /*
+        * By the time verify is called, the bio's iterator has been
+        * advanced during split and completion, so we need to rewind
+        * the iterator to its original position.
+        */
+       if (bio_rewind_iter(bio, &iter, iter.bi_done)) {
+               bio->bi_status = bio_integrity_process(bio, &iter,
+                                                      bi->profile->verify_fn);
+       } else {
+               bio->bi_status = BLK_STS_IOERR;
+       }
 
-       /* Restore original bio completion handler */
-       bio->bi_end_io = bip->bip_end_io;
+       bio_integrity_free(bio);
        bio_endio(bio);
 }
 
 /**
- * bio_integrity_endio - Integrity I/O completion function
+ * __bio_integrity_endio - Integrity I/O completion function
  * @bio:       Protected bio
  * @error:     Pointer to errno
  *
@@ -388,27 +385,19 @@ static void bio_integrity_verify_fn(struct work_struct *work)
  * in process context. This function postpones completion
  * accordingly.
  */
-void bio_integrity_endio(struct bio *bio)
+bool __bio_integrity_endio(struct bio *bio)
 {
-       struct bio_integrity_payload *bip = bio_integrity(bio);
-
-       BUG_ON(bip->bip_bio != bio);
+       if (bio_op(bio) == REQ_OP_READ && !bio->bi_status) {
+               struct bio_integrity_payload *bip = bio_integrity(bio);
 
-       /* In case of an I/O error there is no point in verifying the
-        * integrity metadata.  Restore original bio end_io handler
-        * and run it.
-        */
-       if (bio->bi_status) {
-               bio->bi_end_io = bip->bip_end_io;
-               bio_endio(bio);
-
-               return;
+               INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
+               queue_work(kintegrityd_wq, &bip->bip_work);
+               return false;
        }
 
-       INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
-       queue_work(kintegrityd_wq, &bip->bip_work);
+       bio_integrity_free(bio);
+       return true;
 }
-EXPORT_SYMBOL(bio_integrity_endio);
 
 /**
  * bio_integrity_advance - Advance integrity vector
@@ -425,6 +414,7 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
        struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
        unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
 
+       bip->bip_iter.bi_sector += bytes_done >> 9;
        bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
 }
 EXPORT_SYMBOL(bio_integrity_advance);
@@ -432,22 +422,15 @@ EXPORT_SYMBOL(bio_integrity_advance);
 /**
  * bio_integrity_trim - Trim integrity vector
  * @bio:       bio whose integrity vector to update
- * @offset:    offset to first data sector
- * @sectors:   number of data sectors
  *
  * Description: Used to trim the integrity vector in a cloned bio.
- * The ivec will be advanced corresponding to 'offset' data sectors
- * and the length will be truncated corresponding to 'len' data
- * sectors.
  */
-void bio_integrity_trim(struct bio *bio, unsigned int offset,
-                       unsigned int sectors)
+void bio_integrity_trim(struct bio *bio)
 {
        struct bio_integrity_payload *bip = bio_integrity(bio);
        struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 
-       bio_integrity_advance(bio, offset << 9);
-       bip->bip_iter.bi_size = bio_integrity_bytes(bi, sectors);
+       bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
 }
 EXPORT_SYMBOL(bio_integrity_trim);
 
index 1cfcd0df3f30f7635d2ca5ac27e356196d56646f..9a63597aaaccd226dc03b9ab3595c693d5a918d7 100644 (file)
@@ -243,9 +243,6 @@ fallback:
 void bio_uninit(struct bio *bio)
 {
        bio_disassociate_task(bio);
-
-       if (bio_integrity(bio))
-               bio_integrity_free(bio);
 }
 EXPORT_SYMBOL(bio_uninit);
 
@@ -1813,6 +1810,8 @@ void bio_endio(struct bio *bio)
 again:
        if (!bio_remaining_done(bio))
                return;
+       if (!bio_integrity_endio(bio))
+               return;
 
        /*
         * Need to have a real endio function for chained bios, otherwise
@@ -1834,6 +1833,8 @@ again:
        }
 
        blk_throtl_bio_endio(bio);
+       /* release cgroup info */
+       bio_uninit(bio);
        if (bio->bi_end_io)
                bio->bi_end_io(bio);
 }
@@ -1868,7 +1869,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
        split->bi_iter.bi_size = sectors << 9;
 
        if (bio_integrity(split))
-               bio_integrity_trim(split, 0, sectors);
+               bio_integrity_trim(split);
 
        bio_advance(bio, split->bi_iter.bi_size);
 
@@ -1900,6 +1901,10 @@ void bio_trim(struct bio *bio, int offset, int size)
        bio_advance(bio, offset << 9);
 
        bio->bi_iter.bi_size = size;
+
+       if (bio_integrity(bio))
+               bio_integrity_trim(bio);
+
 }
 EXPORT_SYMBOL_GPL(bio_trim);
 
index af393d5a96807c6c59ce45a031c14042b72b5e6a..970b9c9638c55f4852019f452f64d2a4c66c1a9c 100644 (file)
@@ -1787,11 +1787,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 
        blk_queue_split(q, &bio);
 
-       if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-               bio->bi_status = BLK_STS_IOERR;
-               bio_endio(bio);
+       if (!bio_integrity_prep(bio))
                return BLK_QC_T_NONE;
-       }
 
        if (op_is_flush(bio->bi_opf)) {
                spin_lock_irq(q->queue_lock);
index e8caecd71688e00a4974b5d45b95bc53e4eaeac5..3fe0aec90597294a15e04b1f89144631b976ab94 100644 (file)
@@ -261,6 +261,19 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
        return 0;
 }
 
+/*
+ * Convert a number of 512B sectors to a number of pages.
+ * The result is limited to a number of pages that can fit into a BIO.
+ * Also make sure that the result is always at least 1 (page) for the cases
+ * where nr_sects is lower than the number of sectors in a page.
+ */
+static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
+{
+       sector_t bytes = (nr_sects << 9) + PAGE_SIZE - 1;
+
+       return min(bytes >> PAGE_SHIFT, (sector_t)BIO_MAX_PAGES);
+}
+
 /**
  * __blkdev_issue_zeroout - generate a number of zero-filled write bios
  * @bdev:      blockdev to issue
@@ -307,18 +320,18 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 
        ret = 0;
        while (nr_sects != 0) {
-               bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES),
-                               gfp_mask);
+               bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
+                              gfp_mask);
                bio->bi_iter.bi_sector = sector;
                bio->bi_bdev   = bdev;
                bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
                while (nr_sects != 0) {
-                       sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
-                       bi_size = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
+                       sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
+                       bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
                        nr_sects -= bi_size >> 9;
                        sector += bi_size >> 9;
-                       if (bi_size < (sz << 9))
+                       if (bi_size < sz)
                                break;
                }
                cond_resched();
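
A worked example of the rounding in __blkdev_sectors_to_bio_pages(),
assuming a 4 KiB page and a BIO_MAX_PAGES of 256 (common but
configuration-dependent values):

	nr_sects = 1      ->  bytes = 512 + 4095  = 4607  ->  1 page
	nr_sects = 8      ->  bytes = 4096 + 4095 = 8191  ->  1 page
	nr_sects = 1<<20  ->  131072 pages, clamped to 256

The old code passed min(nr_sects, BIO_MAX_PAGES) as the page count,
conflating sectors with pages; the helper converts sectors to pages,
rounding up and clamping to what fits in one bio.
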
index 7f0dc48ffb40895a499208474536e300f2efab34..4ab69435708c2b04df3ba3bda332fdfe74a5aac5 100644 (file)
@@ -515,10 +515,12 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
        }
 
        /*
-        * Default to 256, since we don't split into sync/async like the
-        * old code did. Additionally, this is a per-hw queue depth.
+        * Default to twice the smaller of the hw queue_depth and 128,
+        * since we don't split into sync/async like the old code did.
+        * Additionally, this is a per-hw queue depth.
         */
-       q->nr_requests = 2 * BLKDEV_MAX_RQ;
+       q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
+                                  BLKDEV_MAX_RQ);
 
        queue_for_each_hw_ctx(q, hctx, i) {
                ret = blk_mq_sched_alloc_tags(q, hctx, i);
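
Illustrative values for the new default (BLKDEV_MAX_RQ is 128):

	hw queue_depth =   32  ->  nr_requests = 2 * min(32, 128)   =  64
	hw queue_depth = 1024  ->  nr_requests = 2 * min(1024, 128) = 256

so a shallow device no longer gets the fixed 256-request scheduler
depth per hw queue that the old default imposed.
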
index 6cef42f419a57d43ece64983915f3aefce7fefcf..041f7b7fa0d6def444e9349b6cf748afc8e89b2d 100644 (file)
@@ -1547,10 +1547,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
        blk_queue_split(q, &bio);
 
-       if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-               bio_io_error(bio);
+       if (!bio_integrity_prep(bio))
                return BLK_QC_T_NONE;
-       }
 
        if (!is_flush_fua && !blk_queue_nomerges(q) &&
            blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
index 01ebb8185f6bfac8eb827ebdaee3a9d6845660dc..3a3d715bd7253b82522b30fcde33161f909dcf0b 100644 (file)
@@ -81,10 +81,21 @@ static inline void blk_queue_enter_live(struct request_queue *q)
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 void blk_flush_integrity(void);
+bool __bio_integrity_endio(struct bio *);
+static inline bool bio_integrity_endio(struct bio *bio)
+{
+       if (bio_integrity(bio))
+               return __bio_integrity_endio(bio);
+       return true;
+}
 #else
 static inline void blk_flush_integrity(void)
 {
 }
+static inline bool bio_integrity_endio(struct bio *bio)
+{
+       return true;
+}
 #endif
 
 void blk_timeout_work(struct work_struct *work);
index 3416dadf7b15b3c66cd71d04d9a954d7d0238657..a98db384048fa0f1e497fc740faebaabce79c955 100644 (file)
@@ -28,9 +28,6 @@
 
 typedef __be16 (csum_fn) (void *, unsigned int);
 
-static const __be16 APP_ESCAPE = (__force __be16) 0xffff;
-static const __be32 REF_ESCAPE = (__force __be32) 0xffffffff;
-
 static __be16 t10_pi_crc_fn(void *data, unsigned int len)
 {
        return cpu_to_be16(crc_t10dif(data, len));
@@ -82,7 +79,7 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
                switch (type) {
                case 1:
                case 2:
-                       if (pi->app_tag == APP_ESCAPE)
+                       if (pi->app_tag == T10_PI_APP_ESCAPE)
                                goto next;
 
                        if (be32_to_cpu(pi->ref_tag) !=
@@ -95,8 +92,8 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
                        }
                        break;
                case 3:
-                       if (pi->app_tag == APP_ESCAPE &&
-                           pi->ref_tag == REF_ESCAPE)
+                       if (pi->app_tag == T10_PI_APP_ESCAPE &&
+                           pi->ref_tag == T10_PI_REF_ESCAPE)
                                goto next;
                        break;
                }
index 02a611993bb4b8d3673b2d92b4ebee512ca434ce..678af946be30c9b9a0861e335a32952e07aa8410 100644 (file)
@@ -1944,6 +1944,13 @@ static void cciss_get_serial_no(ctlr_info_t *h, int logvol,
        return;
 }
 
+static void cciss_initialize_rq(struct request *rq)
+{
+       struct scsi_request *sreq = blk_mq_rq_to_pdu(rq);
+
+       scsi_req_init(sreq);
+}
+
 /*
  * cciss_add_disk sets up the block device queue for a logical drive
  */
@@ -1956,6 +1963,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
 
        disk->queue->cmd_size = sizeof(struct scsi_request);
        disk->queue->request_fn = do_cciss_request;
+       disk->queue->initialize_rq_fn = cciss_initialize_rq;
        disk->queue->queue_lock = &h->lock;
        queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, disk->queue);
        if (blk_init_allocated_queue(disk->queue) < 0)
index 61b046f256ca79e6ac70e4319f217aa6948c9822..4a3cfc7940dec5002be9d5c8d624ae08870caa64 100644 (file)
@@ -174,7 +174,6 @@ static void mtip_init_cmd_header(struct request *rq)
 {
        struct driver_data *dd = rq->q->queuedata;
        struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
-       u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
 
        /* Point the command headers at the command tables. */
        cmd->command_header = dd->port->command_list +
@@ -182,7 +181,7 @@ static void mtip_init_cmd_header(struct request *rq)
        cmd->command_header_dma = dd->port->command_list_dma +
                                (sizeof(struct mtip_cmd_hdr) * rq->tag);
 
-       if (host_cap_64)
+       if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags))
                cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16);
 
        cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
@@ -386,6 +385,7 @@ static void mtip_init_port(struct mtip_port *port)
                         port->mmio + PORT_LST_ADDR_HI);
                writel((port->rxfis_dma >> 16) >> 16,
                         port->mmio + PORT_FIS_ADDR_HI);
+               set_bit(MTIP_PF_HOST_CAP_64, &port->flags);
        }
 
        writel(port->command_list_dma & 0xFFFFFFFF,
@@ -950,7 +950,7 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
        unsigned long to;
        bool active = true;
 
-       blk_mq_stop_hw_queues(port->dd->queue);
+       blk_mq_quiesce_queue(port->dd->queue);
 
        to = jiffies + msecs_to_jiffies(timeout);
        do {
@@ -970,10 +970,10 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
                        break;
        } while (time_before(jiffies, to));
 
-       blk_mq_start_stopped_hw_queues(port->dd->queue, true);
+       blk_mq_unquiesce_queue(port->dd->queue);
        return active ? -EBUSY : 0;
 err_fault:
-       blk_mq_start_stopped_hw_queues(port->dd->queue, true);
+       blk_mq_unquiesce_queue(port->dd->queue);
        return -EFAULT;
 }
 
@@ -2737,6 +2737,9 @@ static void mtip_abort_cmd(struct request *req, void *data,
        struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
        struct driver_data *dd = data;
 
+       if (!blk_mq_request_started(req))
+               return;
+
        dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
 
        clear_bit(req->tag, dd->port->cmds_to_issue);
@@ -2749,6 +2752,9 @@ static void mtip_queue_cmd(struct request *req, void *data,
 {
        struct driver_data *dd = data;
 
+       if (!blk_mq_request_started(req))
+               return;
+
        set_bit(req->tag, dd->port->cmds_to_issue);
        blk_abort_request(req);
 }
@@ -2814,6 +2820,8 @@ restart_eh:
                                dev_warn(&dd->pdev->dev,
                                        "Completion workers still active!");
 
+                       blk_mq_quiesce_queue(dd->queue);
+
                        spin_lock(dd->queue->queue_lock);
                        blk_mq_tagset_busy_iter(&dd->tags,
                                                        mtip_queue_cmd, dd);
@@ -2826,6 +2834,8 @@ restart_eh:
                                                        mtip_abort_cmd, dd);
 
                        clear_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags);
+
+                       blk_mq_unquiesce_queue(dd->queue);
                }
 
                if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
@@ -3995,8 +4005,9 @@ static int mtip_block_remove(struct driver_data *dd)
                                                dd->disk->disk_name);
 
        blk_freeze_queue_start(dd->queue);
-       blk_mq_stop_hw_queues(dd->queue);
+       blk_mq_quiesce_queue(dd->queue);
        blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd);
+       blk_mq_unquiesce_queue(dd->queue);
 
        /*
         * Delete our gendisk structure. This also removes the device
index e8286af50e16b6b4c561e279dce2b3ed748d2a9a..e20e55dab4436730104d2991594a4f4c2f9e7ded 100644 (file)
@@ -140,6 +140,7 @@ enum {
                                (1 << MTIP_PF_SE_ACTIVE_BIT) |
                                (1 << MTIP_PF_DM_ACTIVE_BIT) |
                                (1 << MTIP_PF_TO_ACTIVE_BIT)),
+       MTIP_PF_HOST_CAP_64         = 10, /* cache HOST_CAP_64 */
 
        MTIP_PF_SVC_THD_ACTIVE_BIT  = 4,
        MTIP_PF_ISSUE_CMDS_BIT      = 5,
index 977ec960dd2f974b0c09db746671a7dbd0130652..dea7d85134ee6eaa04803f1a0ce4878756e1461a 100644 (file)
@@ -661,9 +661,9 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
 
 static void nbd_clear_que(struct nbd_device *nbd)
 {
-       blk_mq_stop_hw_queues(nbd->disk->queue);
+       blk_mq_quiesce_queue(nbd->disk->queue);
        blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
-       blk_mq_start_hw_queues(nbd->disk->queue);
+       blk_mq_unquiesce_queue(nbd->disk->queue);
        dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
 }
 
index 71f4422eba8152bf4cbb3dbfa50bd9cd3291ac5b..85c24cace9732fa62b67afd81c827437a04adfb9 100644 (file)
@@ -844,9 +844,6 @@ static int __init null_init(void)
                queue_mode = NULL_Q_MQ;
        }
 
-       if (queue_mode == NULL_Q_MQ && shared_tags)
-               null_init_tag_set(&tag_set);
-
        if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
                if (submit_queues < nr_online_nodes) {
                        pr_warn("null_blk: submit_queues param is set to %u.",
@@ -858,11 +855,19 @@ static int __init null_init(void)
        else if (!submit_queues)
                submit_queues = 1;
 
+       if (queue_mode == NULL_Q_MQ && shared_tags) {
+               ret = null_init_tag_set(&tag_set);
+               if (ret)
+                       return ret;
+       }
+
        mutex_init(&lock);
 
        null_major = register_blkdev(0, "nullb");
-       if (null_major < 0)
-               return null_major;
+       if (null_major < 0) {
+               ret = null_major;
+               goto err_tagset;
+       }
 
        if (use_lightnvm) {
                ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64),
@@ -891,6 +896,9 @@ err_dev:
        kmem_cache_destroy(ppa_cache);
 err_ppa:
        unregister_blkdev(null_major, "nullb");
+err_tagset:
+       if (queue_mode == NULL_Q_MQ && shared_tags)
+               blk_mq_free_tag_set(&tag_set);
        return ret;
 }
 
index 0297ad7c1452a0985963ce5655869ed061809b09..4e02aa5fdac053dfa254fb05aac1b2c252e4c1be 100644 (file)
@@ -840,7 +840,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
        /* Make sure no work handler is accessing the device. */
        flush_work(&vblk->config_work);
 
-       blk_mq_stop_hw_queues(vblk->disk->queue);
+       blk_mq_quiesce_queue(vblk->disk->queue);
 
        vdev->config->del_vqs(vdev);
        return 0;
@@ -857,7 +857,7 @@ static int virtblk_restore(struct virtio_device *vdev)
 
        virtio_device_ready(vdev);
 
-       blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
+       blk_mq_unquiesce_queue(vblk->disk->queue);
        return 0;
 }
 #endif
index 11fe0c5b2a9cf1e3ddf9bd5dedd0549c7a0f9bd1..81501644fb158be8731773379714cd6ebfbca6fc 100644 (file)
@@ -1670,13 +1670,10 @@ void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
        queue_work(wq, &line_ws->ws);
 }
 
-void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
-                 unsigned long *lun_bitmap)
+static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list,
+                            int nr_ppas, int pos)
 {
-       struct nvm_tgt_dev *dev = pblk->dev;
-       struct nvm_geo *geo = &dev->geo;
-       struct pblk_lun *rlun;
-       int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+       struct pblk_lun *rlun = &pblk->luns[pos];
        int ret;
 
        /*
@@ -1690,14 +1687,8 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
                WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
                                ppa_list[0].g.ch != ppa_list[i].g.ch);
 #endif
-       /* If the LUN has been locked for this same request, do no attempt to
-        * lock it again
-        */
-       if (test_and_set_bit(pos, lun_bitmap))
-               return;
 
-       rlun = &pblk->luns[pos];
-       ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
+       ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000));
        if (ret) {
                switch (ret) {
                case -ETIME:
@@ -1710,6 +1701,50 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
        }
 }
 
+void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+       __pblk_down_page(pblk, ppa_list, nr_ppas, pos);
+}
+
+void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
+                 unsigned long *lun_bitmap)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+       /* If the LUN has been locked for this same request, do not attempt to
+        * lock it again
+        */
+       if (test_and_set_bit(pos, lun_bitmap))
+               return;
+
+       __pblk_down_page(pblk, ppa_list, nr_ppas, pos);
+}
+
+void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_lun *rlun;
+       int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+#ifdef CONFIG_NVM_DEBUG
+       int i;
+
+       for (i = 1; i < nr_ppas; i++)
+               WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
+                               ppa_list[0].g.ch != ppa_list[i].g.ch);
+#endif
+
+       rlun = &pblk->luns[pos];
+       up(&rlun->wr_sem);
+}
+
 void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
                unsigned long *lun_bitmap)
 {
index 0e48d3e4e143773fb937b2d3713c5e0c34adf799..cb556e06673e7465bf856cab8e3329867e676382 100644 (file)
@@ -340,9 +340,14 @@ static void pblk_end_io_recov(struct nvm_rq *rqd)
        struct pblk *pblk = pad_rq->pblk;
        struct nvm_tgt_dev *dev = pblk->dev;
 
-       kref_put(&pad_rq->ref, pblk_recov_complete);
+       pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+
+       bio_put(rqd->bio);
        nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
        pblk_free_rqd(pblk, rqd, WRITE);
+
+       atomic_dec(&pblk->inflight_io);
+       kref_put(&pad_rq->ref, pblk_recov_complete);
 }
 
 static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
@@ -385,7 +390,7 @@ next_pad_rq:
        rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
        if (rq_ppas < pblk->min_write_pgs) {
                pr_err("pblk: corrupted pad line %d\n", line->id);
-               goto free_rq;
+               goto fail_free_pad;
        }
 
        rq_len = rq_ppas * geo->sec_size;
@@ -393,7 +398,7 @@ next_pad_rq:
        meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
        if (!meta_list) {
                ret = -ENOMEM;
-               goto free_data;
+               goto fail_free_pad;
        }
 
        ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
@@ -404,9 +409,9 @@ next_pad_rq:
                ret = PTR_ERR(rqd);
                goto fail_free_meta;
        }
-       memset(rqd, 0, pblk_w_rq_size);
 
-       bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
+       bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
+                                               PBLK_VMALLOC_META, GFP_KERNEL);
        if (IS_ERR(bio)) {
                ret = PTR_ERR(bio);
                goto fail_free_rqd;
@@ -453,15 +458,15 @@ next_pad_rq:
        }
 
        kref_get(&pad_rq->ref);
+       pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
 
        ret = pblk_submit_io(pblk, rqd);
        if (ret) {
                pr_err("pblk: I/O submission failed: %d\n", ret);
-               goto free_data;
+               pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+               goto fail_free_bio;
        }
 
-       atomic_dec(&pblk->inflight_io);
-
        left_line_ppas -= rq_ppas;
        left_ppas -= rq_ppas;
        if (left_ppas && left_line_ppas)
@@ -475,17 +480,23 @@ next_pad_rq:
                ret = -ETIME;
        }
 
+       if (!pblk_line_is_full(line))
+               pr_err("pblk: corrupted padded line: %d\n", line->id);
+
+       vfree(data);
 free_rq:
        kfree(pad_rq);
-free_data:
-       vfree(data);
        return ret;
 
+fail_free_bio:
+       bio_put(bio);
 fail_free_rqd:
        pblk_free_rqd(pblk, rqd, WRITE);
 fail_free_meta:
        nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+fail_free_pad:
        kfree(pad_rq);
+       vfree(data);
        return ret;
 }
 
index d62a8f4faaf433189710b01ce1b3227205a6f266..3ad9e56d2473412ae3ead5b2946066643ebd541c 100644 (file)
@@ -39,9 +39,7 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
 
        ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
 
-       if (rqd->meta_list)
-               nvm_dev_dma_free(dev->parent, rqd->meta_list,
-                                                       rqd->dma_meta_list);
+       nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 
        bio_put(rqd->bio);
        pblk_free_rqd(pblk, rqd, WRITE);
@@ -178,15 +176,12 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)
 {
        struct pblk *pblk = rqd->private;
        struct nvm_tgt_dev *dev = pblk->dev;
-       struct nvm_geo *geo = &dev->geo;
        struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
        struct pblk_line *line = m_ctx->private;
        struct pblk_emeta *emeta = line->emeta;
-       int pos = pblk_ppa_to_pos(geo, rqd->ppa_list[0]);
-       struct pblk_lun *rlun = &pblk->luns[pos];
        int sync;
 
-       up(&rlun->wr_sem);
+       pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
 
        if (rqd->error) {
                pblk_log_write_err(pblk, rqd);
@@ -203,6 +198,7 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)
                                                                pblk->close_wq);
 
        bio_put(rqd->bio);
+       nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
        pblk_free_rqd(pblk, rqd, READ);
 
        atomic_dec(&pblk->inflight_io);
@@ -226,9 +222,6 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
        if (!rqd->meta_list)
                return -ENOMEM;
 
-       if (unlikely(nr_secs == 1))
-               return 0;
-
        rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
        rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
 
@@ -367,7 +360,6 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_emeta *emeta = meta_line->emeta;
        struct pblk_g_ctx *m_ctx;
-       struct pblk_lun *rlun;
        struct bio *bio;
        struct nvm_rq *rqd;
        void *data;
@@ -411,13 +403,6 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
                        rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
        }
 
-       rlun = &pblk->luns[pblk_ppa_to_pos(geo, rqd->ppa_list[0])];
-       ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
-       if (ret) {
-               pr_err("pblk: lun semaphore timed out (%d)\n", ret);
-               goto fail_free_bio;
-       }
-
        emeta->mem += rq_len;
        if (emeta->mem >= lm->emeta_len[0]) {
                spin_lock(&l_mg->close_lock);
@@ -427,6 +412,8 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
                spin_unlock(&l_mg->close_lock);
        }
 
+       pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+
        ret = pblk_submit_io(pblk, rqd);
        if (ret) {
                pr_err("pblk: emeta I/O submission failed: %d\n", ret);
@@ -436,10 +423,13 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
        return NVM_IO_OK;
 
 fail_rollback:
+       pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
        spin_lock(&l_mg->close_lock);
        pblk_dealloc_page(pblk, meta_line, rq_ppas);
        list_add(&meta_line->list, &meta_line->list);
        spin_unlock(&l_mg->close_lock);
+
+       nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 fail_free_bio:
        if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META))
                bio_put(bio);
index 15931381348c70c255f4d6d38ab00fe143dc8554..0c5692cc2f605861da62a93de7a61aafb3f09f75 100644 (file)
@@ -739,8 +739,10 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
                   unsigned long secs_to_flush);
+void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas);
 void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
                  unsigned long *lun_bitmap);
+void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas);
 void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
                unsigned long *lun_bitmap);
 void pblk_end_bio_sync(struct bio *bio);
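
The pblk_down_page()/pblk_up_page() pair declared above brackets a
single I/O against the per-LUN write semaphore, without the lun_bitmap
bookkeeping that pblk_down_rq()/pblk_up_rq() do. A rough sketch of the
intended pairing, following the recovery and metadata-write hunks in
this series:

	pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
	ret = pblk_submit_io(pblk, rqd);
	if (ret)
		/* nothing was issued; release the LUN semaphore */
		pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
	/* on success, the completion path calls pblk_up_page() */
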
index 10cabe961bdbe663c999293131833355a20456e0..2edbcc2d7d3f6274b689447859470d2773ea4e3b 100644 (file)
@@ -1279,7 +1279,7 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
        clone->bi_iter.bi_size = to_bytes(len);
 
        if (unlikely(bio_integrity(bio) != NULL))
-               bio_integrity_trim(clone, 0, len);
+               bio_integrity_trim(clone);
 
        return 0;
 }
index f12d23c49771ca95d8fdf31949ec5be2805986c6..345acca576b3c077b68e339437dd2e54ae76384a 100644 (file)
@@ -106,7 +106,8 @@ static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk,
 
                len -= cur_len;
                dev_offset += cur_len;
-               bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
+               if (!bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len))
+                       return -EIO;
        }
 
        return err;
@@ -179,16 +180,8 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
        int err = 0, rw;
        bool do_acct;
 
-       /*
-        * bio_integrity_enabled also checks if the bio already has an
-        * integrity payload attached. If it does, we *don't* do a
-        * bio_integrity_prep here - the payload has been generated by
-        * another kernel subsystem, and we just pass it through.
-        */
-       if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-               bio->bi_status = BLK_STS_IOERR;
-               goto out;
-       }
+       if (!bio_integrity_prep(bio))
+               return BLK_QC_T_NONE;
 
        bip = bio_integrity(bio);
        nsblk = q->queuedata;
@@ -212,7 +205,6 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
        if (do_acct)
                nd_iostat_end(bio, start);
 
- out:
        bio_endio(bio);
        return BLK_QC_T_NONE;
 }
index 64216dea5278479f3ada0935d68822d9626ba1af..14323faf8bd96fbeacc6a55dc6733553a53e1c68 100644 (file)
@@ -985,7 +985,8 @@ static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
 
                len -= cur_len;
                meta_nsoff += cur_len;
-               bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
+               if (!bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len))
+                       return -EIO;
        }
 
        return ret;
@@ -1203,16 +1204,8 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
        int err = 0;
        bool do_acct;
 
-       /*
-        * bio_integrity_enabled also checks if the bio already has an
-        * integrity payload attached. If it does, we *don't* do a
-        * bio_integrity_prep here - the payload has been generated by
-        * another kernel subsystem, and we just pass it through.
-        */
-       if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-               bio->bi_status = BLK_STS_IOERR;
-               goto out;
-       }
+       if (!bio_integrity_prep(bio))
+               return BLK_QC_T_NONE;
 
        do_acct = nd_iostat_start(bio, &start);
        bio_for_each_segment(bvec, bio, iter) {
@@ -1239,7 +1232,6 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
        if (do_acct)
                nd_iostat_end(bio, start);
 
-out:
        bio_endio(bio);
        return BLK_QC_T_NONE;
 }
index d70df1d0072d434eb6addd99308a17a52e381782..cb96f4a7ae3a93b4f6736d6b375098a88e935fa3 100644 (file)
@@ -131,7 +131,7 @@ void nvme_complete_rq(struct request *req)
 {
        if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
                nvme_req(req)->retries++;
-               blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
+               blk_mq_requeue_request(req, true);
                return;
        }
 
@@ -2591,12 +2591,29 @@ static void nvme_release_instance(struct nvme_ctrl *ctrl)
        spin_unlock(&dev_list_lock);
 }
 
-void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
+       nvme_stop_keep_alive(ctrl);
        flush_work(&ctrl->async_event_work);
        flush_work(&ctrl->scan_work);
-       nvme_remove_namespaces(ctrl);
+}
+EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
 
+void nvme_start_ctrl(struct nvme_ctrl *ctrl)
+{
+       if (ctrl->kato)
+               nvme_start_keep_alive(ctrl);
+
+       if (ctrl->queue_count > 1) {
+               nvme_queue_scan(ctrl);
+               nvme_queue_async_events(ctrl);
+               nvme_start_queues(ctrl);
+       }
+}
+EXPORT_SYMBOL_GPL(nvme_start_ctrl);
+
+void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+{
        device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
 
        spin_lock(&dev_list_lock);
@@ -2694,9 +2711,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
        /* Forcibly unquiesce queues to avoid blocking dispatch */
        blk_mq_unquiesce_queue(ctrl->admin_q);
 
-       /* Forcibly start all queues to avoid having stuck requests */
-       blk_mq_start_hw_queues(ctrl->admin_q);
-
        list_for_each_entry(ns, &ctrl->namespaces, list) {
                /*
                 * Revalidating a dead namespace sets capacity to 0. This will
@@ -2709,16 +2723,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 
                /* Forcibly unquiesce queues to avoid blocking dispatch */
                blk_mq_unquiesce_queue(ns->queue);
-
-               /*
-                * Forcibly start all queues to avoid having stuck requests.
-                * Note that we must ensure the queues are not stopped
-                * when the final removal happens.
-                */
-               blk_mq_start_hw_queues(ns->queue);
-
-               /* draining requests in requeue list */
-               blk_mq_kick_requeue_list(ns->queue);
        }
        mutex_unlock(&ctrl->namespaces_mutex);
 }
@@ -2787,10 +2791,8 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
        struct nvme_ns *ns;
 
        mutex_lock(&ctrl->namespaces_mutex);
-       list_for_each_entry(ns, &ctrl->namespaces, list) {
+       list_for_each_entry(ns, &ctrl->namespaces, list)
                blk_mq_unquiesce_queue(ns->queue);
-               blk_mq_kick_requeue_list(ns->queue);
-       }
        mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
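
The nvme_uninit_ctrl() split above gives transports a symmetric pair
around teardown and (re)start. A rough sketch of the intended ordering
in a transport's delete and reset paths (example_teardown_queues() is
a hypothetical transport-specific step):

	/* delete / reset entry */
	nvme_stop_ctrl(&ctrl->ctrl);	/* keep-alive, AEN and scan work */
	example_teardown_queues(ctrl);	/* transport-specific */
	nvme_uninit_ctrl(&ctrl->ctrl);	/* on final removal only */

	/* after a successful (re)connect */
	nvme_start_ctrl(&ctrl->ctrl);	/* keep-alive, scan, AEN, queues */
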
index ed87214fdc0e41fbdb1ad8a3e2b49248b7b29fb9..d666ada39a9be6d6f27fff36904af75063c035d6 100644 (file)
@@ -148,13 +148,10 @@ struct nvme_fc_ctrl {
        struct device           *dev;
        struct nvme_fc_lport    *lport;
        struct nvme_fc_rport    *rport;
-       u32                     queue_count;
        u32                     cnum;
 
        u64                     association_id;
 
-       u64                     cap;
-
        struct list_head        ctrl_list;      /* rport->ctrl_list */
 
        struct blk_mq_tag_set   admin_tag_set;
@@ -1614,7 +1611,7 @@ nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl)
 {
        int i;
 
-       for (i = 1; i < ctrl->queue_count; i++)
+       for (i = 1; i < ctrl->ctrl.queue_count; i++)
                nvme_fc_free_queue(&ctrl->queues[i]);
 }
 
@@ -1635,10 +1632,10 @@ __nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl,
 static void
 nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl)
 {
-       struct nvme_fc_queue *queue = &ctrl->queues[ctrl->queue_count - 1];
+       struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1];
        int i;
 
-       for (i = ctrl->queue_count - 1; i >= 1; i--, queue--)
+       for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--)
                __nvme_fc_delete_hw_queue(ctrl, queue, i);
 }
 
@@ -1648,7 +1645,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
        struct nvme_fc_queue *queue = &ctrl->queues[1];
        int i, ret;
 
-       for (i = 1; i < ctrl->queue_count; i++, queue++) {
+       for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) {
                ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize);
                if (ret)
                        goto delete_queues;
@@ -1667,7 +1664,7 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
 {
        int i, ret = 0;
 
-       for (i = 1; i < ctrl->queue_count; i++) {
+       for (i = 1; i < ctrl->ctrl.queue_count; i++) {
                ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize,
                                        (qsize / 5));
                if (ret)
@@ -1685,7 +1682,7 @@ nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl)
 {
        int i;
 
-       for (i = 1; i < ctrl->queue_count; i++)
+       for (i = 1; i < ctrl->ctrl.queue_count; i++)
                nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize);
 }
 
@@ -1706,6 +1703,7 @@ nvme_fc_ctrl_free(struct kref *ref)
        list_del(&ctrl->ctrl_list);
        spin_unlock_irqrestore(&ctrl->rport->lock, flags);
 
+       blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
        blk_cleanup_queue(ctrl->ctrl.admin_q);
        blk_mq_free_tag_set(&ctrl->admin_tag_set);
 
@@ -1969,10 +1967,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
                if (ret != -EBUSY)
                        return BLK_STS_IOERR;
 
-               if (op->rq) {
-                       blk_mq_stop_hw_queues(op->rq->q);
-                       blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
-               }
+               if (op->rq)
+                       blk_mq_delay_run_hw_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
+
                return BLK_STS_RESOURCE;
        }
 
@@ -2178,17 +2175,20 @@ static int
 nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 {
        struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+       unsigned int nr_io_queues;
        int ret;
 
-       ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
+       nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
+                               ctrl->lport->ops->max_hw_queues);
+       ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
        if (ret) {
                dev_info(ctrl->ctrl.device,
                        "set_queue_count failed: %d\n", ret);
                return ret;
        }
 
-       ctrl->queue_count = opts->nr_io_queues + 1;
-       if (!opts->nr_io_queues)
+       ctrl->ctrl.queue_count = nr_io_queues + 1;
+       if (!nr_io_queues)
                return 0;
 
        nvme_fc_init_io_queues(ctrl);
@@ -2204,7 +2204,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
                                                sizeof(struct scatterlist)) +
                                        ctrl->lport->ops->fcprqst_priv_sz;
        ctrl->tag_set.driver_data = ctrl;
-       ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+       ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
        ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
 
        ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
@@ -2232,7 +2232,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 out_delete_hw_queues:
        nvme_fc_delete_hw_io_queues(ctrl);
 out_cleanup_blk_queue:
-       nvme_stop_keep_alive(&ctrl->ctrl);
        blk_cleanup_queue(ctrl->ctrl.connect_q);
 out_free_tag_set:
        blk_mq_free_tag_set(&ctrl->tag_set);
@@ -2248,17 +2247,21 @@ static int
 nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 {
        struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+       unsigned int nr_io_queues;
        int ret;
 
-       ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
+       nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
+                               ctrl->lport->ops->max_hw_queues);
+       ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
        if (ret) {
                dev_info(ctrl->ctrl.device,
                        "set_queue_count failed: %d\n", ret);
                return ret;
        }
 
+       ctrl->ctrl.queue_count = nr_io_queues + 1;
        /* check whether io queues already exist */
-       if (ctrl->queue_count == 1)
+       if (ctrl->ctrl.queue_count == 1)
                return 0;
 
        nvme_fc_init_io_queues(ctrl);
@@ -2275,6 +2278,8 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
        if (ret)
                goto out_delete_hw_queues;
 
+       blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
+
        return 0;
 
 out_delete_hw_queues:
@@ -2316,7 +2321,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
                goto out_delete_hw_queue;
 
        if (ctrl->ctrl.state != NVME_CTRL_NEW)
-               blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+               blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 
        ret = nvmf_connect_admin_queue(&ctrl->ctrl);
        if (ret)
@@ -2329,7 +2334,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
         * prior connection values
         */
 
-       ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+       ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
        if (ret) {
                dev_err(ctrl->ctrl.device,
                        "prop_get NVME_REG_CAP failed\n");
@@ -2337,9 +2342,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
        }
 
        ctrl->ctrl.sqsize =
-               min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);
+               min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize);
 
-       ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+       ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
        if (ret)
                goto out_disconnect_admin_queue;
 
@@ -2360,8 +2365,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
                goto out_disconnect_admin_queue;
        }
 
-       nvme_start_keep_alive(&ctrl->ctrl);
-
        /* FC-NVME supports normal SGL Data Block Descriptors */
 
        if (opts->queue_size > ctrl->ctrl.maxcmd) {
@@ -2381,7 +2384,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
         * Create the io queues
         */
 
-       if (ctrl->queue_count > 1) {
+       if (ctrl->ctrl.queue_count > 1) {
                if (ctrl->ctrl.state == NVME_CTRL_NEW)
                        ret = nvme_fc_create_io_queues(ctrl);
                else
@@ -2395,17 +2398,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 
        ctrl->ctrl.nr_reconnects = 0;
 
-       if (ctrl->queue_count > 1) {
-               nvme_start_queues(&ctrl->ctrl);
-               nvme_queue_scan(&ctrl->ctrl);
-               nvme_queue_async_events(&ctrl->ctrl);
-       }
+       nvme_start_ctrl(&ctrl->ctrl);
 
        return 0;       /* Success */
 
 out_term_aen_ops:
        nvme_fc_term_aen_ops(ctrl);
-       nvme_stop_keep_alive(&ctrl->ctrl);
 out_disconnect_admin_queue:
        /* send a Disconnect(association) LS to fc-nvme target */
        nvme_fc_xmt_disconnect_assoc(ctrl);
@@ -2428,8 +2426,6 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 {
        unsigned long flags;
 
-       nvme_stop_keep_alive(&ctrl->ctrl);
-
        spin_lock_irqsave(&ctrl->lock, flags);
        ctrl->flags |= FCCTRL_TERMIO;
        ctrl->iocnt = 0;
@@ -2447,7 +2443,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
         * io requests back to the block layer as part of normal completions
         * (but with error status).
         */
-       if (ctrl->queue_count > 1) {
+       if (ctrl->ctrl.queue_count > 1) {
                nvme_stop_queues(&ctrl->ctrl);
                blk_mq_tagset_busy_iter(&ctrl->tag_set,
                                nvme_fc_terminate_exchange, &ctrl->ctrl);
@@ -2470,7 +2466,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
         * use blk_mq_tagset_busy_itr() and the transport routine to
         * terminate the exchanges.
         */
-       blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+       blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_fc_terminate_exchange, &ctrl->ctrl);
 
@@ -2511,7 +2507,8 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
 
        cancel_work_sync(&ctrl->ctrl.reset_work);
        cancel_delayed_work_sync(&ctrl->connect_work);
-
+       nvme_stop_ctrl(&ctrl->ctrl);
+       nvme_remove_namespaces(&ctrl->ctrl);
        /*
         * kill the association on the link side.  this will block
         * waiting for io to terminate
@@ -2606,6 +2603,7 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
                container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
        int ret;
 
+       nvme_stop_ctrl(&ctrl->ctrl);
        /* will block while waiting for io to terminate */
        nvme_fc_delete_association(ctrl);
 
@@ -2702,18 +2700,17 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
        spin_lock_init(&ctrl->lock);
 
        /* io queue count */
-       ctrl->queue_count = min_t(unsigned int,
+       ctrl->ctrl.queue_count = min_t(unsigned int,
                                opts->nr_io_queues,
                                lport->ops->max_hw_queues);
-       opts->nr_io_queues = ctrl->queue_count; /* so opts has valid value */
-       ctrl->queue_count++;    /* +1 for admin queue */
+       ctrl->ctrl.queue_count++;       /* +1 for admin queue */
 
        ctrl->ctrl.sqsize = opts->queue_size - 1;
        ctrl->ctrl.kato = opts->kato;
 
        ret = -ENOMEM;
-       ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue),
-                               GFP_KERNEL);
+       ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
+                               sizeof(struct nvme_fc_queue), GFP_KERNEL);
        if (!ctrl->queues)
                goto out_free_ida;
 
index d70ff0fdd36bdf2cda89391e509df72af58e7d29..8f2a168ddc013d8bc1a10685e611fdc309429b6f 100644 (file)
@@ -142,7 +142,9 @@ struct nvme_ctrl {
        u16 cntlid;
 
        u32 ctrl_config;
+       u32 queue_count;
 
+       u64 cap;
        u32 page_size;
        u32 max_hw_sectors;
        u16 oncs;
@@ -278,6 +280,8 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
 int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
                const struct nvme_ctrl_ops *ops, unsigned long quirks);
 void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
+void nvme_start_ctrl(struct nvme_ctrl *ctrl);
+void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
 void nvme_put_ctrl(struct nvme_ctrl *ctrl);
 int nvme_init_identify(struct nvme_ctrl *ctrl);
 
index b7a84c523475739ece377c884bb8da3c22b91e4d..d10d2f279d19ad5b5924ffb16a70762f4548da42 100644 (file)
@@ -35,7 +35,6 @@
 
 #include "nvme.h"
 
-#define NVME_Q_DEPTH           1024
 #define SQ_SIZE(depth)         (depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)         (depth * sizeof(struct nvme_completion))
 
@@ -57,6 +56,16 @@ module_param(max_host_mem_size_mb, uint, 0444);
 MODULE_PARM_DESC(max_host_mem_size_mb,
        "Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
 
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops io_queue_depth_ops = {
+       .set = io_queue_depth_set,
+       .get = param_get_int,
+};
+
+static int io_queue_depth = 1024;
+module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
+MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should be >= 2");
+
 struct nvme_dev;
 struct nvme_queue;
 
@@ -74,7 +83,6 @@ struct nvme_dev {
        struct device *dev;
        struct dma_pool *prp_page_pool;
        struct dma_pool *prp_small_pool;
-       unsigned queue_count;
        unsigned online_queues;
        unsigned max_qid;
        int q_depth;
@@ -105,6 +113,17 @@ struct nvme_dev {
        void **host_mem_desc_bufs;
 };
 
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
+{
+       int n = 0, ret;
+
+       ret = kstrtoint(val, 10, &n);
+       if (ret != 0 || n < 2)
+               return -EINVAL;
+
+       return param_set_int(val, kp);
+}
+
 static inline unsigned int sq_idx(unsigned int qid, u32 stride)
 {
        return qid * 2 * stride;
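The new io_queue_depth parameter is validated by the callback above: anything that does not parse as an integer >= 2 is rejected before param_set_int() stores it, and a later hunk clamps the per-device depth to min(MQES + 1, io_queue_depth). Since it is registered with mode 0644 it is also writable at runtime via /sys/module/nvme/parameters/io_queue_depth. A small runnable user-space sketch of the same validation rule (check_io_queue_depth is a hypothetical stand-in, not a kernel function):

        #include <errno.h>
        #include <stdio.h>
        #include <stdlib.h>

        /* Hypothetical user-space stand-in for io_queue_depth_set(). */
        static int check_io_queue_depth(const char *val)
        {
                char *end;
                long n = strtol(val, &end, 10);

                if (*val == '\0' || *end != '\0' || n < 2)
                        return -EINVAL; /* same rule as the kernel callback */
                return 0;
        }

        int main(void)
        {
                printf("\"1\"    -> %d\n", check_io_queue_depth("1"));    /* rejected */
                printf("\"1024\" -> %d\n", check_io_queue_depth("1024")); /* accepted */
                return 0;
        }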
@@ -1099,9 +1118,9 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
 {
        int i;
 
-       for (i = dev->queue_count - 1; i >= lowest; i--) {
+       for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) {
                struct nvme_queue *nvmeq = dev->queues[i];
-               dev->queue_count--;
+               dev->ctrl.queue_count--;
                dev->queues[i] = NULL;
                nvme_free_queue(nvmeq);
        }
@@ -1126,7 +1145,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
        spin_unlock_irq(&nvmeq->q_lock);
 
        if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
-               blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q);
+               blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
 
        pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq);
 
@@ -1145,8 +1164,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
        if (shutdown)
                nvme_shutdown_ctrl(&dev->ctrl);
        else
-               nvme_disable_ctrl(&dev->ctrl, lo_hi_readq(
-                                               dev->bar + NVME_REG_CAP));
+               nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
 
        spin_lock_irq(&nvmeq->q_lock);
        nvme_process_cq(nvmeq);
@@ -1221,7 +1239,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
        nvmeq->qid = qid;
        nvmeq->cq_vector = -1;
        dev->queues[qid] = nvmeq;
-       dev->queue_count++;
+       dev->ctrl.queue_count++;
 
        return nvmeq;
 
@@ -1317,7 +1335,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev)
                 * user requests may be waiting on a stopped queue. Start the
                 * queue to flush these to completion.
                 */
-               blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+               blk_mq_unquiesce_queue(dev->ctrl.admin_q);
                blk_cleanup_queue(dev->ctrl.admin_q);
                blk_mq_free_tag_set(&dev->admin_tagset);
        }
@@ -1354,7 +1372,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
                        return -ENODEV;
                }
        } else
-               blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+               blk_mq_unquiesce_queue(dev->ctrl.admin_q);
 
        return 0;
 }
@@ -1385,11 +1403,10 @@ static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
        return 0;
 }
 
-static int nvme_configure_admin_queue(struct nvme_dev *dev)
+static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
 {
        int result;
        u32 aqa;
-       u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
        struct nvme_queue *nvmeq;
 
        result = nvme_remap_bar(dev, db_bar_size(dev, 0));
@@ -1397,13 +1414,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
                return result;
 
        dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
-                                               NVME_CAP_NSSRC(cap) : 0;
+                               NVME_CAP_NSSRC(dev->ctrl.cap) : 0;
 
        if (dev->subsystem &&
            (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
                writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
 
-       result = nvme_disable_ctrl(&dev->ctrl, cap);
+       result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
        if (result < 0)
                return result;
 
@@ -1422,7 +1439,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
        lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
        lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
 
-       result = nvme_enable_ctrl(&dev->ctrl, cap);
+       result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap);
        if (result)
                return result;
 
@@ -1441,7 +1458,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
        unsigned i, max;
        int ret = 0;
 
-       for (i = dev->queue_count; i <= dev->max_qid; i++) {
+       for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
                /* vector == qid - 1, match nvme_create_queue */
                if (!nvme_alloc_queue(dev, i, dev->q_depth,
                     pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
@@ -1450,7 +1467,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
                }
        }
 
-       max = min(dev->max_qid, dev->queue_count - 1);
+       max = min(dev->max_qid, dev->ctrl.queue_count - 1);
        for (i = dev->online_queues; i <= max; i++) {
                ret = nvme_create_queue(dev->queues[i], i);
                if (ret)
@@ -1585,9 +1602,10 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
 static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
 {
        struct nvme_host_mem_buf_desc *descs;
-       u32 chunk_size, max_entries, i = 0;
+       u32 chunk_size, max_entries;
+       int i = 0;
        void **bufs;
-       u64 size, tmp;
+       u64 size = 0, tmp;
 
        /* start big and work our way down */
        chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
@@ -1866,7 +1884,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 
 static int nvme_pci_enable(struct nvme_dev *dev)
 {
-       u64 cap;
        int result = -ENOMEM;
        struct pci_dev *pdev = to_pci_dev(dev->dev);
 
@@ -1893,10 +1910,11 @@ static int nvme_pci_enable(struct nvme_dev *dev)
        if (result < 0)
                return result;
 
-       cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+       dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 
-       dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
-       dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
+       dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
+                               io_queue_depth);
+       dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
        dev->dbs = dev->bar + 4096;
 
        /*
@@ -1908,6 +1926,12 @@ static int nvme_pci_enable(struct nvme_dev *dev)
                dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
                        "set queue depth=%u to work around controller resets\n",
                        dev->q_depth);
+       } else if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG &&
+                  (pdev->device == 0xa821 || pdev->device == 0xa822) &&
+                  NVME_CAP_MQES(dev->ctrl.cap) == 0) {
+               dev->q_depth = 64;
+               dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, "
+                        "set queue depth=%u\n", dev->q_depth);
        }
 
        /*
@@ -1996,7 +2020,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
        nvme_stop_queues(&dev->ctrl);
 
        queues = dev->online_queues - 1;
-       for (i = dev->queue_count - 1; i > 0; i--)
+       for (i = dev->ctrl.queue_count - 1; i > 0; i--)
                nvme_suspend_queue(dev->queues[i]);
 
        if (dead) {
@@ -2004,7 +2028,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
                 * probe, before the admin queue is configured. Thus,
                 * queue_count can be 0 here.
                 */
-               if (dev->queue_count)
+               if (dev->ctrl.queue_count)
                        nvme_suspend_queue(dev->queues[0]);
        } else {
                nvme_disable_io_queues(dev, queues);
@@ -2094,7 +2118,7 @@ static void nvme_reset_work(struct work_struct *work)
        if (result)
                goto out;
 
-       result = nvme_configure_admin_queue(dev);
+       result = nvme_pci_configure_admin_queue(dev);
        if (result)
                goto out;
 
@@ -2132,15 +2156,6 @@ static void nvme_reset_work(struct work_struct *work)
        if (result)
                goto out;
 
-       /*
-        * A controller that can not execute IO typically requires user
-        * intervention to correct. For such degraded controllers, the driver
-        * should not submit commands the user did not request, so skip
-        * registering for asynchronous event notification on this condition.
-        */
-       if (dev->online_queues > 1)
-               nvme_queue_async_events(&dev->ctrl);
-
        /*
         * Keep the controller around but remove all namespaces if we don't have
         * any working I/O queue.
@@ -2161,8 +2176,7 @@ static void nvme_reset_work(struct work_struct *work)
                goto out;
        }
 
-       if (dev->online_queues > 1)
-               nvme_queue_scan(&dev->ctrl);
+       nvme_start_ctrl(&dev->ctrl);
        return;
 
  out:
@@ -2341,11 +2355,13 @@ static void nvme_remove(struct pci_dev *pdev)
        }
 
        flush_work(&dev->ctrl.reset_work);
-       nvme_uninit_ctrl(&dev->ctrl);
+       nvme_stop_ctrl(&dev->ctrl);
+       nvme_remove_namespaces(&dev->ctrl);
        nvme_dev_disable(dev, true);
        nvme_free_host_mem(dev);
        nvme_dev_remove_admin(dev);
        nvme_free_queues(dev, 0);
+       nvme_uninit_ctrl(&dev->ctrl);
        nvme_release_prp_pools(dev);
        nvme_dev_unmap(dev);
        nvme_put_ctrl(&dev->ctrl);
@@ -2458,6 +2474,10 @@ static const struct pci_device_id nvme_id_table[] = {
                .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
        { PCI_DEVICE(0x1c5f, 0x0540),   /* Memblaze Pblaze4 adapter */
                .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+       { PCI_DEVICE(0x144d, 0xa821),   /* Samsung PM1725 */
+               .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+       { PCI_DEVICE(0x144d, 0xa822),   /* Samsung PM1725a */
+               .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
        { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
index 6d4119dfbdaacf5df18c41bf92d0711c2e074bbf..da04df1af231758cb4965735c417a215d736ec94 100644 (file)
@@ -86,7 +86,7 @@ enum nvme_rdma_queue_flags {
 
 struct nvme_rdma_queue {
        struct nvme_rdma_qe     *rsp_ring;
-       u8                      sig_count;
+       atomic_t                sig_count;
        int                     queue_size;
        size_t                  cmnd_capsule_len;
        struct nvme_rdma_ctrl   *ctrl;
@@ -103,7 +103,6 @@ struct nvme_rdma_queue {
 struct nvme_rdma_ctrl {
        /* read only in the hot path */
        struct nvme_rdma_queue  *queues;
-       u32                     queue_count;
 
        /* other member variables */
        struct blk_mq_tag_set   tag_set;
@@ -119,7 +118,6 @@ struct nvme_rdma_ctrl {
        struct blk_mq_tag_set   admin_tag_set;
        struct nvme_rdma_device *device;
 
-       u64                     cap;
        u32                     max_fr_pages;
 
        struct sockaddr_storage addr;
@@ -274,9 +272,6 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq)
        struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
        int ret = 0;
 
-       if (!req->mr->need_inval)
-               goto out;
-
        ib_dereg_mr(req->mr);
 
        req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
@@ -349,7 +344,7 @@ static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
        struct nvme_rdma_ctrl *ctrl = data;
        struct nvme_rdma_queue *queue = &ctrl->queues[hctx_idx + 1];
 
-       BUG_ON(hctx_idx >= ctrl->queue_count);
+       BUG_ON(hctx_idx >= ctrl->ctrl.queue_count);
 
        hctx->driver_data = queue;
        return 0;
@@ -525,6 +520,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
                queue->cmnd_capsule_len = sizeof(struct nvme_command);
 
        queue->queue_size = queue_size;
+       atomic_set(&queue->sig_count, 0);
 
        queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
                        RDMA_PS_TCP, IB_QPT_RC);
@@ -587,7 +583,7 @@ static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
 {
        int i;
 
-       for (i = 1; i < ctrl->queue_count; i++)
+       for (i = 1; i < ctrl->ctrl.queue_count; i++)
                nvme_rdma_stop_and_free_queue(&ctrl->queues[i]);
 }
 
@@ -595,7 +591,7 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl)
 {
        int i, ret = 0;
 
-       for (i = 1; i < ctrl->queue_count; i++) {
+       for (i = 1; i < ctrl->ctrl.queue_count; i++) {
                ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
                if (ret) {
                        dev_info(ctrl->ctrl.device,
@@ -623,14 +619,14 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
        if (ret)
                return ret;
 
-       ctrl->queue_count = nr_io_queues + 1;
-       if (ctrl->queue_count < 2)
+       ctrl->ctrl.queue_count = nr_io_queues + 1;
+       if (ctrl->ctrl.queue_count < 2)
                return 0;
 
        dev_info(ctrl->ctrl.device,
                "creating %d I/O queues.\n", nr_io_queues);
 
-       for (i = 1; i < ctrl->queue_count; i++) {
+       for (i = 1; i < ctrl->ctrl.queue_count; i++) {
                ret = nvme_rdma_init_queue(ctrl, i,
                                           ctrl->ctrl.opts->queue_size);
                if (ret) {
@@ -705,7 +701,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
        ++ctrl->ctrl.nr_reconnects;
 
-       if (ctrl->queue_count > 1) {
+       if (ctrl->ctrl.queue_count > 1) {
                nvme_rdma_free_io_queues(ctrl);
 
                ret = blk_mq_reinit_tagset(&ctrl->tag_set);
@@ -729,13 +725,11 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
        set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
 
-       ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+       ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
        if (ret)
                goto requeue;
 
-       nvme_start_keep_alive(&ctrl->ctrl);
-
-       if (ctrl->queue_count > 1) {
+       if (ctrl->ctrl.queue_count > 1) {
                ret = nvme_rdma_init_io_queues(ctrl);
                if (ret)
                        goto requeue;
@@ -743,16 +737,16 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
                ret = nvme_rdma_connect_io_queues(ctrl);
                if (ret)
                        goto requeue;
+
+               blk_mq_update_nr_hw_queues(&ctrl->tag_set,
+                               ctrl->ctrl.queue_count - 1);
        }
 
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
        WARN_ON_ONCE(!changed);
        ctrl->ctrl.nr_reconnects = 0;
 
-       if (ctrl->queue_count > 1) {
-               nvme_queue_scan(&ctrl->ctrl);
-               nvme_queue_async_events(&ctrl->ctrl);
-       }
+       nvme_start_ctrl(&ctrl->ctrl);
 
        dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
 
@@ -770,17 +764,17 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
                        struct nvme_rdma_ctrl, err_work);
        int i;
 
-       nvme_stop_keep_alive(&ctrl->ctrl);
+       nvme_stop_ctrl(&ctrl->ctrl);
 
-       for (i = 0; i < ctrl->queue_count; i++)
+       for (i = 0; i < ctrl->ctrl.queue_count; i++)
                clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
 
-       if (ctrl->queue_count > 1)
+       if (ctrl->ctrl.queue_count > 1)
                nvme_stop_queues(&ctrl->ctrl);
-       blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+       blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 
        /* We must fast-fail or requeue all of our inflight requests */
-       if (ctrl->queue_count > 1)
+       if (ctrl->ctrl.queue_count > 1)
                blk_mq_tagset_busy_iter(&ctrl->tag_set,
                                        nvme_cancel_request, &ctrl->ctrl);
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
@@ -790,7 +784,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
         * queues are not alive anymore, so restart the queues to make new
         * IO fail fast
         */
-       blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+       blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
        nvme_start_queues(&ctrl->ctrl);
 
        nvme_rdma_reconnect_or_remove(ctrl);
@@ -1008,17 +1002,16 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
                nvme_rdma_wr_error(cq, wc, "SEND");
 }
 
-static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
+/*
+ * We want to signal completion at least every queue depth/2.  This returns the
+ * largest power of two that is not above half of (queue size + 1), so the
+ * check can use a cheap mask instead of a division.
+ */
+static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
 {
-       int sig_limit;
+       int limit = 1 << ilog2((queue->queue_size + 1) / 2);
 
-       /*
-        * We signal completion every queue depth/2 and also handle the
-        * degenerated case of a  device with queue_depth=1, where we
-        * would need to signal every message.
-        */
-       sig_limit = max(queue->queue_size / 2, 1);
-       return (++queue->sig_count % sig_limit) == 0;
+       return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0;
 }
 
 static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
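The rewritten signaling heuristic above replaces a modulo with a power-of-two mask and makes the counter atomic. A runnable user-space sketch of the arithmetic (ilog2_u is a stand-in for the kernel's ilog2):

        #include <stdio.h>

        /* Stand-in for the kernel's ilog2() in this user-space sketch. */
        static unsigned ilog2_u(unsigned v)
        {
                unsigned l = 0;

                while (v >>= 1)
                        l++;
                return l;
        }

        int main(void)
        {
                unsigned sizes[] = { 1, 2, 16, 32, 127, 128 };

                for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                        unsigned limit = 1u << ilog2_u((sizes[i] + 1) / 2);

                        /* signaled whenever (++count & (limit - 1)) == 0 */
                        printf("queue_size=%3u -> signal every %u sends\n",
                               sizes[i], limit);
                }
                return 0;
        }

For queue_size=1 the limit degenerates to 1, i.e. every send is signaled, which matches the degenerate case the old comment called out.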
@@ -1574,7 +1567,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 
        set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
 
-       error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+       error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP,
+                       &ctrl->ctrl.cap);
        if (error) {
                dev_err(ctrl->ctrl.device,
                        "prop_get NVME_REG_CAP failed\n");
@@ -1582,9 +1576,9 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
        }
 
        ctrl->ctrl.sqsize =
-               min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
+               min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
 
-       error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+       error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
        if (error)
                goto out_cleanup_queue;
 
@@ -1601,8 +1595,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
        if (error)
                goto out_cleanup_queue;
 
-       nvme_start_keep_alive(&ctrl->ctrl);
-
        return 0;
 
 out_cleanup_queue:
@@ -1620,11 +1612,10 @@ out_free_queue:
 
 static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
 {
-       nvme_stop_keep_alive(&ctrl->ctrl);
        cancel_work_sync(&ctrl->err_work);
        cancel_delayed_work_sync(&ctrl->reconnect_work);
 
-       if (ctrl->queue_count > 1) {
+       if (ctrl->ctrl.queue_count > 1) {
                nvme_stop_queues(&ctrl->ctrl);
                blk_mq_tagset_busy_iter(&ctrl->tag_set,
                                        nvme_cancel_request, &ctrl->ctrl);
@@ -1634,18 +1625,21 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
        if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags))
                nvme_shutdown_ctrl(&ctrl->ctrl);
 
-       blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+       blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_cancel_request, &ctrl->ctrl);
+       blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
        nvme_rdma_destroy_admin_queue(ctrl);
 }
 
 static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
 {
-       nvme_uninit_ctrl(&ctrl->ctrl);
+       nvme_stop_ctrl(&ctrl->ctrl);
+       nvme_remove_namespaces(&ctrl->ctrl);
        if (shutdown)
                nvme_rdma_shutdown_ctrl(ctrl);
 
+       nvme_uninit_ctrl(&ctrl->ctrl);
        if (ctrl->ctrl.tagset) {
                blk_cleanup_queue(ctrl->ctrl.connect_q);
                blk_mq_free_tag_set(&ctrl->tag_set);
@@ -1707,6 +1701,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
        int ret;
        bool changed;
 
+       nvme_stop_ctrl(&ctrl->ctrl);
        nvme_rdma_shutdown_ctrl(ctrl);
 
        ret = nvme_rdma_configure_admin_queue(ctrl);
@@ -1716,7 +1711,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
                goto del_dead_ctrl;
        }
 
-       if (ctrl->queue_count > 1) {
+       if (ctrl->ctrl.queue_count > 1) {
                ret = blk_mq_reinit_tagset(&ctrl->tag_set);
                if (ret)
                        goto del_dead_ctrl;
@@ -1728,16 +1723,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
                ret = nvme_rdma_connect_io_queues(ctrl);
                if (ret)
                        goto del_dead_ctrl;
+
+               blk_mq_update_nr_hw_queues(&ctrl->tag_set,
+                               ctrl->ctrl.queue_count - 1);
        }
 
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
        WARN_ON_ONCE(!changed);
 
-       if (ctrl->queue_count > 1) {
-               nvme_start_queues(&ctrl->ctrl);
-               nvme_queue_scan(&ctrl->ctrl);
-               nvme_queue_async_events(&ctrl->ctrl);
-       }
+       nvme_start_ctrl(&ctrl->ctrl);
 
        return;
 
@@ -1785,7 +1779,7 @@ static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl)
        ctrl->tag_set.cmd_size = sizeof(struct nvme_rdma_request) +
                SG_CHUNK_SIZE * sizeof(struct scatterlist);
        ctrl->tag_set.driver_data = ctrl;
-       ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+       ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
        ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
 
        ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
@@ -1863,12 +1857,12 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
        INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
        INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
 
-       ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
+       ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
        ctrl->ctrl.sqsize = opts->queue_size - 1;
        ctrl->ctrl.kato = opts->kato;
 
        ret = -ENOMEM;
-       ctrl->queues = kcalloc(ctrl->queue_count, sizeof(*ctrl->queues),
+       ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
                                GFP_KERNEL);
        if (!ctrl->queues)
                goto out_uninit_ctrl;
@@ -1925,15 +1919,11 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
        list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
        mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-       if (opts->nr_io_queues) {
-               nvme_queue_scan(&ctrl->ctrl);
-               nvme_queue_async_events(&ctrl->ctrl);
-       }
+       nvme_start_ctrl(&ctrl->ctrl);
 
        return &ctrl->ctrl;
 
 out_remove_admin_queue:
-       nvme_stop_keep_alive(&ctrl->ctrl);
        nvme_rdma_destroy_admin_queue(ctrl);
 out_kfree_queues:
        kfree(ctrl->queues);
index 7692a96c9065b22149734eccc7d767e44ece7f36..1e6dcc241b3cfbc4cfd5e62d0668509ab636bb34 100644 (file)
@@ -1164,18 +1164,24 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
 
        memset(acc, 0, sizeof(*acc));
 
-       if (iod->rqstdatalen < sizeof(struct fcnvme_ls_cr_assoc_rqst))
+       /*
+        * FC-NVME spec changes. Initiators have been sending different
+        * lengths because the padding size for the Create Association
+        * Cmd descriptor was specified incorrectly.
+        * Accept anything of at least "minimum" length. Assume the
+        * format per the 1.15 spec (with HOSTID reduced to 16 bytes),
+        * and ignore how long the trailing pad is.
+        */
+       if (iod->rqstdatalen < FCNVME_LSDESC_CRA_RQST_MINLEN)
                ret = VERR_CR_ASSOC_LEN;
-       else if (rqst->desc_list_len !=
-                       fcnvme_lsdesc_len(
-                               sizeof(struct fcnvme_ls_cr_assoc_rqst)))
+       else if (rqst->desc_list_len <
+                       cpu_to_be32(FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN))
                ret = VERR_CR_ASSOC_RQST_LEN;
        else if (rqst->assoc_cmd.desc_tag !=
                        cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD))
                ret = VERR_CR_ASSOC_CMD;
-       else if (rqst->assoc_cmd.desc_len !=
-                       fcnvme_lsdesc_len(
-                               sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)))
+       else if (rqst->assoc_cmd.desc_len <
+                       cpu_to_be32(FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN))
                ret = VERR_CR_ASSOC_CMD_LEN;
        else if (!rqst->assoc_cmd.ersp_ratio ||
                 (be16_to_cpu(rqst->assoc_cmd.ersp_ratio) >=
index 40128793e61350f59c2bb136e91efcb6e83dc649..3b4d47a6abdb8337419f3e8e054c9a00e1147091 100644 (file)
@@ -85,7 +85,7 @@ static void nvmet_execute_rw(struct nvmet_req *req)
                        bio_set_op_attrs(bio, op, op_flags);
 
                        bio_chain(bio, prev);
-                       cookie = submit_bio(prev);
+                       submit_bio(prev);
                }
 
                sector += sg->length >> 9;
index 5f55c683b338c7f47245e727f1d4abbf744ad991..717ed7ddb2f6d59fdd79d5e21565184f3acdf5ec 100644 (file)
@@ -44,12 +44,10 @@ struct nvme_loop_iod {
 
 struct nvme_loop_ctrl {
        struct nvme_loop_queue  *queues;
-       u32                     queue_count;
 
        struct blk_mq_tag_set   admin_tag_set;
 
        struct list_head        list;
-       u64                     cap;
        struct blk_mq_tag_set   tag_set;
        struct nvme_loop_iod    async_event_iod;
        struct nvme_ctrl        ctrl;
@@ -241,7 +239,7 @@ static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
        struct nvme_loop_ctrl *ctrl = data;
        struct nvme_loop_queue *queue = &ctrl->queues[hctx_idx + 1];
 
-       BUG_ON(hctx_idx >= ctrl->queue_count);
+       BUG_ON(hctx_idx >= ctrl->ctrl.queue_count);
 
        hctx->driver_data = queue;
        return 0;
@@ -307,7 +305,7 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl)
 {
        int i;
 
-       for (i = 1; i < ctrl->queue_count; i++)
+       for (i = 1; i < ctrl->ctrl.queue_count; i++)
                nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
 }
 
@@ -330,7 +328,7 @@ static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
                if (ret)
                        goto out_destroy_queues;
 
-               ctrl->queue_count++;
+               ctrl->ctrl.queue_count++;
        }
 
        return 0;
@@ -344,7 +342,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl)
 {
        int i, ret;
 
-       for (i = 1; i < ctrl->queue_count; i++) {
+       for (i = 1; i < ctrl->ctrl.queue_count; i++) {
                ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
                if (ret)
                        return ret;
@@ -372,7 +370,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
        error = nvmet_sq_init(&ctrl->queues[0].nvme_sq);
        if (error)
                return error;
-       ctrl->queue_count = 1;
+       ctrl->ctrl.queue_count = 1;
 
        error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
        if (error)
@@ -388,7 +386,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
        if (error)
                goto out_cleanup_queue;
 
-       error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+       error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
        if (error) {
                dev_err(ctrl->ctrl.device,
                        "prop_get NVME_REG_CAP failed\n");
@@ -396,9 +394,9 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
        }
 
        ctrl->ctrl.sqsize =
-               min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
+               min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
 
-       error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+       error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
        if (error)
                goto out_cleanup_queue;
 
@@ -409,8 +407,6 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
        if (error)
                goto out_cleanup_queue;
 
-       nvme_start_keep_alive(&ctrl->ctrl);
-
        return 0;
 
 out_cleanup_queue:
@@ -424,9 +420,7 @@ out_free_sq:
 
 static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
 {
-       nvme_stop_keep_alive(&ctrl->ctrl);
-
-       if (ctrl->queue_count > 1) {
+       if (ctrl->ctrl.queue_count > 1) {
                nvme_stop_queues(&ctrl->ctrl);
                blk_mq_tagset_busy_iter(&ctrl->tag_set,
                                        nvme_cancel_request, &ctrl->ctrl);
@@ -436,9 +430,10 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
        if (ctrl->ctrl.state == NVME_CTRL_LIVE)
                nvme_shutdown_ctrl(&ctrl->ctrl);
 
-       blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+       blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_cancel_request, &ctrl->ctrl);
+       blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
        nvme_loop_destroy_admin_queue(ctrl);
 }
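The same quiesce / cancel / unquiesce sequence now appears in the fc, rdma and loop shutdown paths in this merge. A sketch of the pattern (admin_queue_teardown is a hypothetical helper; the drivers open-code it):

        /*
         * Hypothetical helper summarizing the pattern: quiesce so no
         * new requests are dispatched, fail everything still in flight
         * through the tagset iterator, then unquiesce so anything
         * blocked on the queue can proceed before it is destroyed.
         */
        static void admin_queue_teardown(struct nvme_ctrl *ctrl,
                                         struct blk_mq_tag_set *set)
        {
                blk_mq_quiesce_queue(ctrl->admin_q);
                blk_mq_tagset_busy_iter(set, nvme_cancel_request, ctrl);
                blk_mq_unquiesce_queue(ctrl->admin_q);
        }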
 
@@ -447,8 +442,10 @@ static void nvme_loop_del_ctrl_work(struct work_struct *work)
        struct nvme_loop_ctrl *ctrl = container_of(work,
                                struct nvme_loop_ctrl, delete_work);
 
-       nvme_uninit_ctrl(&ctrl->ctrl);
+       nvme_stop_ctrl(&ctrl->ctrl);
+       nvme_remove_namespaces(&ctrl->ctrl);
        nvme_loop_shutdown_ctrl(ctrl);
+       nvme_uninit_ctrl(&ctrl->ctrl);
        nvme_put_ctrl(&ctrl->ctrl);
 }
 
@@ -496,6 +493,7 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
        bool changed;
        int ret;
 
+       nvme_stop_ctrl(&ctrl->ctrl);
        nvme_loop_shutdown_ctrl(ctrl);
 
        ret = nvme_loop_configure_admin_queue(ctrl);
@@ -510,13 +508,13 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
        if (ret)
                goto out_destroy_io;
 
+       blk_mq_update_nr_hw_queues(&ctrl->tag_set,
+                       ctrl->ctrl.queue_count - 1);
+
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
        WARN_ON_ONCE(!changed);
 
-       nvme_queue_scan(&ctrl->ctrl);
-       nvme_queue_async_events(&ctrl->ctrl);
-
-       nvme_start_queues(&ctrl->ctrl);
+       nvme_start_ctrl(&ctrl->ctrl);
 
        return;
 
@@ -559,7 +557,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
        ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
                SG_CHUNK_SIZE * sizeof(struct scatterlist);
        ctrl->tag_set.driver_data = ctrl;
-       ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+       ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
        ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
        ctrl->ctrl.tagset = &ctrl->tag_set;
 
@@ -651,10 +649,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
        list_add_tail(&ctrl->list, &nvme_loop_ctrl_list);
        mutex_unlock(&nvme_loop_ctrl_mutex);
 
-       if (opts->nr_io_queues) {
-               nvme_queue_scan(&ctrl->ctrl);
-               nvme_queue_async_events(&ctrl->ctrl);
-       }
+       nvme_start_ctrl(&ctrl->ctrl);
 
        return &ctrl->ctrl;
 
index cfe1d01eb73f052fedeca8186af0491beb384dba..adc7845390611f8ec03b85e43a4a264ea0d3643d 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/export.h>
 #include <linux/delay.h>
 #include <asm/unaligned.h>
+#include <linux/t10-pi.h>
 #include <linux/crc-t10dif.h>
 #include <net/checksum.h>
 
@@ -2934,8 +2935,8 @@ lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
                                 * First check to see if a protection data
                                 * check is valid
                                 */
-                               if ((src->ref_tag == 0xffffffff) ||
-                                   (src->app_tag == 0xffff)) {
+                               if ((src->ref_tag == T10_PI_REF_ESCAPE) ||
+                                   (src->app_tag == T10_PI_APP_ESCAPE)) {
                                        start_ref_tag++;
                                        goto skipit;
                                }
index 6c6e624a5aa6b52525da4dd43aee1f65f8139782..7b3b702ef6222cab385bb90d346f9eae679b1a95 100644 (file)
@@ -2040,9 +2040,9 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
         * For type     3: ref & app tag is all 'f's
         * For type 0,1,2: app tag is all 'f's
         */
-       if ((a_app_tag == 0xffff) &&
+       if ((a_app_tag == T10_PI_APP_ESCAPE) &&
            ((scsi_get_prot_type(cmd) != SCSI_PROT_DIF_TYPE3) ||
-            (a_ref_tag == 0xffffffff))) {
+            (a_ref_tag == T10_PI_REF_ESCAPE))) {
                uint32_t blocks_done, resid;
                sector_t lba_s = scsi_get_lba(cmd);
 
@@ -2084,9 +2084,9 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
                        spt = page_address(sg_page(sg)) + sg->offset;
                        spt += j;
 
-                       spt->app_tag = 0xffff;
+                       spt->app_tag = T10_PI_APP_ESCAPE;
                        if (scsi_get_prot_type(cmd) == SCSI_PROT_DIF_TYPE3)
-                               spt->ref_tag = 0xffffffff;
+                               spt->ref_tag = T10_PI_REF_ESCAPE;
                }
 
                return 0;
index 4316f7b65fb7691fa38e81ce7ed4d4d62b9520df..dc9456e7dac985534ea72c4f8a7d9883ada53644 100644 (file)
@@ -1450,7 +1450,7 @@ sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors,
                                 (unsigned long long)sector, sdt->guard_tag,
                                 sdt->app_tag, be32_to_cpu(sdt->ref_tag));
 
-                       if (sdt->app_tag == cpu_to_be16(0xffff)) {
+                       if (sdt->app_tag == T10_PI_APP_ESCAPE) {
                                dsg_off += block_size;
                                goto next;
                        }
index 664a27da276d64d3d84375c0baaa983d4c546cd5..7b1cf4ba090291af59e904dc1c59d8196ab88d3d 100644 (file)
@@ -165,10 +165,27 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 {
        iter->bi_sector += bytes >> 9;
 
-       if (bio_no_advance_iter(bio))
+       if (bio_no_advance_iter(bio)) {
                iter->bi_size -= bytes;
-       else
+               iter->bi_done += bytes;
+       } else {
                bvec_iter_advance(bio->bi_io_vec, iter, bytes);
+               /* TODO: It would be reasonable to complete the bio with an error here. */
+       }
+}
+
+static inline bool bio_rewind_iter(struct bio *bio, struct bvec_iter *iter,
+               unsigned int bytes)
+{
+       iter->bi_sector -= bytes >> 9;
+
+       if (bio_no_advance_iter(bio)) {
+               iter->bi_size += bytes;
+               iter->bi_done -= bytes;
+               return true;
+       }
+
+       return bvec_iter_rewind(bio->bi_io_vec, iter, bytes);
 }
 
 #define __bio_for_each_segment(bvl, bio, iter, start)                  \
@@ -303,8 +320,6 @@ struct bio_integrity_payload {
 
        struct bvec_iter        bip_iter;
 
-       bio_end_io_t            *bip_end_io;    /* saved I/O completion fn */
-
        unsigned short          bip_slab;       /* slab the bip came from */
        unsigned short          bip_vcnt;       /* # of integrity bio_vecs */
        unsigned short          bip_max_vcnt;   /* integrity bio_vec slots */
@@ -722,13 +737,10 @@ struct biovec_slab {
                bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
 
 extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
-extern void bio_integrity_free(struct bio *);
 extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
-extern bool bio_integrity_enabled(struct bio *bio);
-extern int bio_integrity_prep(struct bio *);
-extern void bio_integrity_endio(struct bio *);
+extern bool bio_integrity_prep(struct bio *);
 extern void bio_integrity_advance(struct bio *, unsigned int);
-extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
+extern void bio_integrity_trim(struct bio *);
 extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
 extern int bioset_integrity_create(struct bio_set *, int);
 extern void bioset_integrity_free(struct bio_set *);
@@ -741,11 +753,6 @@ static inline void *bio_integrity(struct bio *bio)
        return NULL;
 }
 
-static inline bool bio_integrity_enabled(struct bio *bio)
-{
-       return false;
-}
-
 static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
 {
        return 0;
@@ -756,14 +763,9 @@ static inline void bioset_integrity_free (struct bio_set *bs)
        return;
 }
 
-static inline int bio_integrity_prep(struct bio *bio)
-{
-       return 0;
-}
-
-static inline void bio_integrity_free(struct bio *bio)
+static inline bool bio_integrity_prep(struct bio *bio)
 {
-       return;
+       return true;
 }
 
 static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
@@ -778,8 +780,7 @@ static inline void bio_integrity_advance(struct bio *bio,
        return;
 }
 
-static inline void bio_integrity_trim(struct bio *bio, unsigned int offset,
-                                     unsigned int sectors)
+static inline void bio_integrity_trim(struct bio *bio)
 {
        return;
 }
index 89b65b82d98f5c5e77c34f967e856dcc6028dabe..ec8a4d7af6bda55586bc0b0221b3dcfe6e5ab487 100644 (file)
@@ -22,6 +22,7 @@
 
 #include <linux/kernel.h>
 #include <linux/bug.h>
+#include <linux/errno.h>
 
 /*
  * was unsigned short, but we might as well be ready for > 64kB I/O pages
@@ -39,6 +40,8 @@ struct bvec_iter {
 
        unsigned int            bi_idx;         /* current index into bvl_vec */
 
+       unsigned int            bi_done;        /* number of bytes completed */
+
        unsigned int            bi_bvec_done;   /* number of bytes completed in
                                                   current bvec */
 };
@@ -66,12 +69,14 @@ struct bvec_iter {
        .bv_offset      = bvec_iter_offset((bvec), (iter)),     \
 })
 
-static inline void bvec_iter_advance(const struct bio_vec *bv,
-                                    struct bvec_iter *iter,
-                                    unsigned bytes)
+static inline bool bvec_iter_advance(const struct bio_vec *bv,
+               struct bvec_iter *iter, unsigned bytes)
 {
-       WARN_ONCE(bytes > iter->bi_size,
-                 "Attempted to advance past end of bvec iter\n");
+       if (WARN_ONCE(bytes > iter->bi_size,
+                    "Attempted to advance past end of bvec iter\n")) {
+               iter->bi_size = 0;
+               return false;
+       }
 
        while (bytes) {
                unsigned iter_len = bvec_iter_len(bv, *iter);
@@ -80,12 +85,38 @@ static inline void bvec_iter_advance(const struct bio_vec *bv,
                bytes -= len;
                iter->bi_size -= len;
                iter->bi_bvec_done += len;
+               iter->bi_done += len;
 
                if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
                        iter->bi_bvec_done = 0;
                        iter->bi_idx++;
                }
        }
+       return true;
+}
+
+static inline bool bvec_iter_rewind(const struct bio_vec *bv,
+                                    struct bvec_iter *iter,
+                                    unsigned int bytes)
+{
+       while (bytes) {
+               unsigned len = min(bytes, iter->bi_bvec_done);
+
+               if (iter->bi_bvec_done == 0) {
+                       if (WARN_ONCE(iter->bi_idx == 0,
+                                     "Attempted to rewind iter beyond "
+                                     "bvec's boundaries\n")) {
+                               return false;
+                       }
+                       iter->bi_idx--;
+                       iter->bi_bvec_done = __bvec_iter_bvec(bv, *iter)->bv_len;
+                       continue;
+               }
+               bytes -= len;
+               iter->bi_size += len;
+               iter->bi_bvec_done -= len;
+       }
+       return true;
 }
 
 #define for_each_bvec(bvl, bio_vec, iter, start)                       \
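bvec_iter_advance() and the new bvec_iter_rewind() are meant to be symmetric over the same bio_vec array. A runnable user-space sketch with simplified stand-in types (only bv_len matters for the bookkeeping; bi_done is omitted since the rewind loop above does not touch it):

        #include <stdio.h>

        struct bio_vec   { unsigned bv_len; };  /* stand-in type */
        struct bvec_iter { unsigned bi_size, bi_idx, bi_bvec_done; };

        static void advance(const struct bio_vec *bv, struct bvec_iter *it,
                            unsigned bytes)
        {
                while (bytes) {
                        unsigned len = bv[it->bi_idx].bv_len - it->bi_bvec_done;

                        if (len > bytes)
                                len = bytes;
                        bytes -= len;
                        it->bi_size -= len;
                        it->bi_bvec_done += len;
                        if (it->bi_bvec_done == bv[it->bi_idx].bv_len) {
                                it->bi_bvec_done = 0;
                                it->bi_idx++;
                        }
                }
        }

        static void rewind_it(const struct bio_vec *bv, struct bvec_iter *it,
                              unsigned bytes)
        {
                while (bytes) {
                        unsigned len;

                        if (it->bi_bvec_done == 0) {    /* step back one bvec */
                                it->bi_idx--;
                                it->bi_bvec_done = bv[it->bi_idx].bv_len;
                                continue;
                        }
                        len = bytes < it->bi_bvec_done ? bytes : it->bi_bvec_done;
                        bytes -= len;
                        it->bi_size += len;
                        it->bi_bvec_done -= len;
                }
        }

        int main(void)
        {
                struct bio_vec bv[] = { { 512 }, { 1024 }, { 512 } };
                struct bvec_iter it = { .bi_size = 2048 };

                advance(bv, &it, 1024);   /* first bvec plus half of the second */
                rewind_it(bv, &it, 1024); /* undo it */
                printf("idx=%u bvec_done=%u size=%u\n",
                       it.bi_idx, it.bi_bvec_done, it.bi_size);
                return 0;                 /* prints idx=0 bvec_done=0 size=2048 */
        }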
index bc711a10be05c2d8354a9ba2eff382bc992314b1..21c37e39e41a2829c77ebac20afcd8a38102ac57 100644 (file)
@@ -17,6 +17,7 @@
 
 /*
  * This file contains definitions relative to FC-NVME r1.14 (16-020vB).
+ * The fcnvme_lsdesc_cr_assoc_cmd struct reflects expected r1.16 content.
  */
 
 #ifndef _NVME_FC_H
@@ -193,9 +194,21 @@ struct fcnvme_lsdesc_cr_assoc_cmd {
        uuid_t  hostid;
        u8      hostnqn[FCNVME_ASSOC_HOSTNQN_LEN];
        u8      subnqn[FCNVME_ASSOC_SUBNQN_LEN];
-       u8      rsvd632[384];
+       __be32  rsvd584[108];           /* pad to 1016 bytes,
+                                        * which makes overall LS rqst
+                                        * payload 1024 bytes
+                                        */
 };
 
+#define FCNVME_LSDESC_CRA_CMD_DESC_MINLEN      \
+               offsetof(struct fcnvme_lsdesc_cr_assoc_cmd, rsvd584)
+
+#define FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN \
+               (FCNVME_LSDESC_CRA_CMD_DESC_MINLEN - \
+                offsetof(struct fcnvme_lsdesc_cr_assoc_cmd, ersp_ratio))
+
+
+
 /* FCNVME_LSDESC_CREATE_CONN_CMD */
 struct fcnvme_lsdesc_cr_conn_cmd {
        __be32  desc_tag;               /* FCNVME_LSDESC_xxx */
@@ -273,6 +286,14 @@ struct fcnvme_ls_cr_assoc_rqst {
        struct fcnvme_lsdesc_cr_assoc_cmd       assoc_cmd;
 };
 
+#define FCNVME_LSDESC_CRA_RQST_MINLEN  \
+               (offsetof(struct fcnvme_ls_cr_assoc_rqst, assoc_cmd) + \
+                       FCNVME_LSDESC_CRA_CMD_DESC_MINLEN)
+
+#define FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN     \
+               FCNVME_LSDESC_CRA_CMD_DESC_MINLEN
+
+
 struct fcnvme_ls_cr_assoc_acc {
        struct fcnvme_ls_acc_hdr                hdr;
        struct fcnvme_lsdesc_assoc_id           associd;
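These minimum-length macros back the relaxed validation in nvmet_fc_ls_create_association earlier in this merge. A sketch of the intent (cr_assoc_rqst_len_ok is hypothetical, and it normalizes endianness before comparing, where the target code compares raw __be32 values):

        /*
         * Hypothetical helper: a Create Association request is
         * acceptable as long as it carries everything up to the
         * variable-length trailing pad; extra pad bytes sent by
         * initiators built against older spec drafts are ignored.
         */
        static bool cr_assoc_rqst_len_ok(u32 rqstdatalen,
                        const struct fcnvme_ls_cr_assoc_rqst *rqst)
        {
                if (rqstdatalen < FCNVME_LSDESC_CRA_RQST_MINLEN)
                        return false;   /* shorter than the 1.15 minimum */
                if (be32_to_cpu(rqst->desc_list_len) <
                    FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN)
                        return false;
                return true;            /* any longer trailing pad is fine */
        }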
index 9375d23a24e7aba97504d743e77d3b78f4d72493..635a3c5706bdeb75378b22d93ce77ba811be0cf3 100644 (file)
@@ -33,6 +33,8 @@ struct t10_pi_tuple {
        __be32 ref_tag;         /* Target LBA or indirect LBA */
 };
 
+#define T10_PI_APP_ESCAPE cpu_to_be16(0xffff)
+#define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff)
 
 extern const struct blk_integrity_profile t10_pi_type1_crc;
 extern const struct blk_integrity_profile t10_pi_type1_ip;
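The named escape values replace the open-coded 0xffff / 0xffffffff checks in the lpfc, qla2xxx and target_core_sbc hunks above. A sketch of how a verify loop can honor them, following the qla2xxx comment (the helper is hypothetical; the individual drivers differ in exactly which tags they test):

        /*
         * Hypothetical helper following the qla2xxx rule above: for DIF
         * types 0-2 an all-ones app tag means "do not check this
         * block"; for type 3 the ref tag must be all ones as well.
         */
        static bool t10_pi_tuple_escaped(const struct t10_pi_tuple *pi,
                                         bool type3)
        {
                if (pi->app_tag != T10_PI_APP_ESCAPE)
                        return false;
                return !type3 || pi->ref_tag == T10_PI_REF_ESCAPE;
        }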