blk-mq: allow changing of queue depth through sysfs
author Jens Axboe <axboe@fb.com>
Tue, 20 May 2014 17:49:02 +0000 (11:49 -0600)
committer Jens Axboe <axboe@fb.com>
Tue, 20 May 2014 17:49:02 +0000 (11:49 -0600)
For request_fn based devices, the block layer exports a 'nr_requests'
file through sysfs to allow adjusting the queue depth on the fly.
Currently this returns -EINVAL for blk-mq, since it's not wired up.
Wire this up for blk-mq, so that it now also allows dynamic adjustment
of the queue depth for any given block device managed by blk-mq.

Signed-off-by: Jens Axboe <axboe@fb.com>
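
A minimal usage sketch (not part of the commit): with this change, the queue
depth of a blk-mq managed device can be adjusted from userspace through the
same nr_requests file that request_fn devices already expose. The device name
nvme0n1 and the value 64 are assumptions for illustration; the store handler
clamps values below BLKDEV_MIN_RQ, and blk_mq_update_nr_requests() rejects
values above the tag set's queue_depth with -EINVAL.

#include <stdio.h>

int main(void)
{
	/* Hypothetical device and depth, for illustration only. */
	const char *path = "/sys/block/nvme0n1/queue/nr_requests";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}

	/* Values below BLKDEV_MIN_RQ are clamped by queue_requests_store();
	 * values above the tag set's queue_depth fail with EINVAL. */
	fprintf(f, "64\n");

	if (fclose(f) != 0) {	/* a rejected write surfaces here */
		perror("nr_requests");
		return 1;
	}

	return 0;
}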
block/blk-core.c
block/blk-mq-tag.c
block/blk-mq-tag.h
block/blk-mq.c
block/blk-mq.h
block/blk-sysfs.c
block/blk.h
include/linux/blk-mq.h

block/blk-core.c
index a6bd3e702201b3cd8558c76d8ad672e740a39c07..fe81e19099a13dbb1a4876f61f437886e2e85232 100644 (file)
@@ -848,6 +848,47 @@ static void freed_request(struct request_list *rl, unsigned int flags)
                __freed_request(rl, sync ^ 1);
 }
 
+int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
+{
+       struct request_list *rl;
+
+       spin_lock_irq(q->queue_lock);
+       q->nr_requests = nr;
+       blk_queue_congestion_threshold(q);
+
+       /* congestion isn't cgroup aware and follows root blkcg for now */
+       rl = &q->root_rl;
+
+       if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
+               blk_set_queue_congested(q, BLK_RW_SYNC);
+       else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
+               blk_clear_queue_congested(q, BLK_RW_SYNC);
+
+       if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
+               blk_set_queue_congested(q, BLK_RW_ASYNC);
+       else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
+               blk_clear_queue_congested(q, BLK_RW_ASYNC);
+
+       blk_queue_for_each_rl(rl, q) {
+               if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
+                       blk_set_rl_full(rl, BLK_RW_SYNC);
+               } else {
+                       blk_clear_rl_full(rl, BLK_RW_SYNC);
+                       wake_up(&rl->wait[BLK_RW_SYNC]);
+               }
+
+               if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
+                       blk_set_rl_full(rl, BLK_RW_ASYNC);
+               } else {
+                       blk_clear_rl_full(rl, BLK_RW_ASYNC);
+                       wake_up(&rl->wait[BLK_RW_ASYNC]);
+               }
+       }
+
+       spin_unlock_irq(q->queue_lock);
+       return 0;
+}
+
 /*
  * Determine if elevator data should be initialized when allocating the
  * request associated with @bio.

block/blk-mq-tag.c
index e6b3fbae9862a79dc77053bfd094e6f5fa94f3fa..f6dea968b710bee95c56d0e06b8ddfc49cb0e330 100644 (file)
@@ -57,23 +57,13 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 }
 
 /*
- * If a previously busy queue goes inactive, potential waiters could now
- * be allowed to queue. Wake them up and check.
+ * Wakeup all potentially sleeping on normal (non-reserved) tags
  */
-void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
 {
-       struct blk_mq_tags *tags = hctx->tags;
        struct blk_mq_bitmap_tags *bt;
        int i, wake_index;
 
-       if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
-               return;
-
-       atomic_dec(&tags->active_queues);
-
-       /*
-        * Will only throttle depth on non-reserved tags
-        */
        bt = &tags->bitmap_tags;
        wake_index = bt->wake_index;
        for (i = 0; i < BT_WAIT_QUEUES; i++) {
@@ -86,6 +76,22 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
        }
 }
 
+/*
+ * If a previously busy queue goes inactive, potential waiters could now
+ * be allowed to queue. Wake them up and check.
+ */
+void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+       struct blk_mq_tags *tags = hctx->tags;
+
+       if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+               return;
+
+       atomic_dec(&tags->active_queues);
+
+       blk_mq_tag_wakeup_all(tags);
+}
+
 /*
  * For shared tag users, we track the number of currently active users
  * and attempt to provide a fair share of the tag depth for each of them.
@@ -408,6 +414,28 @@ static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt)
        return bt->depth - used;
 }
 
+static void bt_update_count(struct blk_mq_bitmap_tags *bt,
+                           unsigned int depth)
+{
+       unsigned int tags_per_word = 1U << bt->bits_per_word;
+       unsigned int map_depth = depth;
+
+       if (depth) {
+               int i;
+
+               for (i = 0; i < bt->map_nr; i++) {
+                       bt->map[i].depth = min(map_depth, tags_per_word);
+                       map_depth -= bt->map[i].depth;
+               }
+       }
+
+       bt->wake_cnt = BT_WAIT_BATCH;
+       if (bt->wake_cnt > depth / 4)
+               bt->wake_cnt = max(1U, depth / 4);
+
+       bt->depth = depth;
+}
+
 static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
                        int node, bool reserved)
 {
@@ -420,7 +448,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
         * condition.
         */
        if (depth) {
-               unsigned int nr, i, map_depth, tags_per_word;
+               unsigned int nr, tags_per_word;
 
                tags_per_word = (1 << bt->bits_per_word);
 
@@ -444,11 +472,6 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
                        return -ENOMEM;
 
                bt->map_nr = nr;
-               map_depth = depth;
-               for (i = 0; i < nr; i++) {
-                       bt->map[i].depth = min(map_depth, tags_per_word);
-                       map_depth -= tags_per_word;
-               }
        }
 
        bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL);
@@ -460,11 +483,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
        for (i = 0; i < BT_WAIT_QUEUES; i++)
                init_waitqueue_head(&bt->bs[i].wait);
 
-       bt->wake_cnt = BT_WAIT_BATCH;
-       if (bt->wake_cnt > depth / 4)
-               bt->wake_cnt = max(1U, depth / 4);
-
-       bt->depth = depth;
+       bt_update_count(bt, depth);
        return 0;
 }
 
@@ -525,6 +544,21 @@ void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *tag)
        *tag = prandom_u32() % depth;
 }
 
+int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
+{
+       tdepth -= tags->nr_reserved_tags;
+       if (tdepth > tags->nr_tags)
+               return -EINVAL;
+
+       /*
+        * Don't need (or can't) update reserved tags here, they remain
+        * static and should never need resizing.
+        */
+       bt_update_count(&tags->bitmap_tags, tdepth);
+       blk_mq_tag_wakeup_all(tags);
+       return 0;
+}
+
 ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
 {
        char *orig_page = page;

block/blk-mq-tag.h
index e144f68ec45faaa47a84a9e4d182e5fa167b4d94..e7ff5ceeeb9778c952db3bd7deadb9383e717562 100644 (file)
@@ -55,6 +55,7 @@ extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data
 extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
 extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
 extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
+extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth);
 
 enum {
        BLK_MQ_TAG_CACHE_MIN    = 1,

block/blk-mq.c
index 0fbef7e9bef1e9599364250feae7d9d3076f42d9..7b71ab1b1536f809b4ab884cf7df7315943c62c9 100644 (file)
@@ -1789,6 +1789,28 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 }
 EXPORT_SYMBOL(blk_mq_free_tag_set);
 
+int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
+{
+       struct blk_mq_tag_set *set = q->tag_set;
+       struct blk_mq_hw_ctx *hctx;
+       int i, ret;
+
+       if (!set || nr > set->queue_depth)
+               return -EINVAL;
+
+       ret = 0;
+       queue_for_each_hw_ctx(q, hctx, i) {
+               ret = blk_mq_tag_update_depth(hctx->tags, nr);
+               if (ret)
+                       break;
+       }
+
+       if (!ret)
+               q->nr_requests = nr;
+
+       return ret;
+}
+
 void blk_mq_disable_hotplug(void)
 {
        mutex_lock(&all_q_mutex);

block/blk-mq.h
index 5e5a378962b7346fa019c37ef6ccbef9ac78b4ad..7db4fe4bd0021434246912ef9cdcbd1020e15e5d 100644 (file)
@@ -32,6 +32,7 @@ void blk_mq_drain_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 void blk_mq_clone_flush_request(struct request *flush_rq,
                struct request *orig_rq);
+int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 
 /*
  * CPU hotplug helpers

block/blk-sysfs.c
index 7500f876dae40e0b124b90adab21c60c1e3676b6..4d6811ac13fddc9bfeba6e81afaef923ee6b4295 100644 (file)
@@ -48,11 +48,10 @@ static ssize_t queue_requests_show(struct request_queue *q, char *page)
 static ssize_t
 queue_requests_store(struct request_queue *q, const char *page, size_t count)
 {
-       struct request_list *rl;
        unsigned long nr;
-       int ret;
+       int ret, err;
 
-       if (!q->request_fn)
+       if (!q->request_fn && !q->mq_ops)
                return -EINVAL;
 
        ret = queue_var_store(&nr, page, count);
@@ -62,40 +61,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
        if (nr < BLKDEV_MIN_RQ)
                nr = BLKDEV_MIN_RQ;
 
-       spin_lock_irq(q->queue_lock);
-       q->nr_requests = nr;
-       blk_queue_congestion_threshold(q);
-
-       /* congestion isn't cgroup aware and follows root blkcg for now */
-       rl = &q->root_rl;
-
-       if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
-               blk_set_queue_congested(q, BLK_RW_SYNC);
-       else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
-               blk_clear_queue_congested(q, BLK_RW_SYNC);
-
-       if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
-               blk_set_queue_congested(q, BLK_RW_ASYNC);
-       else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
-               blk_clear_queue_congested(q, BLK_RW_ASYNC);
-
-       blk_queue_for_each_rl(rl, q) {
-               if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
-                       blk_set_rl_full(rl, BLK_RW_SYNC);
-               } else {
-                       blk_clear_rl_full(rl, BLK_RW_SYNC);
-                       wake_up(&rl->wait[BLK_RW_SYNC]);
-               }
-
-               if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
-                       blk_set_rl_full(rl, BLK_RW_ASYNC);
-               } else {
-                       blk_clear_rl_full(rl, BLK_RW_ASYNC);
-                       wake_up(&rl->wait[BLK_RW_ASYNC]);
-               }
-       }
+       if (q->request_fn)
+               err = blk_update_nr_requests(q, nr);
+       else
+               err = blk_mq_update_nr_requests(q, nr);
+
+       if (err)
+               return err;
 
-       spin_unlock_irq(q->queue_lock);
        return ret;
 }
 
block/blk.h
index 95cab70000e3bae1802fccbe353bca9c485c8f95..45385e9abf6f8e926e58d3fc8535b2133d6230e9 100644 (file)
@@ -188,6 +188,8 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
        return q->nr_congestion_off;
 }
 
+extern int blk_update_nr_requests(struct request_queue *, unsigned int);
+
 /*
  * Contribute to IO statistics IFF:
  *

include/linux/blk-mq.h
index a06ca7b5ea05c06033a94cf4eca284f401be6f0b..f45424453338ea4952c8f9e6a8461622cedb011c 100644 (file)
@@ -63,7 +63,7 @@ struct blk_mq_hw_ctx {
 struct blk_mq_tag_set {
        struct blk_mq_ops       *ops;
        unsigned int            nr_hw_queues;
-       unsigned int            queue_depth;
+       unsigned int            queue_depth;    /* max hw supported */
        unsigned int            reserved_tags;
        unsigned int            cmd_size;       /* per-request extra data */
        int                     numa_node;
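
For illustration only (not part of the commit): the standalone sketch below
mirrors the redistribution logic of the new bt_update_count() so the effect of
resizing the depth can be inspected in userspace. The struct, the map size, and
the BT_WAIT_BATCH value of 8 are simplified assumptions, not the kernel's
actual definitions.

#include <stdio.h>

#define BT_WAIT_BATCH	8	/* illustrative stand-in for the kernel constant */

struct demo_bt {
	unsigned int bits_per_word;	/* log2 of tags per bitmap word */
	unsigned int map_nr;		/* number of bitmap words */
	unsigned int depth;
	unsigned int wake_cnt;
	unsigned int map_depth[16];	/* per-word usable depth */
};

/* Mirrors bt_update_count(): spread 'depth' tags across the bitmap words,
 * filling each word up to tags_per_word, then derive the wakeup batch as
 * min(BT_WAIT_BATCH, max(1, depth / 4)). */
static void demo_update_count(struct demo_bt *bt, unsigned int depth)
{
	unsigned int tags_per_word = 1U << bt->bits_per_word;
	unsigned int map_depth = depth;
	unsigned int i;

	if (depth) {
		for (i = 0; i < bt->map_nr; i++) {
			bt->map_depth[i] = map_depth < tags_per_word ?
						map_depth : tags_per_word;
			map_depth -= bt->map_depth[i];
		}
	}

	bt->wake_cnt = BT_WAIT_BATCH;
	if (bt->wake_cnt > depth / 4)
		bt->wake_cnt = depth / 4 ? depth / 4 : 1;

	bt->depth = depth;
}

int main(void)
{
	struct demo_bt bt = { .bits_per_word = 5, .map_nr = 4 };
	unsigned int i;

	/* e.g. shrink a 4 x 32 = 128 tag map down to 40 usable tags */
	demo_update_count(&bt, 40);

	printf("depth=%u wake_cnt=%u\n", bt.depth, bt.wake_cnt);
	for (i = 0; i < bt.map_nr; i++)
		printf("word %u: depth %u\n", i, bt.map_depth[i]);

	return 0;
}

Running the sketch shows the first word keeping a full 32-tag share, the second
word the remaining 8, and the rest drained to zero: a lowered nr_requests
throttles the existing bitmap in place rather than reallocating it.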