blk-mq: dequeue request one by one from sw queue if hctx is busy
author:    Ming Lei <ming.lei@redhat.com>
           Wed, 27 Nov 2019 20:18:16 +0000 (17:18 -0300)
committer: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
           Fri, 17 Jan 2020 17:23:13 +0000 (14:23 -0300)
BugLink: https://bugs.launchpad.net/bugs/1848739
It isn't efficient to dequeue requests one by one from the sw queue, but
we have to do that when the queue is busy in order to get better merge
performance.

This patch uses an Exponential Weighted Moving Average (EWMA) to figure
out whether the queue is busy, and dequeues requests one by one from the
sw queue only when it is.

Fixes: b347689ffbca ("blk-mq-sched: improve dispatching from sw queue")
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Reported-by: Kashyap Desai <kashyap.desai@broadcom.com>
Tested-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
(cherry picked from commit 6e768717304bdbe8d2897ca8298f6b58863fdc41)
Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
Acked-by: Stefan Bader <stefan.bader@canonical.com>
Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
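
For illustration, a minimal userspace sketch of the EWMA update this patch
introduces. The constants and the integer arithmetic are copied from
blk_mq_update_dispatch_busy() in the diff below; the ewma_update() wrapper
and the main() harness are illustrative only and not part of the kernel
change.

#include <stdio.h>
#include <stdbool.h>

#define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT  8
#define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR  4

/* Same integer arithmetic as the kernel helper added by this patch. */
static unsigned int ewma_update(unsigned int ewma, bool busy)
{
        if (!ewma && !busy)
                return 0;

        ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
        if (busy)
                ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
        ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;

        return ewma;
}

int main(void)
{
        unsigned int ewma = 0;
        int i;

        /* Sustained busy dispatches: climbs 2, 3, ..., 8, then plateaus at 9. */
        for (i = 0; i < 10; i++) {
                ewma = ewma_update(ewma, true);
                printf("busy: %u\n", ewma);
        }

        /* Sustained clean dispatches: decays 7, 6, ..., 1, 0. */
        for (i = 0; i < 10; i++) {
                ewma = ewma_update(ewma, false);
                printf("idle: %u\n", ewma);
        }

        return 0;
}
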
block/blk-mq-debugfs.c
block/blk-mq-sched.c
block/blk-mq.c
include/linux/blk-mq.h

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 54bd8c31b822d25e35c8807903bef9dffbf1277b..ead271fb641e5ab37f46f571a6ad7d780aa4b131 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -607,6 +607,14 @@ static int hctx_active_show(void *data, struct seq_file *m)
        return 0;
 }
 
+static int hctx_dispatch_busy_show(void *data, struct seq_file *m)
+{
+       struct blk_mq_hw_ctx *hctx = data;
+
+       seq_printf(m, "%u\n", hctx->dispatch_busy);
+       return 0;
+}
+
 static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
        __acquires(&ctx->lock)
 {
@@ -776,6 +784,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
        {"queued", 0600, hctx_queued_show, hctx_queued_write},
        {"run", 0600, hctx_run_show, hctx_run_write},
        {"active", 0400, hctx_active_show},
+       {"dispatch_busy", 0400, hctx_dispatch_busy_show},
        {},
 };
 
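Once this hunk is applied, the per-hctx EWMA value can be read from
userspace (assuming debugfs is mounted in its usual location, and taking
sda/hctx0 as a hypothetical example):

    cat /sys/kernel/debug/block/sda/hctx0/dispatch_busy

The 0400 mode makes the attribute read-only; it prints hctx->dispatch_busy
as a decimal value, with 0 meaning the queue is currently considered idle.
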
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index f3380331e5f3b697bebc1c3e957ee0fa32ee77ed..1518c794a78cd1194b129a6d14942df3be6f5265 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -220,15 +220,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
                }
        } else if (has_sched_dispatch) {
                blk_mq_do_dispatch_sched(hctx);
-       } else if (q->mq_ops->get_budget) {
-               /*
-                * If we need to get budget before queuing request, we
-                * dequeue request one by one from sw queue for avoiding
-                * to mess up I/O merge when dispatch runs out of resource.
-                *
-                * TODO: get more budgets, and dequeue more requests in
-                * one time.
-                */
+       } else if (hctx->dispatch_busy) {
+               /* dequeue request one by one from sw queue if queue is busy */
                blk_mq_do_dispatch_ctx(hctx);
        } else {
                blk_mq_flush_busy_ctxs(hctx, &rq_list);
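
The trade-off in this hunk: blk_mq_flush_busy_ctxs() moves everything from
the sw queues onto the dispatch list in one go, which is cheap but gives up
merge opportunities once the device starts returning BLK_STS_RESOURCE,
while blk_mq_do_dispatch_ctx() takes one request at a time, so that later
requests can still be merged in the sw queue while an earlier one waits for
resources.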
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a522caa005a796c185571ca12b267ef591c83c7b..7cc7d79e6350e576ebf3e4ec6e9e5a494ed3e0e0 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1121,6 +1121,35 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx,
        }
 }
 
+#define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT  8
+#define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR  4
+/*
+ * Update dispatch busy with the Exponential Weighted Moving Average(EWMA):
+ * - EWMA is one simple way to compute running average value
+ * - weight(7/8 and 1/8) is applied so that it can decrease exponentially
+ * - take 4 as factor for avoiding to get too small(0) result, and this
+ *   factor doesn't matter because EWMA decreases exponentially
+ */
+static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
+{
+       unsigned int ewma;
+
+       if (hctx->queue->elevator)
+               return;
+
+       ewma = hctx->dispatch_busy;
+
+       if (!ewma && !busy)
+               return;
+
+       ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
+       if (busy)
+               ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
+       ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;
+
+       hctx->dispatch_busy = ewma;
+}
+
 #define BLK_MQ_RESOURCE_DELAY  3               /* ms units */
 
 /*
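
Working the integer arithmetic above through by hand, starting from
dispatch_busy = 0: the first busy update gives (0 * 7 + 16) / 8 = 2, then
3, 4, ..., 8, and the value plateaus at 9, the first point where the
truncating division stalls ((9 * 7 + 16) / 8 = 9). Consecutive non-busy
updates decay 9 -> 7 -> 6 -> ... -> 1 -> 0, so the hctx is treated as idle
again after roughly eight clean dispatch rounds.
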
@@ -1257,8 +1286,10 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                else if (needs_restart && (ret == BLK_STS_RESOURCE))
                        blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
 
+               blk_mq_update_dispatch_busy(hctx, true);
                return false;
-       }
+       } else
+               blk_mq_update_dispatch_busy(hctx, false);
 
        /*
         * If the host/device is unable to accept more work, inform the
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 95c9a5c862e2545b26922b3cbb2103200a29a888..f3188bf2acee63969211957aa3066021dc06ed60 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -32,9 +32,10 @@ struct blk_mq_hw_ctx {
        struct sbitmap          ctx_map;
 
        struct blk_mq_ctx       *dispatch_from;
+       unsigned int            dispatch_busy;
 
-       struct blk_mq_ctx       **ctxs;
        unsigned int            nr_ctx;
+       struct blk_mq_ctx       **ctxs;
 
        wait_queue_entry_t      dispatch_wait;
        atomic_t                wait_index;