blk-mq: dequeue request one by one from sw queue if hctx is busy
author:    Ming Lei <ming.lei@redhat.com>
           Wed, 27 Nov 2019 20:18:16 +0000 (17:18 -0300)
committer: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
           Fri, 17 Jan 2020 17:23:13 +0000 (14:23 -0300)
BugLink: https://bugs.launchpad.net/bugs/1848739
It isn't efficient to dequeue requests one by one from the sw queue, but
we have to do that when the queue is busy in order to get better merge
performance.

This patch uses an Exponential Weighted Moving Average (EWMA) to figure
out whether the queue is busy, and dequeues requests one by one from the
sw queue only when it is.

Fixes: b347689ffbca ("blk-mq-sched: improve dispatching from sw queue")
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Reported-by: Kashyap Desai <kashyap.desai@broadcom.com>
Tested-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
(cherry picked from commit 6e768717304bdbe8d2897ca8298f6b58863fdc41)
Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
Acked-by: Stefan Bader <stefan.bader@canonical.com>
Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
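
For illustration, a minimal userspace sketch of the EWMA update this patch
introduces. The constants and the integer arithmetic are copied from
blk_mq_update_dispatch_busy() in the diff below; the ewma_update() wrapper
and the main() harness are illustrative only and not part of the kernel
change.

#include <stdio.h>
#include <stdbool.h>

#define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT  8
#define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR  4

/* Same integer arithmetic as the kernel helper added by this patch. */
static unsigned int ewma_update(unsigned int ewma, bool busy)
{
        if (!ewma && !busy)
                return 0;

        ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
        if (busy)
                ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
        ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;

        return ewma;
}

int main(void)
{
        unsigned int ewma = 0;
        int i;

        /* Sustained busy dispatches: climbs 2, 3, ..., 8, then plateaus at 9. */
        for (i = 0; i < 10; i++) {
                ewma = ewma_update(ewma, true);
                printf("busy: %u\n", ewma);
        }

        /* Sustained clean dispatches: decays 7, 6, ..., 1, 0. */
        for (i = 0; i < 10; i++) {
                ewma = ewma_update(ewma, false);
                printf("idle: %u\n", ewma);
        }

        return 0;
}
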
block/blk-mq-debugfs.c
block/blk-mq-sched.c
block/blk-mq.c
include/linux/blk-mq.h

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 54bd8c31b822d25e35c8807903bef9dffbf1277b..ead271fb641e5ab37f46f571a6ad7d780aa4b131 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -607,6 +607,14 @@ static int hctx_active_show(void *data, struct seq_file *m)
        return 0;
 }
 
+static int hctx_dispatch_busy_show(void *data, struct seq_file *m)
+{
+       struct blk_mq_hw_ctx *hctx = data;
+
+       seq_printf(m, "%u\n", hctx->dispatch_busy);
+       return 0;
+}
+
 static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
        __acquires(&ctx->lock)
 {
@@ -776,6 +784,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
        {"queued", 0600, hctx_queued_show, hctx_queued_write},
        {"run", 0600, hctx_run_show, hctx_run_write},
        {"active", 0400, hctx_active_show},
+       {"dispatch_busy", 0400, hctx_dispatch_busy_show},
        {},
 };
 
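Once this hunk is applied, the per-hctx EWMA value can be read from
userspace (assuming debugfs is mounted in its usual location, and taking
sda/hctx0 as a hypothetical example):

    cat /sys/kernel/debug/block/sda/hctx0/dispatch_busy

The 0400 mode makes the attribute read-only; it prints hctx->dispatch_busy
as a decimal value, with 0 meaning the queue is currently considered idle.
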
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index f3380331e5f3b697bebc1c3e957ee0fa32ee77ed..1518c794a78cd1194b129a6d14942df3be6f5265 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -220,15 +220,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
                }
        } else if (has_sched_dispatch) {
                blk_mq_do_dispatch_sched(hctx);
-       } else if (q->mq_ops->get_budget) {
-               /*
-                * If we need to get budget before queuing request, we
-                * dequeue request one by one from sw queue for avoiding
-                * to mess up I/O merge when dispatch runs out of resource.
-                *
-                * TODO: get more budgets, and dequeue more requests in
-                * one time.
-                */
+       } else if (hctx->dispatch_busy) {
+               /* dequeue request one by one from sw queue if queue is busy */
                blk_mq_do_dispatch_ctx(hctx);
        } else {
                blk_mq_flush_busy_ctxs(hctx, &rq_list);
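
The trade-off in this hunk: blk_mq_flush_busy_ctxs() moves everything from
the sw queues onto the dispatch list in one go, which is cheap but gives up
merge opportunities once the device starts returning BLK_STS_RESOURCE,
while blk_mq_do_dispatch_ctx() takes one request at a time, so that later
requests can still be merged in the sw queue while an earlier one waits for
resources.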
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a522caa005a796c185571ca12b267ef591c83c7b..7cc7d79e6350e576ebf3e4ec6e9e5a494ed3e0e0 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1121,6 +1121,35 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx,
        }
 }
 
+#define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT  8
+#define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR  4
+/*
+ * Update dispatch busy with the Exponential Weighted Moving Average(EWMA):
+ * - EWMA is one simple way to compute running average value
+ * - weight(7/8 and 1/8) is applied so that it can decrease exponentially
+ * - take 4 as factor for avoiding to get too small(0) result, and this
+ *   factor doesn't matter because EWMA decreases exponentially
+ */
+static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
+{
+       unsigned int ewma;
+
+       if (hctx->queue->elevator)
+               return;
+
+       ewma = hctx->dispatch_busy;
+
+       if (!ewma && !busy)
+               return;
+
+       ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
+       if (busy)
+               ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
+       ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;
+
+       hctx->dispatch_busy = ewma;
+}
+
 #define BLK_MQ_RESOURCE_DELAY  3               /* ms units */
 
 /*
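
Working the integer arithmetic above through by hand, starting from
dispatch_busy = 0: the first busy update gives (0 * 7 + 16) / 8 = 2, then
3, 4, ..., 8, and the value plateaus at 9, the first point where the
truncating division stalls ((9 * 7 + 16) / 8 = 9). Consecutive non-busy
updates decay 9 -> 7 -> 6 -> ... -> 1 -> 0, so the hctx is treated as idle
again after roughly eight clean dispatch rounds.
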
@@ -1257,8 +1286,10 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                else if (needs_restart && (ret == BLK_STS_RESOURCE))
                        blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
 
+               blk_mq_update_dispatch_busy(hctx, true);
                return false;
-       }
+       } else
+               blk_mq_update_dispatch_busy(hctx, false);
 
        /*
         * If the host/device is unable to accept more work, inform the
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 95c9a5c862e2545b26922b3cbb2103200a29a888..f3188bf2acee63969211957aa3066021dc06ed60 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -32,9 +32,10 @@ struct blk_mq_hw_ctx {
        struct sbitmap          ctx_map;
 
        struct blk_mq_ctx       *dispatch_from;
+       unsigned int            dispatch_busy;
 
-       struct blk_mq_ctx       **ctxs;
        unsigned int            nr_ctx;
+       struct blk_mq_ctx       **ctxs;
 
        wait_queue_entry_t      dispatch_wait;
        atomic_t                wait_index;