git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - block/blk-mq.c
blk-mq: issue directly if hw queue isn't busy in case of 'none'
[mirror_ubuntu-bionic-kernel.git] / block / blk-mq.c
index 9e2650020281640e65058d309a802fcc5bd6d6f5..6a0ad28fa17638a63b90bfa0143d09b140201207 100644 (file)
@@ -1121,6 +1121,40 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx,
        }
 }
 
+#define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT  8  /* old average keeps (8 - 1)/8 */
+#define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR  4  /* a busy sample adds 1 << 4 */
+/*
+ * Update hctx->dispatch_busy as an Exponential Weighted Moving Average (EWMA):
+ * - new = (old * (WEIGHT - 1) + sample) / WEIGHT, i.e. weights of 7/8 and 1/8,
+ *   so the value decays exponentially once the queue stops being busy
+ * - a busy sample counts as 1 << FACTOR (16) rather than 1, so that integer
+ *   division does not round the result down to 0; an idle sample counts as 0
+ */
+static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
+{
+       unsigned int ewma;
+
+       if (hctx->queue->elevator)      /* skipped when the queue has an elevator */
+               return;
+
+       ewma = hctx->dispatch_busy;
+       /* Average is already 0 and this sample is idle: nothing would change. */
+       if (!ewma && !busy)
+               return;
+       /* ewma = (ewma * 7 + (busy ? 16 : 0)) / 8, using integer division */
+       ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
+       if (busy)
+               ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
+       ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;
+
+       hctx->dispatch_busy = ewma;
+}
+
+#define BLK_MQ_RESOURCE_DELAY  3               /* hw queue re-run delay, in ms */
+
+/*
+ * Returns true if we did some work AND can potentially do more.
+ */
 bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                             bool got_budget)
 {
@@ -1128,6 +1162,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
        struct request *rq, *nxt;
        bool no_tag = false;
        int errors, queued;
+       blk_status_t ret = BLK_STS_OK;
 
        if (list_empty(list))
                return false;
@@ -1140,7 +1175,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
        errors = queued = 0;
        do {
                struct blk_mq_queue_data bd;
-               blk_status_t ret;
 
                rq = list_first_entry(list, struct request, queuelist);
 
@@ -1184,7 +1218,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                }
 
                ret = q->mq_ops->queue_rq(hctx, &bd);
-               if (ret == BLK_STS_RESOURCE) {
+               if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
                        /*
                         * If an I/O scheduler has been configured and we got a
                         * driver tag for the next request already, free it
@@ -1215,6 +1249,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
         * that is where we will continue on next queue run.
         */
        if (!list_empty(list)) {
+               bool needs_restart;
+
                spin_lock(&hctx->lock);
                list_splice_init(list, &hctx->dispatch);
                spin_unlock(&hctx->lock);
@@ -1238,11 +1274,29 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                 * - Some but not all block drivers stop a queue before
                 *   returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
                 *   and dm-rq.
+                *
+                * If driver returns BLK_STS_RESOURCE and SCHED_RESTART
+                * bit is set, run queue after a delay to avoid IO stalls
+                * that could otherwise occur if the queue is idle.
                 */
-               if (!blk_mq_sched_needs_restart(hctx) ||
+               needs_restart = blk_mq_sched_needs_restart(hctx);
+               if (!needs_restart ||
                    (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
                        blk_mq_run_hw_queue(hctx, true);
-       }
+               else if (needs_restart && (ret == BLK_STS_RESOURCE))
+                       blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
+
+               blk_mq_update_dispatch_busy(hctx, true);
+               return false;
+       } else
+               blk_mq_update_dispatch_busy(hctx, false);
+
+       /*
+        * If the host/device is unable to accept more work, inform the
+        * caller of that.
+        */
+       if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
+               return false;
 
        return (queued + errors) != 0;
 }
@@ -1719,12 +1773,16 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
        ret = q->mq_ops->queue_rq(hctx, &bd);
        switch (ret) {
        case BLK_STS_OK:
+               blk_mq_update_dispatch_busy(hctx, false);
                *cookie = new_cookie;
                break;
        case BLK_STS_RESOURCE:
+       case BLK_STS_DEV_RESOURCE:
+               blk_mq_update_dispatch_busy(hctx, true);
                __blk_mq_requeue_request(rq);
                break;
        default:
+               blk_mq_update_dispatch_busy(hctx, false);
                *cookie = BLK_QC_T_NONE;
                break;
        }
@@ -1744,7 +1802,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
         * RCU or SRCU read lock is needed before checking quiesced flag.
         *
         * When queue is stopped or quiesced, ignore 'bypass_insert' from
-        * blk_mq_request_direct_issue(), and return BLK_STS_OK to caller,
+        * blk_mq_request_issue_directly(), and return BLK_STS_OK to caller,
         * and avoid driver to try to dispatch again.
         */
        if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
@@ -1784,7 +1842,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
        hctx_lock(hctx, &srcu_idx);
 
        ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
-       if (ret == BLK_STS_RESOURCE)
+       if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
                blk_mq_sched_insert_request(rq, false, true, false);
        else if (ret != BLK_STS_OK)
                blk_mq_end_request(rq, ret);
@@ -1792,7 +1850,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
        hctx_unlock(hctx, srcu_idx);
 }
 
-blk_status_t blk_mq_request_direct_issue(struct request *rq)
+blk_status_t blk_mq_request_issue_directly(struct request *rq)
 {
        blk_status_t ret;
        int srcu_idx;
@@ -1807,6 +1865,23 @@ blk_status_t blk_mq_request_direct_issue(struct request *rq)
        return ret;
 }
 
+void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
+               struct list_head *list)
+{
+       while (!list_empty(list)) {     /* issue from the front until one fails */
+               blk_status_t ret;
+               struct request *rq = list_first_entry(list, struct request,
+                               queuelist);
+
+               list_del_init(&rq->queuelist);  /* unlink rq before issuing it */
+               ret = blk_mq_request_issue_directly(rq);
+               if (ret != BLK_STS_OK) {        /* stops on any non-OK status */
+                       list_add(&rq->queuelist, list); /* put rq back on @list */
+                       break;
+               }
+       }
+}
+
 static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
        const int is_sync = op_is_sync(bio->bi_opf);
@@ -1908,7 +1983,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
                        blk_mq_try_issue_directly(data.hctx, same_queue_rq,
                                        &cookie);
                }
-       } else if (q->nr_hw_queues > 1 && is_sync) {
+       } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
+                       !data.hctx->dispatch_busy)) {
                blk_mq_put_ctx(data.ctx);
                blk_mq_bio_to_request(rq, bio);
                blk_mq_try_issue_directly(data.hctx, rq, &cookie);