blk-mq: issue directly if hw queue isn't busy in case of 'none'

[mirror_ubuntu-bionic-kernel.git] / block / blk-mq.c
diff --git a/block/blk-mq.c b/block/blk-mq.c

index d417804b7f8ffa4959331c8c12c9a07b43188ec1..6a0ad28fa17638a63b90bfa0143d09b140201207 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -736,13 +736,13 @@ static void blk_mq_requeue_work(struct work_struct *work)
                 if (rq->rq_flags & RQF_DONTPREP)
                         blk_mq_request_bypass_insert(rq, false);
                 else
-                       blk_mq_sched_insert_request(rq, true, false, false, true);
+                       blk_mq_sched_insert_request(rq, true, false, false);
         }
  
         while (!list_empty(&rq_list)) {
                 rq = list_entry(rq_list.next, struct request, queuelist);
                 list_del_init(&rq->queuelist);
-               blk_mq_sched_insert_request(rq, false, false, false, true);
+               blk_mq_sched_insert_request(rq, false, false, false);
         }
  
         blk_mq_run_hw_queues(q, false);
@@ -1121,6 +1121,40 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx,
         }
  }
  
+#define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT  8
+#define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR  4
+/*
+ * Maintain hctx->dispatch_busy as an Exponential Weighted Moving Average:
+ * - each update keeps 7/8 of the old value, so it decays exponentially
+ * - a busy sample adds 1 << 4 before the divide by 8; without that factor
+ *   the integer result would truncate straight to 0
+ * - queues with an I/O scheduler (elevator) attached are not tracked
+ */
+static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
+{
+       unsigned int ewma;
+
+       if (hctx->queue->elevator)
+               return;
+
+       ewma = hctx->dispatch_busy;
+
+       if (!ewma && !busy)
+               return;
+
+       ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
+       if (busy)
+               ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
+       ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;
+
+       hctx->dispatch_busy = ewma;
+}
+
+#define BLK_MQ_RESOURCE_DELAY  3               /* ms units */
+
+/*
+ * Returns true if we did some work AND can potentially do more.
+ */
  bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                              bool got_budget)
  {
@@ -1128,6 +1162,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
         struct request *rq, *nxt;
         bool no_tag = false;
         int errors, queued;
+       blk_status_t ret = BLK_STS_OK;
  
         if (list_empty(list))
                 return false;
@@ -1140,7 +1175,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
         errors = queued = 0;
         do {
                 struct blk_mq_queue_data bd;
-               blk_status_t ret;
  
                 rq = list_first_entry(list, struct request, queuelist);
  
@@ -1184,7 +1218,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                 }
  
                 ret = q->mq_ops->queue_rq(hctx, &bd);
-               if (ret == BLK_STS_RESOURCE) {
+               if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
                         /*
                          * If an I/O scheduler has been configured and we got a
                          * driver tag for the next request already, free it
@@ -1215,6 +1249,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
          * that is where we will continue on next queue run.
          */
         if (!list_empty(list)) {
+               bool needs_restart;
+
                 spin_lock(&hctx->lock);
                 list_splice_init(list, &hctx->dispatch);
                 spin_unlock(&hctx->lock);
@@ -1238,11 +1274,29 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                  * - Some but not all block drivers stop a queue before
                  *   returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
                  *   and dm-rq.
+                *
+                * If driver returns BLK_STS_RESOURCE and SCHED_RESTART
+                * bit is set, run queue after a delay to avoid IO stalls
+                * that could otherwise occur if the queue is idle.
                  */
-               if (!blk_mq_sched_needs_restart(hctx) ||
+               needs_restart = blk_mq_sched_needs_restart(hctx);
+               if (!needs_restart ||
                     (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
                         blk_mq_run_hw_queue(hctx, true);
-       }
+               else if (needs_restart && (ret == BLK_STS_RESOURCE))
+                       blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
+
+               blk_mq_update_dispatch_busy(hctx, true);
+               return false;
+       } else
+               blk_mq_update_dispatch_busy(hctx, false);
+
+       /*
+        * If the host/device is unable to accept more work, inform the
+        * caller of that.
+        */
+       if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
+               return false;
  
         return (queued + errors) != 0;
  }
@@ -1719,12 +1773,16 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
         ret = q->mq_ops->queue_rq(hctx, &bd);
         switch (ret) {
         case BLK_STS_OK:
+               blk_mq_update_dispatch_busy(hctx, false);
                 *cookie = new_cookie;
                 break;
         case BLK_STS_RESOURCE:
+       case BLK_STS_DEV_RESOURCE:
+               blk_mq_update_dispatch_busy(hctx, true);
                 __blk_mq_requeue_request(rq);
                 break;
         default:
+               blk_mq_update_dispatch_busy(hctx, false);
                 *cookie = BLK_QC_T_NONE;
                 break;
         }
@@ -1732,17 +1790,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
         return ret;
  }
  
-static void __blk_mq_fallback_to_insert(struct blk_mq_hw_ctx *hctx,
-                                       struct request *rq,
-                                       bool run_queue, bool bypass_insert)
-{
-       if (!bypass_insert)
-               blk_mq_sched_insert_request(rq, false, run_queue, false,
-                                           hctx->flags & BLK_MQ_F_BLOCKING);
-       else
-               blk_mq_request_bypass_insert(rq, run_queue);
-}
-
  static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
                                                 struct request *rq,
                                                 blk_qc_t *cookie,
@@ -1751,9 +1798,16 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
         struct request_queue *q = rq->q;
         bool run_queue = true;
  
-       /* RCU or SRCU read lock is needed before checking quiesced flag */
+       /*
+        * RCU or SRCU read lock is needed before checking quiesced flag.
+        *
+        * When the queue is stopped or quiesced, ignore 'bypass_insert' from
+        * blk_mq_request_issue_directly(): insert the request and return
+        * BLK_STS_OK so that the driver does not try to dispatch it again.
+        */
         if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
                 run_queue = false;
+               bypass_insert = false;
                 goto insert;
         }
  
@@ -1770,10 +1824,10 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
  
         return __blk_mq_issue_directly(hctx, rq, cookie);
  insert:
-       __blk_mq_fallback_to_insert(hctx, rq, run_queue, bypass_insert);
         if (bypass_insert)
                 return BLK_STS_RESOURCE;
  
+       blk_mq_sched_insert_request(rq, false, run_queue, false);
         return BLK_STS_OK;
  }
  
@@ -1788,15 +1842,15 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
         hctx_lock(hctx, &srcu_idx);
  
         ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
-       if (ret == BLK_STS_RESOURCE)
-               __blk_mq_fallback_to_insert(hctx, rq, true, false);
+       if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
+               blk_mq_sched_insert_request(rq, false, true, false);
         else if (ret != BLK_STS_OK)
                 blk_mq_end_request(rq, ret);
  
         hctx_unlock(hctx, srcu_idx);
  }
  
-blk_status_t blk_mq_request_direct_issue(struct request *rq)
+blk_status_t blk_mq_request_issue_directly(struct request *rq)
  {
         blk_status_t ret;
         int srcu_idx;
@@ -1811,6 +1865,42 @@ blk_status_t blk_mq_request_direct_issue(struct request *rq)
         return ret;
  }
  
+/*
+ * Issue each request on @list straight to the driver, in list order.
+ *
+ * Stops at the first request that reports BLK_STS_RESOURCE or
+ * BLK_STS_DEV_RESOURCE; that request is put back at the head of @list,
+ * which then holds every request that has not been issued. Any other
+ * failure is final: the request is ended with that status and the loop
+ * moves on to the next one.
+ */
+void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
+               struct list_head *list)
+{
+       while (!list_empty(list)) {
+               blk_status_t ret;
+               struct request *rq = list_first_entry(list, struct request,
+                               queuelist);
+
+               list_del_init(&rq->queuelist);
+               ret = blk_mq_request_issue_directly(rq);
+               if (ret == BLK_STS_OK)
+                       continue;
+
+               if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
+                       /* Out of resources: leave the rest on @list. */
+                       list_add(&rq->queuelist, list);
+                       break;
+               }
+
+               /*
+                * Hard error: end the request here, as
+                * blk_mq_try_issue_directly() does, instead of putting
+                * it back on @list to be retried.
+                */
+               blk_mq_end_request(rq, ret);
+       }
+}
+
  static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
  {
         const int is_sync = op_is_sync(bio->bi_opf);
@@ -1912,14 +1983,15 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
                         blk_mq_try_issue_directly(data.hctx, same_queue_rq,
                                         &cookie);
                 }
-       } else if (q->nr_hw_queues > 1 && is_sync) {
+       } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
+                       !data.hctx->dispatch_busy)) {
                 blk_mq_put_ctx(data.ctx);
                 blk_mq_bio_to_request(rq, bio);
                 blk_mq_try_issue_directly(data.hctx, rq, &cookie);
         } else if (q->elevator) {
                 blk_mq_put_ctx(data.ctx);
                 blk_mq_bio_to_request(rq, bio);
-               blk_mq_sched_insert_request(rq, false, true, true, true);
+               blk_mq_sched_insert_request(rq, false, true, true);
         } else {
                 blk_mq_put_ctx(data.ctx);
                 blk_mq_bio_to_request(rq, bio);