blk-wbt: abstract out end IO completion handler

[mirror_ubuntu-bionic-kernel.git] / block / blk-mq-sched.c
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c

index 8e525e66a0d971aaa50d5b57cd27471f7cc96e6e..c117bd8fd1f6126849472300b3c294e65825a2f7 100644 (file)
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -81,16 +81,15 @@ static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
         } else
                 clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
  
-       if (blk_mq_hctx_has_pending(hctx)) {
-               blk_mq_run_hw_queue(hctx, true);
-               return true;
-       }
-
-       return false;
+       return blk_mq_run_hw_queue(hctx, true);
  }
  
-/* return true if hctx need to run again */
-static bool blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
+/*
+ * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
+ * its queue by itself in its completion handler, so we don't need to
+ * restart the queue if .get_budget() fails to get the budget.
+ */
+static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
  {
         struct request_queue *q = hctx->queue;
         struct elevator_queue *e = q->elevator;
@@ -98,23 +97,18 @@ static bool blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
  
         do {
                 struct request *rq;
-               blk_status_t ret;
  
                 if (e->type->ops.mq.has_work &&
                                 !e->type->ops.mq.has_work(hctx))
                         break;
  
-               ret = blk_mq_get_dispatch_budget(hctx);
-               if (ret == BLK_STS_RESOURCE)
-                       return true;
+               if (!blk_mq_get_dispatch_budget(hctx))
+                       break;
  
                 rq = e->type->ops.mq.dispatch_request(hctx);
                 if (!rq) {
                         blk_mq_put_dispatch_budget(hctx);
                         break;
-               } else if (ret != BLK_STS_OK) {
-                       blk_mq_end_request(rq, ret);
-                       continue;
                 }
  
                 /*
@@ -124,22 +118,71 @@ static bool blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
                  */
                 list_add(&rq->queuelist, &rq_list);
         } while (blk_mq_dispatch_rq_list(q, &rq_list, true));
+}
  
-       return false;
+static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
+                                         struct blk_mq_ctx *ctx)
+{
+       unsigned idx = ctx->index_hw;
+
+       if (++idx == hctx->nr_ctx)
+               idx = 0;
+
+       return hctx->ctxs[idx];
+}
+
+/*
+ * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
+ * its queue by itself in its completion handler, so we don't need to
+ * restart the queue if .get_budget() fails to get the budget.
+ */
+static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
+{
+       struct request_queue *q = hctx->queue;
+       LIST_HEAD(rq_list);
+       struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
+
+       do {
+               struct request *rq;
+
+               if (!sbitmap_any_bit_set(&hctx->ctx_map))
+                       break;
+
+               if (!blk_mq_get_dispatch_budget(hctx))
+                       break;
+
+               rq = blk_mq_dequeue_from_ctx(hctx, ctx);
+               if (!rq) {
+                       blk_mq_put_dispatch_budget(hctx);
+                       break;
+               }
+
+               /*
+                * Now this rq owns the budget which has to be released
+                * if this rq won't be queued to driver via .queue_rq()
+                * in blk_mq_dispatch_rq_list().
+                */
+               list_add(&rq->queuelist, &rq_list);
+
+               /* round robin for fair dispatch */
+               ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
+
+       } while (blk_mq_dispatch_rq_list(q, &rq_list, true));
+
+       WRITE_ONCE(hctx->dispatch_from, ctx);
  }
  
  /* return true if hw queue need to be run again */
-bool blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
+void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
  {
         struct request_queue *q = hctx->queue;
         struct elevator_queue *e = q->elevator;
         const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
         LIST_HEAD(rq_list);
-       bool run_queue = false;
  
         /* RCU or SRCU read lock is needed before checking quiesced flag */
         if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
-               return false;
+               return;
  
         hctx->run++;
  
@@ -169,23 +212,28 @@ bool blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
          */
         if (!list_empty(&rq_list)) {
                 blk_mq_sched_mark_restart_hctx(hctx);
-               if (blk_mq_dispatch_rq_list(q, &rq_list, false) &&
-                               has_sched_dispatch)
-                       run_queue = blk_mq_do_dispatch_sched(hctx);
+               if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
+                       if (has_sched_dispatch)
+                               blk_mq_do_dispatch_sched(hctx);
+                       else
+                               blk_mq_do_dispatch_ctx(hctx);
+               }
         } else if (has_sched_dispatch) {
-               run_queue = blk_mq_do_dispatch_sched(hctx);
+               blk_mq_do_dispatch_sched(hctx);
+       } else if (q->mq_ops->get_budget) {
+               /*
+                * If we need to get budget before queuing a request, we
+                * dequeue requests one by one from the sw queue to avoid
+                * messing up I/O merging when dispatch runs out of resources.
+                *
+                * TODO: get more budget, and dequeue more requests at
+                * a time.
+                */
+               blk_mq_do_dispatch_ctx(hctx);
         } else {
                 blk_mq_flush_busy_ctxs(hctx, &rq_list);
                 blk_mq_dispatch_rq_list(q, &rq_list, false);
         }
-
-       if (run_queue && !blk_mq_sched_needs_restart(hctx) &&
-                       !test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state)) {
-               blk_mq_sched_mark_restart_hctx(hctx);
-               return true;
-       }
-
-       return false;
  }
  
  bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
@@ -300,21 +348,21 @@ void blk_mq_sched_request_inserted(struct request *rq)
  EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
  
  static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
+                                      bool has_sched,
                                        struct request *rq)
  {
-       if (rq->tag == -1) {
-               rq->rq_flags |= RQF_SORTED;
-               return false;
+       /* dispatch flush rq directly */
+       if (rq->rq_flags & RQF_FLUSH_SEQ) {
+               spin_lock(&hctx->lock);
+               list_add(&rq->queuelist, &hctx->dispatch);
+               spin_unlock(&hctx->lock);
+               return true;
         }
  
-       /*
-        * If we already have a real request tag, send directly to
-        * the dispatch list.
-        */
-       spin_lock(&hctx->lock);
-       list_add(&rq->queuelist, &hctx->dispatch);
-       spin_unlock(&hctx->lock);
-       return true;
+       if (has_sched)
+               rq->rq_flags |= RQF_SORTED;
+
+       return false;
  }
  
  /**
@@ -379,21 +427,6 @@ done:
         }
  }
  
-/*
- * Add flush/fua to the queue. If we fail getting a driver tag, then
- * punt to the requeue list. Requeue will re-invoke us from a context
- * that's safe to block from.
- */
-static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
-                                     struct request *rq, bool can_block)
-{
-       if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
-               blk_insert_flush(rq);
-               blk_mq_run_hw_queue(hctx, true);
-       } else
-               blk_mq_add_to_requeue_list(rq, false, true);
-}
-
  void blk_mq_sched_insert_request(struct request *rq, bool at_head,
                                  bool run_queue, bool async, bool can_block)
  {
@@ -402,12 +435,15 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
         struct blk_mq_ctx *ctx = rq->mq_ctx;
         struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
  
-       if (rq->tag == -1 && op_is_flush(rq->cmd_flags)) {
-               blk_mq_sched_insert_flush(hctx, rq, can_block);
-               return;
+       /* a flush rq in the flush machinery needs to be dispatched directly */
+       if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) {
+               blk_insert_flush(rq);
+               goto run;
         }
  
-       if (e && blk_mq_sched_bypass_insert(hctx, rq))
+       WARN_ON(e && (rq->tag != -1));
+
+       if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
                 goto run;
  
         if (e && e->type->ops.mq.insert_requests) {
@@ -433,23 +469,6 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
         struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
         struct elevator_queue *e = hctx->queue->elevator;
  
-       if (e) {
-               struct request *rq, *next;
-
-               /*
-                * We bypass requests that already have a driver tag assigned,
-                * which should only be flushes. Flushes are only ever inserted
-                * as single requests, so we shouldn't ever hit the
-                * WARN_ON_ONCE() below (but let's handle it just in case).
-                */
-               list_for_each_entry_safe(rq, next, list, queuelist) {
-                       if (WARN_ON_ONCE(rq->tag != -1)) {
-                               list_del_init(&rq->queuelist);
-                               blk_mq_sched_bypass_insert(hctx, rq);
-                       }
-               }
-       }
-
         if (e && e->type->ops.mq.insert_requests)
                 e->type->ops.mq.insert_requests(hctx, list, false);
         else