block, bfq: correctly charge and reset entity service in all cases

[mirror_ubuntu-bionic-kernel.git] / block / blk-mq.c
diff --git a/block/blk-mq.c b/block/blk-mq.c

index cd409593ab1978efd04e61a667cf1fcb448201e7..dcc19e2d370ced4c27d8392760b87bb66cc5cc68 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -119,6 +119,25 @@ void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
         blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
  }
  
+static void blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
+                                    struct request *rq, void *priv,
+                                    bool reserved)
+{
+       struct mq_inflight *mi = priv;
+
+       if (rq->part == mi->part)
+               mi->inflight[rq_data_dir(rq)]++;
+}
+
+void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+                        unsigned int inflight[2])
+{
+       struct mq_inflight mi = { .part = part, .inflight = inflight, };
+
+       inflight[0] = inflight[1] = 0;
+       blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi);
+}
+
  void blk_freeze_queue_start(struct request_queue *q)
  {
         int freeze_depth;
@@ -279,7 +298,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
                 rq->tag = -1;
                 rq->internal_tag = tag;
         } else {
-               if (blk_mq_tag_busy(data->hctx)) {
+               if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) {
                         rq->rq_flags = RQF_MQ_INFLIGHT;
                         atomic_inc(&data->hctx->nr_active);
                 }
@@ -357,6 +376,8 @@ static struct request *blk_mq_get_request(struct request_queue *q,
                  */
                 if (!op_is_flush(op) && e->type->ops.mq.limit_depth)
                         e->type->ops.mq.limit_depth(op, data);
+       } else {
+               blk_mq_tag_busy(data->hctx);
         }
  
         tag = blk_mq_get_tag(data);
@@ -731,7 +752,7 @@ EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
  
  void blk_mq_kick_requeue_list(struct request_queue *q)
  {
-       kblockd_schedule_delayed_work(&q->requeue_work, 0);
+       kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 0);
  }
  EXPORT_SYMBOL(blk_mq_kick_requeue_list);
  
@@ -974,6 +995,7 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
                 .hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu),
                 .flags = wait ? 0 : BLK_MQ_REQ_NOWAIT,
         };
+       bool shared;
  
         might_sleep_if(wait);
  
@@ -983,9 +1005,10 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
         if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
                 data.flags |= BLK_MQ_REQ_RESERVED;
  
+       shared = blk_mq_tag_busy(data.hctx);
         rq->tag = blk_mq_get_tag(&data);
         if (rq->tag >= 0) {
-               if (blk_mq_tag_busy(data.hctx)) {
+               if (shared) {
                         rq->rq_flags |= RQF_MQ_INFLIGHT;
                         atomic_inc(&data.hctx->nr_active);
                 }
@@ -1248,6 +1271,15 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
         }
  }
  
+static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
+{
+       int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask);
+
+       if (cpu >= nr_cpu_ids)
+               cpu = cpumask_first(hctx->cpumask);
+       return cpu;
+}
+
  /*
   * It'd be great if the workqueue API had a way to pass
   * in a mask and had some smarts for more clever placement.
@@ -1257,26 +1289,17 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
  static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
  {
         bool tried = false;
+       int next_cpu = hctx->next_cpu;
  
         if (hctx->queue->nr_hw_queues == 1)
                 return WORK_CPU_UNBOUND;
  
         if (--hctx->next_cpu_batch <= 0) {
-               int next_cpu;
  select_cpu:
-               next_cpu = cpumask_next_and(hctx->next_cpu, hctx->cpumask,
+               next_cpu = cpumask_next_and(next_cpu, hctx->cpumask,
                                 cpu_online_mask);
                 if (next_cpu >= nr_cpu_ids)
-                       next_cpu = cpumask_first_and(hctx->cpumask,cpu_online_mask);
-
-               /*
-                * No online CPU is found, so have to make sure hctx->next_cpu
-                * is set correctly for not breaking workqueue.
-                */
-               if (next_cpu >= nr_cpu_ids)
-                       hctx->next_cpu = cpumask_first(hctx->cpumask);
-               else
-                       hctx->next_cpu = next_cpu;
+                       next_cpu = blk_mq_first_mapped_cpu(hctx);
                 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
         }
  
@@ -1284,7 +1307,7 @@ select_cpu:
          * Do unbound schedule if we can't find a online CPU for this hctx,
          * and it should only happen in the path of handling CPU DEAD.
          */
-       if (!cpu_online(hctx->next_cpu)) {
+       if (!cpu_online(next_cpu)) {
                 if (!tried) {
                         tried = true;
                         goto select_cpu;
@@ -1294,18 +1317,18 @@ select_cpu:
                  * Make sure to re-select CPU next time once after CPUs
                  * in hctx->cpumask become online again.
                  */
+               hctx->next_cpu = next_cpu;
                 hctx->next_cpu_batch = 1;
                 return WORK_CPU_UNBOUND;
         }
-       return hctx->next_cpu;
+
+       hctx->next_cpu = next_cpu;
+       return next_cpu;
  }
  
  static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
                                         unsigned long msecs)
  {
-       if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
-               return;
-
         if (unlikely(blk_mq_hctx_stopped(hctx)))
                 return;
  
@@ -1320,9 +1343,8 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
                 put_cpu();
         }
  
-       kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
-                                        &hctx->run_work,
-                                        msecs_to_jiffies(msecs));
+       kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
+                                   msecs_to_jiffies(msecs));
  }
  
  void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
@@ -1589,7 +1611,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
                 BUG_ON(!rq->q);
                 if (rq->mq_ctx != this_ctx) {
                         if (this_ctx) {
-                               trace_block_unplug(this_q, depth, from_schedule);
+                               trace_block_unplug(this_q, depth, !from_schedule);
                                 blk_mq_sched_insert_requests(this_q, this_ctx,
                                                                 &ctx_list,
                                                                 from_schedule);
@@ -1609,7 +1631,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
          * on 'ctx_list'. Do those.
          */
         if (this_ctx) {
-               trace_block_unplug(this_q, depth, from_schedule);
+               trace_block_unplug(this_q, depth, !from_schedule);
                 blk_mq_sched_insert_requests(this_q, this_ctx, &ctx_list,
                                                 from_schedule);
         }
@@ -2278,8 +2300,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
                 /*
                  * Initialize batch roundrobin counts
                  */
-               hctx->next_cpu = cpumask_first_and(hctx->cpumask,
-                               cpu_online_mask);
+               hctx->next_cpu = blk_mq_first_mapped_cpu(hctx);
                 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
         }
  }
@@ -2326,7 +2347,6 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
  
         mutex_lock(&set->tag_list_lock);
         list_del_rcu(&q->tag_set_list);
-       INIT_LIST_HEAD(&q->tag_set_list);
         if (list_is_singular(&set->tag_list)) {
                 /* just transitioned to unshared */
                 set->flags &= ~BLK_MQ_F_TAG_SHARED;
@@ -2334,8 +2354,8 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
                 blk_mq_update_tag_set_depth(set, false);
         }
         mutex_unlock(&set->tag_list_lock);
-
         synchronize_rcu();
+       INIT_LIST_HEAD(&q->tag_set_list);
  }
  
  static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
@@ -2776,6 +2796,9 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
         if (!set)
                 return -EINVAL;
  
+       if (q->nr_requests == nr)
+               return 0;
+
         blk_mq_freeze_queue(q);
  
         ret = 0;