X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=block%2Fblk-mq.c;h=da42485cb39d954c475a078db62967e9a47772e7;hb=74e11d8dd2ad6af3b48aa4302c28bf9f549560fc;hp=3362f274d0861dfaf7e68013c6461f5e818c5db8;hpb=6204c099aeb1d67951f4b5e31b29a55696738568;p=mirror_ubuntu-bionic-kernel.git diff --git a/block/blk-mq.c b/block/blk-mq.c index 3362f274d086..da42485cb39d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -119,6 +119,25 @@ void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part, blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi); } +static void blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, + bool reserved) +{ + struct mq_inflight *mi = priv; + + if (rq->part == mi->part) + mi->inflight[rq_data_dir(rq)]++; +} + +void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, + unsigned int inflight[2]) +{ + struct mq_inflight mi = { .part = part, .inflight = inflight, }; + + inflight[0] = inflight[1] = 0; + blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi); +} + void blk_freeze_queue_start(struct request_queue *q) { int freeze_depth; @@ -279,7 +298,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->tag = -1; rq->internal_tag = tag; } else { - if (blk_mq_tag_busy(data->hctx)) { + if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) { rq->rq_flags = RQF_MQ_INFLIGHT; atomic_inc(&data->hctx->nr_active); } @@ -357,6 +376,8 @@ static struct request *blk_mq_get_request(struct request_queue *q, */ if (!op_is_flush(op) && e->type->ops.mq.limit_depth) e->type->ops.mq.limit_depth(op, data); + } else { + blk_mq_tag_busy(data->hctx); } tag = blk_mq_get_tag(data); @@ -559,6 +580,22 @@ static void __blk_mq_complete_request(struct request *rq) put_cpu(); } +static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx) +{ + if (!(hctx->flags & BLK_MQ_F_BLOCKING)) + rcu_read_unlock(); + else + srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx); +} + +static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx) +{ + if (!(hctx->flags & BLK_MQ_F_BLOCKING)) + rcu_read_lock(); + else + *srcu_idx = srcu_read_lock(hctx->queue_rq_srcu); +} + /** * blk_mq_complete_request - end I/O on a request * @rq: the request being processed @@ -686,12 +723,20 @@ static void blk_mq_requeue_work(struct work_struct *work) spin_unlock_irq(&q->requeue_lock); list_for_each_entry_safe(rq, next, &rq_list, queuelist) { - if (!(rq->rq_flags & RQF_SOFTBARRIER)) + if (!(rq->rq_flags & (RQF_SOFTBARRIER | RQF_DONTPREP))) continue; rq->rq_flags &= ~RQF_SOFTBARRIER; list_del_init(&rq->queuelist); - blk_mq_sched_insert_request(rq, true, false, false, true); + /* + * If RQF_DONTPREP, rq has contained some driver specific + * data, so insert it to hctx dispatch list to avoid any + * merge. + */ + if (rq->rq_flags & RQF_DONTPREP) + blk_mq_request_bypass_insert(rq, false); + else + blk_mq_sched_insert_request(rq, true, false, false, true); } while (!list_empty(&rq_list)) { @@ -974,6 +1019,7 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, .hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), .flags = wait ? 0 : BLK_MQ_REQ_NOWAIT, }; + bool shared; might_sleep_if(wait); @@ -983,9 +1029,10 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag)) data.flags |= BLK_MQ_REQ_RESERVED; + shared = blk_mq_tag_busy(data.hctx); rq->tag = blk_mq_get_tag(&data); if (rq->tag >= 0) { - if (blk_mq_tag_busy(data.hctx)) { + if (shared) { rq->rq_flags |= RQF_MQ_INFLIGHT; atomic_inc(&data.hctx->nr_active); } @@ -1235,17 +1282,11 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) */ WARN_ON_ONCE(in_interrupt()); - if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { - rcu_read_lock(); - blk_mq_sched_dispatch_requests(hctx); - rcu_read_unlock(); - } else { - might_sleep(); + might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); - srcu_idx = srcu_read_lock(hctx->queue_rq_srcu); - blk_mq_sched_dispatch_requests(hctx); - srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx); - } + hctx_lock(hctx, &srcu_idx); + blk_mq_sched_dispatch_requests(hctx); + hctx_unlock(hctx, srcu_idx); } static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx) @@ -1306,9 +1347,6 @@ select_cpu: static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, unsigned long msecs) { - if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx))) - return; - if (unlikely(blk_mq_hctx_stopped(hctx))) return; @@ -1335,7 +1373,23 @@ EXPORT_SYMBOL(blk_mq_delay_run_hw_queue); bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) { - if (blk_mq_hctx_has_pending(hctx)) { + int srcu_idx; + bool need_run; + + /* + * When queue is quiesced, we may be switching io scheduler, or + * updating nr_hw_queues, or other things, and we can't run queue + * any more, even __blk_mq_hctx_has_pending() can't be called safely. + * + * And queue will be rerun in blk_mq_unquiesce_queue() if it is + * quiesced. + */ + hctx_lock(hctx, &srcu_idx); + need_run = !blk_queue_quiesced(hctx->queue) && + blk_mq_hctx_has_pending(hctx); + hctx_unlock(hctx, srcu_idx); + + if (need_run) { __blk_mq_delay_run_hw_queue(hctx, async, 0); return true; } @@ -1591,7 +1645,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) BUG_ON(!rq->q); if (rq->mq_ctx != this_ctx) { if (this_ctx) { - trace_block_unplug(this_q, depth, from_schedule); + trace_block_unplug(this_q, depth, !from_schedule); blk_mq_sched_insert_requests(this_q, this_ctx, &ctx_list, from_schedule); @@ -1611,7 +1665,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) * on 'ctx_list'. Do those. */ if (this_ctx) { - trace_block_unplug(this_q, depth, from_schedule); + trace_block_unplug(this_q, depth, !from_schedule); blk_mq_sched_insert_requests(this_q, this_ctx, &ctx_list, from_schedule); } @@ -1645,7 +1699,7 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, - blk_qc_t *cookie, bool may_sleep) + blk_qc_t *cookie) { struct request_queue *q = rq->q; struct blk_mq_queue_data bd = { @@ -1695,25 +1749,20 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, } insert: - blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep); + blk_mq_sched_insert_request(rq, false, run_queue, false, + hctx->flags & BLK_MQ_F_BLOCKING); } static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_qc_t *cookie) { - if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { - rcu_read_lock(); - __blk_mq_try_issue_directly(hctx, rq, cookie, false); - rcu_read_unlock(); - } else { - unsigned int srcu_idx; + int srcu_idx; - might_sleep(); + might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); - srcu_idx = srcu_read_lock(hctx->queue_rq_srcu); - __blk_mq_try_issue_directly(hctx, rq, cookie, true); - srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx); - } + hctx_lock(hctx, &srcu_idx); + __blk_mq_try_issue_directly(hctx, rq, cookie); + hctx_unlock(hctx, srcu_idx); } static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) @@ -2053,12 +2102,7 @@ static void blk_mq_exit_hctx(struct request_queue *q, if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); - if (hctx->flags & BLK_MQ_F_BLOCKING) - cleanup_srcu_struct(hctx->queue_rq_srcu); - blk_mq_remove_cpuhp(hctx); - blk_free_flush_queue(hctx->fq); - sbitmap_free(&hctx->ctx_map); } static void blk_mq_exit_hw_queues(struct request_queue *q, @@ -2099,12 +2143,12 @@ static int blk_mq_init_hctx(struct request_queue *q, * runtime */ hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *), - GFP_KERNEL, node); + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node); if (!hctx->ctxs) goto unregister_cpu_notifier; - if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), GFP_KERNEL, - node)) + if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node)) goto free_ctxs; hctx->nr_ctx = 0; @@ -2119,7 +2163,8 @@ static int blk_mq_init_hctx(struct request_queue *q, if (blk_mq_sched_init_hctx(q, hctx, hctx_idx)) goto exit_hctx; - hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size); + hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size, + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY); if (!hctx->fq) goto sched_exit_hctx; @@ -2136,7 +2181,7 @@ static int blk_mq_init_hctx(struct request_queue *q, return 0; free_fq: - kfree(hctx->fq); + blk_free_flush_queue(hctx->fq); sched_exit_hctx: blk_mq_sched_exit_hctx(q, hctx, hctx_idx); exit_hctx: @@ -2327,7 +2372,6 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) mutex_lock(&set->tag_list_lock); list_del_rcu(&q->tag_set_list); - INIT_LIST_HEAD(&q->tag_set_list); if (list_is_singular(&set->tag_list)) { /* just transitioned to unshared */ set->flags &= ~BLK_MQ_F_TAG_SHARED; @@ -2335,8 +2379,8 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) blk_mq_update_tag_set_depth(set, false); } mutex_unlock(&set->tag_list_lock); - synchronize_rcu(); + INIT_LIST_HEAD(&q->tag_set_list); } static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, @@ -2441,12 +2485,14 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, node = blk_mq_hw_queue_to_node(q->mq_map, i); hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set), - GFP_KERNEL, node); + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, + node); if (!hctxs[i]) break; - if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL, - node)) { + if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, + node)) { kfree(hctxs[i]); hctxs[i] = NULL; break; @@ -2568,7 +2614,8 @@ err_exit: } EXPORT_SYMBOL(blk_mq_init_allocated_queue); -void blk_mq_free_queue(struct request_queue *q) +/* tags can _not_ be used after returning from blk_mq_exit_queue */ +void blk_mq_exit_queue(struct request_queue *q) { struct blk_mq_tag_set *set = q->tag_set; @@ -2777,7 +2824,11 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) if (!set) return -EINVAL; + if (q->nr_requests == nr) + return 0; + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); ret = 0; queue_for_each_hw_ctx(q, hctx, i) { @@ -2801,6 +2852,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) if (!ret) q->nr_requests = nr; + blk_mq_unquiesce_queue(q); blk_mq_unfreeze_queue(q); return ret;