BugLink: https://bugs.launchpad.net/bugs/1848739
Dispatch may still be in progress after the queue is frozen, so we have
to quiesce the queue before switching the IO scheduler and updating
nr_requests.
Also, when switching io schedulers, blk_mq_run_hw_queue() may still be
called from elsewhere (such as from nvme_reset_work()) while the io
scheduler's per-hctx data is not set up yet, which can cause an oops even
inside blk_mq_hctx_has_pending(); for example, it can run just between:
ret = e->ops.mq.init_sched(q, e);
AND
ret = e->ops.mq.init_hctx(hctx, i)
inside blk_mq_init_sched().
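
For illustration only (not part of this patch), a condensed sketch of that
window, based on blk_mq_init_sched() in this series, with locals, tag
allocation, debugfs registration and error unwinding elided:

	ret = e->ops.mq.init_sched(q, e);	/* q->elevator becomes visible here */
	if (ret)
		goto err;

	queue_for_each_hw_ctx(q, hctx, i) {
		if (e->ops.mq.init_hctx) {
			ret = e->ops.mq.init_hctx(hctx, i);	/* per-hctx sched data set up only here */
			if (ret)
				goto err;
		}
	}

A blk_mq_run_hw_queue() that sneaks in between those two steps sees the
elevator already installed but per-hctx scheduler data still uninitialized,
so the scheduler's ->has_work() can dereference data that is not set up yet.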
This essentially reverts commit 7a148c2fcff8330 ("block: don't call
blk_mq_quiesce_queue() after queue is frozen") and makes sure
blk_mq_hctx_has_pending() won't be called if the queue is quiesced.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Fixes: 7a148c2fcff83309 ("block: don't call blk_mq_quiesce_queue() after queue is frozen")
Reported-by: Yi Zhang <yi.zhang@redhat.com>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
(cherry picked from commit 24f5a90f0d13a97b51aa79f468143fafea4246bb)
Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
Acked-by: Stefan Bader <stefan.bader@canonical.com>
Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ ... @@
 bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 {
-	if (blk_mq_hctx_has_pending(hctx)) {
+	int srcu_idx;
+	bool need_run;
+
+	/*
+	 * When queue is quiesced, we may be switching io scheduler, or
+	 * updating nr_hw_queues, or other things, and we can't run queue
+	 * any more, even __blk_mq_hctx_has_pending() can't be called safely.
+	 *
+	 * And queue will be rerun in blk_mq_unquiesce_queue() if it is
+	 * quiesced.
+	 */
+	if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
+		rcu_read_lock();
+		need_run = !blk_queue_quiesced(hctx->queue) &&
+			blk_mq_hctx_has_pending(hctx);
+		rcu_read_unlock();
+	} else {
+		srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
+		need_run = !blk_queue_quiesced(hctx->queue) &&
+			blk_mq_hctx_has_pending(hctx);
+		srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
+	}
+
+	if (need_run) {
 		__blk_mq_delay_run_hw_queue(hctx, async, 0);
 		return true;
 	}
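
For context (not part of this patch): hctxs flagged BLK_MQ_F_BLOCKING may
sleep in ->queue_rq() and are covered by per-hctx SRCU, all others by plain
RCU, which is why the check above has two branches. blk_mq_quiesce_queue()
sets QUEUE_FLAG_QUIESCED and then waits out both kinds of readers, which is
what makes the blk_queue_quiesced() test reliable here. Roughly, as a
simplified sketch of how the helper looks in this series (the pre-4.16
queue_rq_srcu field name is assumed):

	void blk_mq_quiesce_queue(struct request_queue *q)
	{
		struct blk_mq_hw_ctx *hctx;
		unsigned int i;
		bool rcu = false;

		blk_mq_quiesce_queue_nowait(q);		/* sets QUEUE_FLAG_QUIESCED */

		queue_for_each_hw_ctx(q, hctx, i) {
			if (hctx->flags & BLK_MQ_F_BLOCKING)
				synchronize_srcu(hctx->queue_rq_srcu);	/* blocking ->queue_rq() readers */
			else
				rcu = true;
		}
		if (rcu)
			synchronize_rcu();		/* non-blocking readers */
	}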
@@ ... @@
 		return 0;
 	blk_mq_freeze_queue(q);
+	blk_mq_quiesce_queue(q);
 	ret = 0;
 	queue_for_each_hw_ctx(q, hctx, i) {
@@ ... @@
 	if (!ret)
 		q->nr_requests = nr;
+	blk_mq_unquiesce_queue(q);
 	blk_mq_unfreeze_queue(q);
 	return ret;
--- a/block/elevator.c
+++ b/block/elevator.c
@@ ... @@
 	int ret;
 	blk_mq_freeze_queue(q);
+	blk_mq_quiesce_queue(q);
 	if (q->elevator) {
 		if (q->elevator->registered)
@@ ... @@
 	blk_add_trace_msg(q, "elv switch: none");
 out:
+	blk_mq_unquiesce_queue(q);
 	blk_mq_unfreeze_queue(q);
 	return ret;
 }