freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
if (freeze_depth == 1) {
percpu_ref_kill(&q->q_usage_counter);
- blk_mq_run_hw_queues(q, false);
+ if (q->mq_ops)
+ blk_mq_run_hw_queues(q, false);
}
}
EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
queue_for_each_hw_ctx(q, hctx, i)
if (blk_mq_hw_queue_mapped(hctx))
blk_mq_tag_wakeup_all(hctx->tags, true);
-
- /*
- * If we are called because the queue has now been marked as
- * dying, we need to ensure that processes currently waiting on
- * the queue are notified as well.
- */
- wake_up_all(&q->mq_freeze_wq);
}
bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
rq->q = data->q;
rq->mq_ctx = data->ctx;
rq->cmd_flags = op;
+ if (data->flags & BLK_MQ_REQ_PREEMPT)
+ rq->rq_flags |= RQF_PREEMPT;
if (blk_queue_io_stat(data->q))
rq->rq_flags |= RQF_IO_STAT;
/* do not touch atomic flags, it needs atomic ops against the timer */
struct elevator_queue *e = q->elevator;
struct request *rq;
unsigned int tag;
- struct blk_mq_ctx *local_ctx = NULL;
+ bool put_ctx_on_error = false;
blk_queue_enter_live(q);
data->q = q;
- if (likely(!data->ctx))
- data->ctx = local_ctx = blk_mq_get_ctx(q);
+ if (likely(!data->ctx)) {
+ data->ctx = blk_mq_get_ctx(q);
+ put_ctx_on_error = true;
+ }
if (likely(!data->hctx))
data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
if (op & REQ_NOWAIT)
tag = blk_mq_get_tag(data);
if (tag == BLK_MQ_TAG_FAIL) {
- if (local_ctx) {
- blk_mq_put_ctx(local_ctx);
+ if (put_ctx_on_error) {
+ blk_mq_put_ctx(data->ctx);
data->ctx = NULL;
}
blk_queue_exit(q);
struct request *rq;
int ret;
- ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
+ ret = blk_queue_enter(q, flags);
if (ret)
return ERR_PTR(ret);
if (hctx_idx >= q->nr_hw_queues)
return ERR_PTR(-EIO);
- ret = blk_queue_enter(q, true);
+ ret = blk_queue_enter(q, flags);
if (ret)
return ERR_PTR(ret);
{
struct request_queue *q = rq->q;
+ blk_mq_put_driver_tag(rq);
+
trace_block_rq_requeue(q, rq);
wbt_requeue(q->rq_wb, &rq->issue_stat);
blk_mq_sched_requeue_request(rq);
return rq->tag != -1;
}
-static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
- struct request *rq)
-{
- blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag);
- rq->tag = -1;
-
- if (rq->rq_flags & RQF_MQ_INFLIGHT) {
- rq->rq_flags &= ~RQF_MQ_INFLIGHT;
- atomic_dec(&hctx->nr_active);
- }
-}
-
-static void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx,
- struct request *rq)
-{
- if (rq->tag == -1 || rq->internal_tag == -1)
- return;
-
- __blk_mq_put_driver_tag(hctx, rq);
-}
-
-static void blk_mq_put_driver_tag(struct request *rq)
-{
- struct blk_mq_hw_ctx *hctx;
-
- if (rq->tag == -1 || rq->internal_tag == -1)
- return;
-
- hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu);
- __blk_mq_put_driver_tag(hctx, rq);
-}
-
-/*
- * If we fail getting a driver tag because all the driver tags are already
- * assigned and on the dispatch list, BUT the first entry does not have a
- * tag, then we could deadlock. For that case, move entries with assigned
- * driver tags to the front, leaving the set of tagged requests in the
- * same order, and the untagged set in the same order.
- */
-static bool reorder_tags_to_front(struct list_head *list)
-{
- struct request *rq, *tmp, *first = NULL;
-
- list_for_each_entry_safe_reverse(rq, tmp, list, queuelist) {
- if (rq == first)
- break;
- if (rq->tag != -1) {
- list_move(&rq->queuelist, list);
- if (!first)
- first = rq;
- }
- }
-
- return first != NULL;
-}
-
-static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
- void *key)
+static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
+ int flags, void *key)
{
struct blk_mq_hw_ctx *hctx;
hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
- list_del(&wait->entry);
- clear_bit_unlock(BLK_MQ_S_TAG_WAITING, &hctx->state);
+ list_del_init(&wait->entry);
blk_mq_run_hw_queue(hctx, true);
return 1;
}
-static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx)
+static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx **hctx,
+ struct request *rq)
{
+ struct blk_mq_hw_ctx *this_hctx = *hctx;
+ wait_queue_entry_t *wait = &this_hctx->dispatch_wait;
struct sbq_wait_state *ws;
+ if (!list_empty_careful(&wait->entry))
+ return false;
+
+ spin_lock(&this_hctx->lock);
+ if (!list_empty(&wait->entry)) {
+ spin_unlock(&this_hctx->lock);
+ return false;
+ }
+
+ ws = bt_wait_ptr(&this_hctx->tags->bitmap_tags, this_hctx);
+ add_wait_queue(&ws->wait, wait);
+
/*
- * The TAG_WAITING bit serves as a lock protecting hctx->dispatch_wait.
- * The thread which wins the race to grab this bit adds the hardware
- * queue to the wait queue.
+ * It's possible that a tag was freed in the window between the
+ * allocation failure and adding the hardware queue to the wait
+ * queue.
*/
- if (test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state) ||
- test_and_set_bit_lock(BLK_MQ_S_TAG_WAITING, &hctx->state))
+ if (!blk_mq_get_driver_tag(rq, hctx, false)) {
+ spin_unlock(&this_hctx->lock);
return false;
-
- init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
- ws = bt_wait_ptr(&hctx->tags->bitmap_tags, hctx);
+ }
/*
- * As soon as this returns, it's no longer safe to fiddle with
- * hctx->dispatch_wait, since a completion can wake up the wait queue
- * and unlock the bit.
+ * We got a tag, remove ourselves from the wait queue to ensure
+ * someone else gets the wakeup.
*/
- add_wait_queue(&ws->wait, &hctx->dispatch_wait);
+ spin_lock_irq(&ws->wait.lock);
+ list_del_init(&wait->entry);
+ spin_unlock_irq(&ws->wait.lock);
+ spin_unlock(&this_hctx->lock);
return true;
}
bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
- bool got_budget)
+ bool got_budget)
{
struct blk_mq_hw_ctx *hctx;
- struct request *rq;
+ struct request *rq, *nxt;
+ bool no_tag = false;
int errors, queued;
if (list_empty(list))
rq = list_first_entry(list, struct request, queuelist);
if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
- if (!queued && reorder_tags_to_front(list))
- continue;
-
/*
* The initial allocation attempt failed, so we need to
- * rerun the hardware queue when a tag is freed.
+ * rerun the hardware queue when a tag is freed. The
+ * waitqueue takes care of that. If the queue is run
+ * before we add this entry back on the dispatch list,
+ * we'll re-run it below.
*/
- if (!blk_mq_dispatch_wait_add(hctx)) {
- if (got_budget)
- blk_mq_put_dispatch_budget(hctx);
- break;
- }
-
- /*
- * It's possible that a tag was freed in the window
- * between the allocation failure and adding the
- * hardware queue to the wait queue.
- */
- if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
+ if (!blk_mq_dispatch_wait_add(&hctx, rq)) {
if (got_budget)
blk_mq_put_dispatch_budget(hctx);
+ no_tag = true;
break;
}
}
- if (!got_budget) {
- ret = blk_mq_get_dispatch_budget(hctx);
- if (ret == BLK_STS_RESOURCE)
- break;
- if (ret != BLK_STS_OK)
- goto fail_rq;
+ if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) {
+ blk_mq_put_driver_tag(rq);
+ break;
}
list_del_init(&rq->queuelist);
if (list_empty(list))
bd.last = true;
else {
- struct request *nxt;
-
nxt = list_first_entry(list, struct request, queuelist);
bd.last = !blk_mq_get_driver_tag(nxt, NULL, false);
}
ret = q->mq_ops->queue_rq(hctx, &bd);
if (ret == BLK_STS_RESOURCE) {
- blk_mq_put_driver_tag_hctx(hctx, rq);
+ /*
+ * If an I/O scheduler has been configured and we got a
+ * driver tag for the next request already, free it again.
+ */
+ if (!list_empty(list)) {
+ nxt = list_first_entry(list, struct request, queuelist);
+ blk_mq_put_driver_tag(nxt);
+ }
list_add(&rq->queuelist, list);
__blk_mq_requeue_request(rq);
break;
}
- fail_rq:
if (unlikely(ret != BLK_STS_OK)) {
errors++;
blk_mq_end_request(rq, BLK_STS_IOERR);
* that is where we will continue on next queue run.
*/
if (!list_empty(list)) {
- /*
- * If an I/O scheduler has been configured and we got a driver
- * tag for the next request already, free it again.
- */
- rq = list_first_entry(list, struct request, queuelist);
- blk_mq_put_driver_tag(rq);
-
spin_lock(&hctx->lock);
list_splice_init(list, &hctx->dispatch);
spin_unlock(&hctx->lock);
* it is no longer set that means that it was cleared by another
* thread and hence that a queue rerun is needed.
*
- * If TAG_WAITING is set that means that an I/O scheduler has
- * been configured and another thread is waiting for a driver
- * tag. To guarantee fairness, do not rerun this hardware queue
- * but let the other thread grab the driver tag.
+ * If 'no_tag' is set, that means that we failed getting
+ * a driver tag with an I/O scheduler attached. If our dispatch
+ * waitqueue is no longer active, ensure that we run the queue
+ * AFTER adding our entries back to the list.
*
* If no I/O scheduler has been configured it is possible that
* the hardware queue got stopped and restarted before requests
* returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
* and dm-rq.
*/
- if (!blk_mq_sched_needs_restart(hctx) &&
- !test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state))
+ if (!blk_mq_sched_needs_restart(hctx) ||
+ (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
blk_mq_run_hw_queue(hctx, true);
}
* Should only be used carefully, when the caller knows we want to
* bypass a potential IO scheduler on the target device.
*/
-void blk_mq_request_bypass_insert(struct request *rq)
+void blk_mq_request_bypass_insert(struct request *rq, bool run_queue)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);
list_add_tail(&rq->queuelist, &hctx->dispatch);
spin_unlock(&hctx->lock);
- blk_mq_run_hw_queue(hctx, false);
+ if (run_queue)
+ blk_mq_run_hw_queue(hctx, false);
}
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
if (!blk_mq_get_driver_tag(rq, NULL, false))
goto insert;
- ret = blk_mq_get_dispatch_budget(hctx);
- if (ret == BLK_STS_RESOURCE) {
+ if (!blk_mq_get_dispatch_budget(hctx)) {
blk_mq_put_driver_tag(rq);
goto insert;
- } else if (ret != BLK_STS_OK)
- goto fail_rq;
+ }
new_cookie = request_to_qc_t(hctx, rq);
__blk_mq_requeue_request(rq);
goto insert;
default:
- fail_rq:
*cookie = BLK_QC_T_NONE;
blk_mq_end_request(rq, ret);
return;
if (unlikely(is_flush_fua)) {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
- if (q->elevator) {
- blk_mq_sched_insert_request(rq, false, true, true,
- true);
- } else {
- blk_insert_flush(rq);
- blk_mq_run_hw_queue(data.hctx, true);
- }
+
+ /* bypass scheduler for flush rq */
+ blk_insert_flush(rq);
+ blk_mq_run_hw_queue(data.hctx, true);
} else if (plug && q->nr_hw_queues == 1) {
struct request *last = NULL;
hctx->nr_ctx = 0;
+ init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
+ INIT_LIST_HEAD(&hctx->dispatch_wait.entry);
+
if (set->ops->init_hctx &&
set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
goto free_bitmap;