if (rq->rq_flags & RQF_DONTPREP)
blk_mq_request_bypass_insert(rq, false);
else
- blk_mq_sched_insert_request(rq, true, false, false, true);
+ blk_mq_sched_insert_request(rq, true, false, false);
}
while (!list_empty(&rq_list)) {
rq = list_entry(rq_list.next, struct request, queuelist);
list_del_init(&rq->queuelist);
- blk_mq_sched_insert_request(rq, false, false, false, true);
+ blk_mq_sched_insert_request(rq, false, false, false);
}
blk_mq_run_hw_queues(q, false);
}
}
+#define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT 8
+#define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR 4
+/*
+ * Update dispatch busy with the Exponential Weighted Moving Average(EWMA):
+ * - EWMA is one simple way to compute running average value
+ * - weight(7/8 and 1/8) is applied so that it can decrease exponentially
+ * - take 4 as factor for avoiding to get too small(0) result, and this
+ * factor doesn't matter because EWMA decreases exponentially
+ */
+static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
+{
+ unsigned int ewma;
+
+ if (hctx->queue->elevator)
+ return;
+
+ ewma = hctx->dispatch_busy;
+
+ if (!ewma && !busy)
+ return;
+
+ ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
+ if (busy)
+ ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
+ ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;
+
+ hctx->dispatch_busy = ewma;
+}
+
+#define BLK_MQ_RESOURCE_DELAY 3 /* ms units */
+
+/*
+ * Returns true if we did some work AND can potentially do more.
+ */
bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
bool got_budget)
{
struct request *rq, *nxt;
bool no_tag = false;
int errors, queued;
+ blk_status_t ret = BLK_STS_OK;
if (list_empty(list))
return false;
errors = queued = 0;
do {
struct blk_mq_queue_data bd;
- blk_status_t ret;
rq = list_first_entry(list, struct request, queuelist);
}
ret = q->mq_ops->queue_rq(hctx, &bd);
- if (ret == BLK_STS_RESOURCE) {
+ if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
/*
* If an I/O scheduler has been configured and we got a
* driver tag for the next request already, free it
* that is where we will continue on next queue run.
*/
if (!list_empty(list)) {
+ bool needs_restart;
+
spin_lock(&hctx->lock);
list_splice_init(list, &hctx->dispatch);
spin_unlock(&hctx->lock);
* - Some but not all block drivers stop a queue before
* returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
* and dm-rq.
+ *
+ * If driver returns BLK_STS_RESOURCE and SCHED_RESTART
+ * bit is set, run queue after a delay to avoid IO stalls
+ * that could otherwise occur if the queue is idle.
*/
- if (!blk_mq_sched_needs_restart(hctx) ||
+ needs_restart = blk_mq_sched_needs_restart(hctx);
+ if (!needs_restart ||
(no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
blk_mq_run_hw_queue(hctx, true);
- }
+ else if (needs_restart && (ret == BLK_STS_RESOURCE))
+ blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
+
+ blk_mq_update_dispatch_busy(hctx, true);
+ return false;
+ } else
+ blk_mq_update_dispatch_busy(hctx, false);
+
+ /*
+ * If the host/device is unable to accept more work, inform the
+ * caller of that.
+ */
+ if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
+ return false;
return (queued + errors) != 0;
}
ret = q->mq_ops->queue_rq(hctx, &bd);
switch (ret) {
case BLK_STS_OK:
+ blk_mq_update_dispatch_busy(hctx, false);
*cookie = new_cookie;
break;
case BLK_STS_RESOURCE:
+ case BLK_STS_DEV_RESOURCE:
+ blk_mq_update_dispatch_busy(hctx, true);
__blk_mq_requeue_request(rq);
break;
default:
+ blk_mq_update_dispatch_busy(hctx, false);
*cookie = BLK_QC_T_NONE;
break;
}
return ret;
}
-static void __blk_mq_fallback_to_insert(struct blk_mq_hw_ctx *hctx,
- struct request *rq,
- bool run_queue, bool bypass_insert)
-{
- if (!bypass_insert)
- blk_mq_sched_insert_request(rq, false, run_queue, false,
- hctx->flags & BLK_MQ_F_BLOCKING);
- else
- blk_mq_request_bypass_insert(rq, run_queue);
-}
-
static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
struct request *rq,
blk_qc_t *cookie,
struct request_queue *q = rq->q;
bool run_queue = true;
- /* RCU or SRCU read lock is needed before checking quiesced flag */
+ /*
+ * RCU or SRCU read lock is needed before checking quiesced flag.
+ *
+ * When queue is stopped or quiesced, ignore 'bypass_insert' from
+ * blk_mq_request_issue_directly(), and return BLK_STS_OK to caller,
+ * and avoid driver to try to dispatch again.
+ */
if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
run_queue = false;
+ bypass_insert = false;
goto insert;
}
return __blk_mq_issue_directly(hctx, rq, cookie);
insert:
- __blk_mq_fallback_to_insert(hctx, rq, run_queue, bypass_insert);
if (bypass_insert)
return BLK_STS_RESOURCE;
+ blk_mq_sched_insert_request(rq, false, run_queue, false);
return BLK_STS_OK;
}
hctx_lock(hctx, &srcu_idx);
ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
- if (ret == BLK_STS_RESOURCE)
- __blk_mq_fallback_to_insert(hctx, rq, true, false);
+ if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
+ blk_mq_sched_insert_request(rq, false, true, false);
else if (ret != BLK_STS_OK)
blk_mq_end_request(rq, ret);
hctx_unlock(hctx, srcu_idx);
}
-blk_status_t blk_mq_request_direct_issue(struct request *rq)
+blk_status_t blk_mq_request_issue_directly(struct request *rq)
{
blk_status_t ret;
int srcu_idx;
return ret;
}
+void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
+ struct list_head *list)
+{
+ while (!list_empty(list)) {
+ blk_status_t ret;
+ struct request *rq = list_first_entry(list, struct request,
+ queuelist);
+
+ list_del_init(&rq->queuelist);
+ ret = blk_mq_request_issue_directly(rq);
+ if (ret != BLK_STS_OK) {
+ list_add(&rq->queuelist, list);
+ break;
+ }
+ }
+}
+
static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
{
const int is_sync = op_is_sync(bio->bi_opf);
blk_mq_try_issue_directly(data.hctx, same_queue_rq,
&cookie);
}
- } else if (q->nr_hw_queues > 1 && is_sync) {
+ } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
+ !data.hctx->dispatch_busy)) {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
blk_mq_try_issue_directly(data.hctx, rq, &cookie);
} else if (q->elevator) {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
- blk_mq_sched_insert_request(rq, false, true, true, true);
+ blk_mq_sched_insert_request(rq, false, true, true);
} else {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);