#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"
+#include "blk-stat.h"
+#include "blk-wbt.h"
static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
+/**
+ * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished
+ * @q: request queue.
+ *
+ * Note: this function does not prevent that the struct request end_io()
+ * callback function is invoked. Additionally, it is not prevented that
+ * new queue_rq() calls occur unless the queue has been stopped first.
+ */
+void blk_mq_quiesce_queue(struct request_queue *q)
+{
+ struct blk_mq_hw_ctx *hctx;
+ unsigned int i;
+ bool rcu = false;
+
+ blk_mq_stop_hw_queues(q);
+
+ queue_for_each_hw_ctx(q, hctx, i) {
+ if (hctx->flags & BLK_MQ_F_BLOCKING)
+ synchronize_srcu(&hctx->queue_rq_srcu);
+ else
+ rcu = true;
+ }
+ if (rcu)
+ synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
+
void blk_mq_wake_waiters(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
EXPORT_SYMBOL(blk_mq_can_queue);
static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
- struct request *rq, int op,
- unsigned int op_flags)
+ struct request *rq, unsigned int op)
{
- if (blk_queue_io_stat(q))
- op_flags |= REQ_IO_STAT;
-
INIT_LIST_HEAD(&rq->queuelist);
/* csd/requeue_work/fifo_time is initialized before use */
rq->q = q;
rq->mq_ctx = ctx;
- req_set_op_attrs(rq, op, op_flags);
+ rq->cmd_flags = op;
+ if (blk_queue_io_stat(q))
+ rq->rq_flags |= RQF_IO_STAT;
/* do not touch atomic flags, it needs atomic ops against the timer */
rq->cpu = -1;
INIT_HLIST_NODE(&rq->hash);
rq->end_io_data = NULL;
rq->next_rq = NULL;
- ctx->rq_dispatched[rw_is_sync(op, op_flags)]++;
+ ctx->rq_dispatched[op_is_sync(op)]++;
}
static struct request *
-__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int op, int op_flags)
+__blk_mq_alloc_request(struct blk_mq_alloc_data *data, unsigned int op)
{
struct request *rq;
unsigned int tag;
rq = data->hctx->tags->rqs[tag];
if (blk_mq_tag_busy(data->hctx)) {
- rq->cmd_flags = REQ_MQ_INFLIGHT;
+ rq->rq_flags = RQF_MQ_INFLIGHT;
atomic_inc(&data->hctx->nr_active);
}
rq->tag = tag;
- blk_mq_rq_ctx_init(data->q, data->ctx, rq, op, op_flags);
+ blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
return rq;
}
ctx = blk_mq_get_ctx(q);
hctx = blk_mq_map_queue(q, ctx->cpu);
blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
- rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
+ rq = __blk_mq_alloc_request(&alloc_data, rw);
blk_mq_put_ctx(ctx);
if (!rq) {
ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
- rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
+ rq = __blk_mq_alloc_request(&alloc_data, rw);
if (!rq) {
ret = -EWOULDBLOCK;
goto out_queue_exit;
const int tag = rq->tag;
struct request_queue *q = rq->q;
- if (rq->cmd_flags & REQ_MQ_INFLIGHT)
+ if (rq->rq_flags & RQF_MQ_INFLIGHT)
atomic_dec(&hctx->nr_active);
- rq->cmd_flags = 0;
+
+ wbt_done(q->rq_wb, &rq->issue_stat);
+ rq->rq_flags = 0;
clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
blk_mq_put_tag(hctx, ctx, tag);
blk_account_io_done(rq);
if (rq->end_io) {
+ wbt_done(rq->q->rq_wb, &rq->issue_stat);
rq->end_io(rq, error);
} else {
if (unlikely(blk_bidi_rq(rq)))
put_cpu();
}
+static void blk_mq_stat_add(struct request *rq)
+{
+ if (rq->rq_flags & RQF_STATS) {
+ /*
+ * We could rq->mq_ctx here, but there's less of a risk
+ * of races if we have the completion event add the stats
+ * to the local software queue.
+ */
+ struct blk_mq_ctx *ctx;
+
+ ctx = __blk_mq_get_ctx(rq->q, raw_smp_processor_id());
+ blk_stat_add(&ctx->stat[rq_data_dir(rq)], rq);
+ }
+}
+
static void __blk_mq_complete_request(struct request *rq)
{
struct request_queue *q = rq->q;
+ blk_mq_stat_add(rq);
+
if (!q->softirq_done_fn)
blk_mq_end_request(rq, rq->errors);
else
if (unlikely(blk_bidi_rq(rq)))
rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);
+ if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
+ blk_stat_set_issue_time(&rq->issue_stat);
+ rq->rq_flags |= RQF_STATS;
+ wbt_issue(q->rq_wb, &rq->issue_stat);
+ }
+
blk_add_timer(rq);
/*
struct request_queue *q = rq->q;
trace_block_rq_requeue(q, rq);
+ wbt_requeue(q->rq_wb, &rq->issue_stat);
if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
if (q->dma_drain_size && blk_rq_bytes(rq))
}
}
-void blk_mq_requeue_request(struct request *rq)
+void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
{
__blk_mq_requeue_request(rq);
BUG_ON(blk_queued_rq(rq));
- blk_mq_add_to_requeue_list(rq, true);
+ blk_mq_add_to_requeue_list(rq, true, kick_requeue_list);
}
EXPORT_SYMBOL(blk_mq_requeue_request);
spin_unlock_irqrestore(&q->requeue_lock, flags);
list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
- if (!(rq->cmd_flags & REQ_SOFTBARRIER))
+ if (!(rq->rq_flags & RQF_SOFTBARRIER))
continue;
- rq->cmd_flags &= ~REQ_SOFTBARRIER;
+ rq->rq_flags &= ~RQF_SOFTBARRIER;
list_del_init(&rq->queuelist);
blk_mq_insert_request(rq, true, false, false);
}
blk_mq_insert_request(rq, false, false, false);
}
- /*
- * Use the start variant of queue running here, so that running
- * the requeue work will kick stopped queues.
- */
- blk_mq_start_hw_queues(q);
+ blk_mq_run_hw_queues(q, false);
}
-void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
+void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
+ bool kick_requeue_list)
{
struct request_queue *q = rq->q;
unsigned long flags;
* We abuse this flag that is otherwise used by the I/O scheduler to
* request head insertation from the workqueue.
*/
- BUG_ON(rq->cmd_flags & REQ_SOFTBARRIER);
+ BUG_ON(rq->rq_flags & RQF_SOFTBARRIER);
spin_lock_irqsave(&q->requeue_lock, flags);
if (at_head) {
- rq->cmd_flags |= REQ_SOFTBARRIER;
+ rq->rq_flags |= RQF_SOFTBARRIER;
list_add(&rq->queuelist, &q->requeue_list);
} else {
list_add_tail(&rq->queuelist, &q->requeue_list);
}
spin_unlock_irqrestore(&q->requeue_lock, flags);
-}
-EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
-void blk_mq_cancel_requeue_work(struct request_queue *q)
-{
- cancel_delayed_work_sync(&q->requeue_work);
+ if (kick_requeue_list)
+ blk_mq_kick_requeue_list(q);
}
-EXPORT_SYMBOL_GPL(blk_mq_cancel_requeue_work);
+EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
void blk_mq_kick_requeue_list(struct request_queue *q)
{
* of IO. In particular, we'd like FIFO behaviour on handling existing
* items on the hctx->dispatch list. Ignore that for now.
*/
-static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
+static void blk_mq_process_rq_list(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct request *rq;
struct list_head *dptr;
int queued;
- if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
+ if (unlikely(blk_mq_hctx_stopped(hctx)))
return;
- WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
- cpu_online(hctx->next_cpu));
-
hctx->run++;
/*
}
}
+static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
+{
+ int srcu_idx;
+
+ WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
+ cpu_online(hctx->next_cpu));
+
+ if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
+ rcu_read_lock();
+ blk_mq_process_rq_list(hctx);
+ rcu_read_unlock();
+ } else {
+ srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+ blk_mq_process_rq_list(hctx);
+ srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+ }
+}
+
/*
* It'd be great if the workqueue API had a way to pass
* in a mask and had some smarts for more clever placement.
return WORK_CPU_UNBOUND;
if (--hctx->next_cpu_batch <= 0) {
- int cpu = hctx->next_cpu, next_cpu;
+ int next_cpu;
next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask);
if (next_cpu >= nr_cpu_ids)
hctx->next_cpu = next_cpu;
hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
-
- return cpu;
}
return hctx->next_cpu;
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
{
- if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state) ||
- !blk_mq_hw_queue_mapped(hctx)))
+ if (unlikely(blk_mq_hctx_stopped(hctx) ||
+ !blk_mq_hw_queue_mapped(hctx)))
return;
if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
queue_for_each_hw_ctx(q, hctx, i) {
if ((!blk_mq_hctx_has_pending(hctx) &&
list_empty_careful(&hctx->dispatch)) ||
- test_bit(BLK_MQ_S_STOPPED, &hctx->state))
+ blk_mq_hctx_stopped(hctx))
continue;
blk_mq_run_hw_queue(hctx, async);
}
EXPORT_SYMBOL(blk_mq_run_hw_queues);
+/**
+ * blk_mq_queue_stopped() - check whether one or more hctxs have been stopped
+ * @q: request queue.
+ *
+ * The caller is responsible for serializing this function against
+ * blk_mq_{start,stop}_hw_queue().
+ */
+bool blk_mq_queue_stopped(struct request_queue *q)
+{
+ struct blk_mq_hw_ctx *hctx;
+ int i;
+
+ queue_for_each_hw_ctx(q, hctx, i)
+ if (blk_mq_hctx_stopped(hctx))
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL(blk_mq_queue_stopped);
+
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
{
cancel_work(&hctx->run_work);
int i;
queue_for_each_hw_ctx(q, hctx, i) {
- if (!test_bit(BLK_MQ_S_STOPPED, &hctx->state))
+ if (!blk_mq_hctx_stopped(hctx))
continue;
clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
}
}
-struct blk_map_ctx {
- struct blk_mq_hw_ctx *hctx;
- struct blk_mq_ctx *ctx;
-};
-
static struct request *blk_mq_map_request(struct request_queue *q,
struct bio *bio,
- struct blk_map_ctx *data)
+ struct blk_mq_alloc_data *data)
{
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
struct request *rq;
- int op = bio_data_dir(bio);
- int op_flags = 0;
- struct blk_mq_alloc_data alloc_data;
blk_queue_enter_live(q);
ctx = blk_mq_get_ctx(q);
hctx = blk_mq_map_queue(q, ctx->cpu);
- if (rw_is_sync(bio_op(bio), bio->bi_opf))
- op_flags |= REQ_SYNC;
-
- trace_block_getrq(q, bio, op);
- blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx);
- rq = __blk_mq_alloc_request(&alloc_data, op, op_flags);
+ trace_block_getrq(q, bio, bio->bi_opf);
+ blk_mq_set_alloc_data(data, q, 0, ctx, hctx);
+ rq = __blk_mq_alloc_request(data, bio->bi_opf);
- hctx->queued++;
- data->hctx = hctx;
- data->ctx = ctx;
+ data->hctx->queued++;
return rq;
}
-static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie)
+static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
+ struct request *rq, blk_qc_t *cookie)
{
int ret;
struct request_queue *q = rq->q;
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
struct blk_mq_queue_data bd = {
.rq = rq,
.list = NULL,
};
blk_qc_t new_cookie = blk_tag_to_qc_t(rq->tag, hctx->queue_num);
+ if (blk_mq_hctx_stopped(hctx))
+ goto insert;
+
/*
* For OK queue, we are done. For error, kill it. Any other
* error (busy), just add it to our list as we previously
ret = q->mq_ops->queue_rq(hctx, &bd);
if (ret == BLK_MQ_RQ_QUEUE_OK) {
*cookie = new_cookie;
- return 0;
+ return;
}
__blk_mq_requeue_request(rq);
*cookie = BLK_QC_T_NONE;
rq->errors = -EIO;
blk_mq_end_request(rq, rq->errors);
- return 0;
+ return;
}
- return -1;
+insert:
+ blk_mq_insert_request(rq, false, true, true);
}
/*
*/
static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
{
- const int is_sync = rw_is_sync(bio_op(bio), bio->bi_opf);
+ const int is_sync = op_is_sync(bio->bi_opf);
const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
- struct blk_map_ctx data;
+ struct blk_mq_alloc_data data;
struct request *rq;
- unsigned int request_count = 0;
+ unsigned int request_count = 0, srcu_idx;
struct blk_plug *plug;
struct request *same_queue_rq = NULL;
blk_qc_t cookie;
+ unsigned int wb_acct;
blk_queue_bounce(q, &bio);
blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
return BLK_QC_T_NONE;
+ wb_acct = wbt_wait(q->rq_wb, bio, NULL);
+
rq = blk_mq_map_request(q, bio, &data);
- if (unlikely(!rq))
+ if (unlikely(!rq)) {
+ __wbt_done(q->rq_wb, wb_acct);
return BLK_QC_T_NONE;
+ }
+
+ wbt_track(&rq->issue_stat, wb_acct);
cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
blk_mq_bio_to_request(rq, bio);
/*
- * We do limited pluging. If the bio can be merged, do that.
+ * We do limited plugging. If the bio can be merged, do that.
* Otherwise the existing request in the plug list will be
* issued. So the plug list will have one request at most
*/
blk_mq_put_ctx(data.ctx);
if (!old_rq)
goto done;
- if (!blk_mq_direct_issue_request(old_rq, &cookie))
- goto done;
- blk_mq_insert_request(old_rq, false, true, true);
+
+ if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) {
+ rcu_read_lock();
+ blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
+ rcu_read_unlock();
+ } else {
+ srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu);
+ blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
+ srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx);
+ }
goto done;
}
*/
static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
{
- const int is_sync = rw_is_sync(bio_op(bio), bio->bi_opf);
+ const int is_sync = op_is_sync(bio->bi_opf);
const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
struct blk_plug *plug;
unsigned int request_count = 0;
- struct blk_map_ctx data;
+ struct blk_mq_alloc_data data;
struct request *rq;
blk_qc_t cookie;
+ unsigned int wb_acct;
blk_queue_bounce(q, &bio);
} else
request_count = blk_plug_queued_count(q);
+ wb_acct = wbt_wait(q->rq_wb, bio, NULL);
+
rq = blk_mq_map_request(q, bio, &data);
- if (unlikely(!rq))
+ if (unlikely(!rq)) {
+ __wbt_done(q->rq_wb, wb_acct);
return BLK_QC_T_NONE;
+ }
+
+ wbt_track(&rq->issue_stat, wb_acct);
cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
*/
plug = current->plug;
if (plug) {
+ struct request *last = NULL;
+
blk_mq_bio_to_request(rq, bio);
if (!request_count)
trace_block_plug(q);
+ else
+ last = list_entry_rq(plug->mq_list.prev);
blk_mq_put_ctx(data.ctx);
- if (request_count >= BLK_MAX_REQUEST_COUNT) {
+ if (request_count >= BLK_MAX_REQUEST_COUNT || (last &&
+ blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
blk_flush_plug_list(plug, false);
trace_block_plug(q);
}
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);
+ if (hctx->flags & BLK_MQ_F_BLOCKING)
+ cleanup_srcu_struct(&hctx->queue_rq_srcu);
+
blk_mq_remove_cpuhp(hctx);
blk_free_flush_queue(hctx->fq);
sbitmap_free(&hctx->ctx_map);
flush_start_tag + hctx_idx, node))
goto free_fq;
+ if (hctx->flags & BLK_MQ_F_BLOCKING)
+ init_srcu_struct(&hctx->queue_rq_srcu);
+
return 0;
free_fq:
spin_lock_init(&__ctx->lock);
INIT_LIST_HEAD(&__ctx->rq_list);
__ctx->queue = q;
+ blk_stat_init(&__ctx->stat[BLK_STAT_READ]);
+ blk_stat_init(&__ctx->stat[BLK_STAT_WRITE]);
/* If the cpu isn't online, the cpu is mapped to first hctx */
if (!cpu_online(i))
list_del_init(&q->all_q_node);
mutex_unlock(&all_q_mutex);
+ wbt_exit(q);
+
blk_mq_del_queue_tag_set(q);
blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);