}
EXPORT_SYMBOL(blk_mq_alloc_request);
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
+ unsigned int flags, unsigned int hctx_idx)
+{
+ struct blk_mq_hw_ctx *hctx;
+ struct blk_mq_ctx *ctx;
+ struct request *rq;
+ struct blk_mq_alloc_data alloc_data;
+ int ret;
+
+ /*
+ * If the tag allocator sleeps we could get an allocation for a
+ * different hardware context. No need to complicate the low level
+ * allocator for this for the rare use case of a command tied to
+ * a specific queue.
+ */
+ if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT)))
+ return ERR_PTR(-EINVAL);
+
+ if (hctx_idx >= q->nr_hw_queues)
+ return ERR_PTR(-EIO);
+
+ ret = blk_queue_enter(q, true);
+ if (ret)
+ return ERR_PTR(ret);
+
+ hctx = q->queue_hw_ctx[hctx_idx];
+ ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
+
+ blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
+ rq = __blk_mq_alloc_request(&alloc_data, rw);
+ if (!rq) {
+ blk_queue_exit(q);
+ return ERR_PTR(-EWOULDBLOCK);
+ }
+
+ return rq;
+}
+EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
+
static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx, struct request *rq)
{
};
int i;
- if (blk_queue_enter(q, true))
+ /* A deadlock might occur if a request is stuck requiring a
+ * timeout at the same time a queue freeze is waiting
+ * completion, since the timeout code would not be able to
+ * acquire the queue reference here.
+ *
+ * That's why we don't use blk_queue_enter here; instead, we use
+ * percpu_ref_tryget directly, because we need to be able to
+ * obtain a reference even in the short window between the queue
+ * starting to freeze, by dropping the first reference in
+ * blk_mq_freeze_queue_start, and the moment the last request is
+ * consumed, marked by the instant q_usage_counter reaches
+ * zero.
+ */
+ if (!percpu_ref_tryget(&q->q_usage_counter))
return;
blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
struct list_head *dptr;
int queued;
- WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask));
-
if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
return;
+ WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
+ cpu_online(hctx->next_cpu));
+
hctx->run++;
/*
switch (ret) {
case BLK_MQ_RQ_QUEUE_OK:
queued++;
- continue;
+ break;
case BLK_MQ_RQ_QUEUE_BUSY:
list_add(&rq->queuelist, &rq_list);
__blk_mq_requeue_request(rq);
return WORK_CPU_UNBOUND;
if (--hctx->next_cpu_batch <= 0) {
- int cpu = hctx->next_cpu, next_cpu;
+ int next_cpu;
next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask);
if (next_cpu >= nr_cpu_ids)
hctx->next_cpu = next_cpu;
hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
-
- return cpu;
}
return hctx->next_cpu;
EXPORT_SYMBOL(blk_mq_delay_queue);
static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
- struct blk_mq_ctx *ctx,
struct request *rq,
bool at_head)
{
+ struct blk_mq_ctx *ctx = rq->mq_ctx;
+
trace_block_rq_insert(hctx->queue, rq);
if (at_head)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
- __blk_mq_insert_req_list(hctx, ctx, rq, at_head);
+ __blk_mq_insert_req_list(hctx, rq, at_head);
blk_mq_hctx_mark_pending(hctx, ctx);
}
void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
- bool async)
+ bool async)
{
+ struct blk_mq_ctx *ctx = rq->mq_ctx;
struct request_queue *q = rq->q;
struct blk_mq_hw_ctx *hctx;
- struct blk_mq_ctx *ctx = rq->mq_ctx, *current_ctx;
-
- current_ctx = blk_mq_get_ctx(q);
- if (!cpu_online(ctx->cpu))
- rq->mq_ctx = ctx = current_ctx;
hctx = q->mq_ops->map_queue(q, ctx->cpu);
if (run_queue)
blk_mq_run_hw_queue(hctx, async);
-
- blk_mq_put_ctx(current_ctx);
}
static void blk_mq_insert_requests(struct request_queue *q,
{
struct blk_mq_hw_ctx *hctx;
- struct blk_mq_ctx *current_ctx;
trace_block_unplug(q, depth, !from_schedule);
- current_ctx = blk_mq_get_ctx(q);
-
- if (!cpu_online(ctx->cpu))
- ctx = current_ctx;
hctx = q->mq_ops->map_queue(q, ctx->cpu);
/*
struct request *rq;
rq = list_first_entry(list, struct request, queuelist);
+ BUG_ON(rq->mq_ctx != ctx);
list_del_init(&rq->queuelist);
- rq->mq_ctx = ctx;
- __blk_mq_insert_req_list(hctx, ctx, rq, false);
+ __blk_mq_insert_req_list(hctx, rq, false);
}
blk_mq_hctx_mark_pending(hctx, ctx);
spin_unlock(&ctx->lock);
blk_mq_run_hw_queue(hctx, from_schedule);
- blk_mq_put_ctx(current_ctx);
}
static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
blk_queue_bounce(q, &bio);
+ blk_queue_split(q, &bio, q->bio_split);
+
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
bio_io_error(bio);
return BLK_QC_T_NONE;
}
- blk_queue_split(q, &bio, q->bio_split);
-
- if (!is_flush_fua && !blk_queue_nomerges(q)) {
- if (blk_attempt_plug_merge(q, bio, &request_count,
- &same_queue_rq))
- return BLK_QC_T_NONE;
- } else
- request_count = blk_plug_queued_count(q);
+ if (!is_flush_fua && !blk_queue_nomerges(q) &&
+ blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
+ return BLK_QC_T_NONE;
rq = blk_mq_map_request(q, bio, &data);
if (unlikely(!rq))
blk_mq_put_ctx(data.ctx);
if (!old_rq)
goto done;
- if (!blk_mq_direct_issue_request(old_rq, &cookie))
- goto done;
- blk_mq_insert_request(old_rq, false, true, true);
+ if (test_bit(BLK_MQ_S_STOPPED, &data.hctx->state) ||
+ blk_mq_direct_issue_request(old_rq, &cookie) != 0)
+ blk_mq_insert_request(old_rq, false, true, true);
goto done;
}
blk_queue_split(q, &bio, q->bio_split);
- if (!is_flush_fua && !blk_queue_nomerges(q) &&
- blk_attempt_plug_merge(q, bio, &request_count, NULL))
- return BLK_QC_T_NONE;
+ if (!is_flush_fua && !blk_queue_nomerges(q)) {
+ if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
+ return BLK_QC_T_NONE;
+ } else
+ request_count = blk_plug_queued_count(q);
rq = blk_mq_map_request(q, bio, &data);
if (unlikely(!rq))
INIT_LIST_HEAD(&tags->page_list);
tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
- GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+ GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
set->numa_node);
if (!tags->rqs) {
blk_mq_free_tags(tags);
do {
page = alloc_pages_node(set->numa_node,
- GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
+ GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
this_order);
if (page)
break;
* Allow kmemleak to scan these pages as they contain pointers
* to additional allocations like via ops->init_request().
*/
- kmemleak_alloc(p, order_to_size(this_order), 1, GFP_KERNEL);
+ kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
entries_per_page = order_to_size(this_order) / rq_size;
to_do = min(entries_per_page, set->queue_depth - i);
left -= to_do * rq_size;
return 0;
}
+/*
+ * 'cpu' is going away. splice any existing rq_list entries from this
+ * software queue to the hw queue dispatch list, and ensure that it
+ * gets run.
+ */
static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
{
- struct request_queue *q = hctx->queue;
struct blk_mq_ctx *ctx;
LIST_HEAD(tmp);
- /*
- * Move ctx entries to new CPU, if this one is going away.
- */
- ctx = __blk_mq_get_ctx(q, cpu);
+ ctx = __blk_mq_get_ctx(hctx->queue, cpu);
spin_lock(&ctx->lock);
if (!list_empty(&ctx->rq_list)) {
if (list_empty(&tmp))
return NOTIFY_OK;
- ctx = blk_mq_get_ctx(q);
- spin_lock(&ctx->lock);
-
- while (!list_empty(&tmp)) {
- struct request *rq;
-
- rq = list_first_entry(&tmp, struct request, queuelist);
- rq->mq_ctx = ctx;
- list_move_tail(&rq->queuelist, &ctx->rq_list);
- }
-
- hctx = q->mq_ops->map_queue(q, ctx->cpu);
- blk_mq_hctx_mark_pending(hctx, ctx);
-
- spin_unlock(&ctx->lock);
+ spin_lock(&hctx->lock);
+ list_splice_tail_init(&tmp, &hctx->dispatch);
+ spin_unlock(&hctx->lock);
blk_mq_run_hw_queue(hctx, true);
- blk_mq_put_ctx(ctx);
return NOTIFY_OK;
}
static void blk_mq_map_swqueue(struct request_queue *q,
const struct cpumask *online_mask)
{
- unsigned int i;
+ unsigned int i, hctx_idx;
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
struct blk_mq_tag_set *set = q->tag_set;
/*
* Map software to hardware queues
*/
- queue_for_each_ctx(q, ctx, i) {
+ for_each_possible_cpu(i) {
/* If the cpu isn't online, the cpu is mapped to first hctx */
if (!cpumask_test_cpu(i, online_mask))
continue;
+ hctx_idx = q->mq_map[i];
+ /* unmapped hw queue can be remapped after CPU topo changed */
+ if (!set->tags[hctx_idx]) {
+ set->tags[hctx_idx] = blk_mq_init_rq_map(set, hctx_idx);
+
+ /*
+ * If tags initialization fail for some hctx,
+ * that hctx won't be brought online. In this
+ * case, remap the current ctx to hctx[0] which
+ * is guaranteed to always have tags allocated
+ */
+ if (!set->tags[hctx_idx])
+ q->mq_map[i] = 0;
+ }
+
+ ctx = per_cpu_ptr(q->queue_ctx, i);
hctx = q->mq_ops->map_queue(q, i);
cpumask_set_cpu(i, hctx->cpumask);
ctx->index_hw = hctx->nr_ctx;
* disable it and free the request entries.
*/
if (!hctx->nr_ctx) {
- if (set->tags[i]) {
+ /* Never unmap queue 0. We need it as a
+ * fallback in case of a new remap fails
+ * allocation
+ */
+ if (i && set->tags[i]) {
blk_mq_free_rq_map(set, set->tags[i], i);
set->tags[i] = NULL;
}
continue;
}
- /* unmapped hw queue can be remapped after CPU topo changed */
- if (!set->tags[i])
- set->tags[i] = blk_mq_init_rq_map(set, i);
hctx->tags = set->tags[i];
WARN_ON(!hctx->tags);