]> git.proxmox.com Git - mirror_ubuntu-disco-kernel.git/blame - block/blk-mq-tag.c
blkcg: rename blkg_try_get to blkg_tryget
[mirror_ubuntu-disco-kernel.git] / block / blk-mq-tag.c
CommitLineData
75bb4625 1/*
88459642
OS
2 * Tag allocation using scalable bitmaps. Uses active queue tracking to support
3 * fairer distribution of tags between multiple submitters when a shared tag map
4 * is used.
75bb4625
JA
5 *
6 * Copyright (C) 2013-2014 Jens Axboe
7 */
320ae51f
JA
8#include <linux/kernel.h>
9#include <linux/module.h>
320ae51f
JA
10
11#include <linux/blk-mq.h>
12#include "blk.h"
13#include "blk-mq.h"
14#include "blk-mq-tag.h"
15
320ae51f
JA
16bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
17{
4bb659b1
JA
18 if (!tags)
19 return true;
20
88459642 21 return sbitmap_any_bit_clear(&tags->bitmap_tags.sb);
0d2602ca
JA
22}
23
24/*
25 * If a previously inactive queue goes active, bump the active user count.
d263ed99
JW
26 * We need to do this before try to allocate driver tag, then even if fail
27 * to get tag when first time, the other shared-tag users could reserve
28 * budget for it.
0d2602ca
JA
29 */
30bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
31{
32 if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
33 !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
34 atomic_inc(&hctx->tags->active_queues);
35
36 return true;
37}
38
39/*
aed3ea94 40 * Wakeup all potentially sleeping on tags
0d2602ca 41 */
aed3ea94 42void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
0d2602ca 43{
88459642
OS
44 sbitmap_queue_wake_all(&tags->bitmap_tags);
45 if (include_reserve)
46 sbitmap_queue_wake_all(&tags->breserved_tags);
0d2602ca
JA
47}
48
e3a2b3f9
JA
49/*
50 * If a previously busy queue goes inactive, potential waiters could now
51 * be allowed to queue. Wake them up and check.
52 */
53void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
54{
55 struct blk_mq_tags *tags = hctx->tags;
56
57 if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
58 return;
59
60 atomic_dec(&tags->active_queues);
61
aed3ea94 62 blk_mq_tag_wakeup_all(tags, false);
e3a2b3f9
JA
63}
64
0d2602ca
JA
65/*
66 * For shared tag users, we track the number of currently active users
67 * and attempt to provide a fair share of the tag depth for each of them.
68 */
69static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
88459642 70 struct sbitmap_queue *bt)
0d2602ca
JA
71{
72 unsigned int depth, users;
73
74 if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
75 return true;
76 if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
77 return true;
78
79 /*
80 * Don't try dividing an ant
81 */
88459642 82 if (bt->sb.depth == 1)
0d2602ca
JA
83 return true;
84
85 users = atomic_read(&hctx->tags->active_queues);
86 if (!users)
87 return true;
88
89 /*
90 * Allow at least some tags
91 */
88459642 92 depth = max((bt->sb.depth + users - 1) / users, 4U);
0d2602ca
JA
93 return atomic_read(&hctx->nr_active) < depth;
94}
95
200e86b3
JA
96static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
97 struct sbitmap_queue *bt)
4bb659b1 98{
200e86b3
JA
99 if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
100 !hctx_may_queue(data->hctx, bt))
0d2602ca 101 return -1;
229a9287
OS
102 if (data->shallow_depth)
103 return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
104 else
105 return __sbitmap_queue_get(bt);
4bb659b1
JA
106}
107
4941115b 108unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
320ae51f 109{
4941115b
JA
110 struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
111 struct sbitmap_queue *bt;
88459642 112 struct sbq_wait_state *ws;
4bb659b1 113 DEFINE_WAIT(wait);
4941115b 114 unsigned int tag_offset;
bd6737f1 115 bool drop_ctx;
320ae51f
JA
116 int tag;
117
4941115b
JA
118 if (data->flags & BLK_MQ_REQ_RESERVED) {
119 if (unlikely(!tags->nr_reserved_tags)) {
120 WARN_ON_ONCE(1);
121 return BLK_MQ_TAG_FAIL;
122 }
123 bt = &tags->breserved_tags;
124 tag_offset = 0;
125 } else {
126 bt = &tags->bitmap_tags;
127 tag_offset = tags->nr_reserved_tags;
128 }
129
200e86b3 130 tag = __blk_mq_get_tag(data, bt);
4bb659b1 131 if (tag != -1)
4941115b 132 goto found_tag;
4bb659b1 133
6f3b0e8b 134 if (data->flags & BLK_MQ_REQ_NOWAIT)
4941115b 135 return BLK_MQ_TAG_FAIL;
4bb659b1 136
4941115b 137 ws = bt_wait_ptr(bt, data->hctx);
bd6737f1 138 drop_ctx = data->ctx == NULL;
4bb659b1 139 do {
e6fc4649
ML
140 struct sbitmap_queue *bt_prev;
141
b3223207
BVA
142 /*
143 * We're out of tags on this hardware queue, kick any
144 * pending IO submits before going to sleep waiting for
8cecb07d 145 * some to complete.
b3223207 146 */
8cecb07d 147 blk_mq_run_hw_queue(data->hctx, false);
b3223207 148
080ff351
JA
149 /*
150 * Retry tag allocation after running the hardware queue,
151 * as running the queue may also have found completions.
152 */
200e86b3 153 tag = __blk_mq_get_tag(data, bt);
080ff351
JA
154 if (tag != -1)
155 break;
156
4e5dff41
JA
157 prepare_to_wait_exclusive(&ws->wait, &wait,
158 TASK_UNINTERRUPTIBLE);
159
160 tag = __blk_mq_get_tag(data, bt);
161 if (tag != -1)
162 break;
163
bd6737f1
JA
164 if (data->ctx)
165 blk_mq_put_ctx(data->ctx);
cb96a42c 166
e6fc4649 167 bt_prev = bt;
4bb659b1 168 io_schedule();
cb96a42c
ML
169
170 data->ctx = blk_mq_get_ctx(data->q);
7d7e0f90 171 data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
4941115b
JA
172 tags = blk_mq_tags_from_data(data);
173 if (data->flags & BLK_MQ_REQ_RESERVED)
174 bt = &tags->breserved_tags;
175 else
176 bt = &tags->bitmap_tags;
177
88459642 178 finish_wait(&ws->wait, &wait);
e6fc4649
ML
179
180 /*
181 * If destination hw queue is changed, fake wake up on
182 * previous queue for compensating the wake up miss, so
183 * other allocations on previous queue won't be starved.
184 */
185 if (bt != bt_prev)
186 sbitmap_queue_wake_up(bt_prev);
187
4941115b 188 ws = bt_wait_ptr(bt, data->hctx);
4bb659b1
JA
189 } while (1);
190
bd6737f1
JA
191 if (drop_ctx && data->ctx)
192 blk_mq_put_ctx(data->ctx);
193
88459642 194 finish_wait(&ws->wait, &wait);
320ae51f 195
4941115b
JA
196found_tag:
197 return tag + tag_offset;
320ae51f
JA
198}
199
4941115b
JA
200void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
201 struct blk_mq_ctx *ctx, unsigned int tag)
320ae51f 202{
415b806d 203 if (!blk_mq_tag_is_reserved(tags, tag)) {
4bb659b1
JA
204 const int real_tag = tag - tags->nr_reserved_tags;
205
70114c39 206 BUG_ON(real_tag >= tags->nr_tags);
f4a644db 207 sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
70114c39
JA
208 } else {
209 BUG_ON(tag >= tags->nr_reserved_tags);
f4a644db 210 sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
70114c39 211 }
320ae51f
JA
212}
213
88459642
OS
214struct bt_iter_data {
215 struct blk_mq_hw_ctx *hctx;
216 busy_iter_fn *fn;
217 void *data;
218 bool reserved;
219};
220
221static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
320ae51f 222{
88459642
OS
223 struct bt_iter_data *iter_data = data;
224 struct blk_mq_hw_ctx *hctx = iter_data->hctx;
225 struct blk_mq_tags *tags = hctx->tags;
226 bool reserved = iter_data->reserved;
81481eb4 227 struct request *rq;
4bb659b1 228
88459642
OS
229 if (!reserved)
230 bitnr += tags->nr_reserved_tags;
231 rq = tags->rqs[bitnr];
4bb659b1 232
7f5562d5
JA
233 /*
234 * We can hit rq == NULL here, because the tagging functions
235 * test and set the bit before assining ->rqs[].
236 */
237 if (rq && rq->q == hctx->queue)
88459642
OS
238 iter_data->fn(hctx, rq, iter_data->data, reserved);
239 return true;
240}
4bb659b1 241
88459642
OS
242static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
243 busy_iter_fn *fn, void *data, bool reserved)
244{
245 struct bt_iter_data iter_data = {
246 .hctx = hctx,
247 .fn = fn,
248 .data = data,
249 .reserved = reserved,
250 };
251
252 sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
320ae51f
JA
253}
254
88459642
OS
255struct bt_tags_iter_data {
256 struct blk_mq_tags *tags;
257 busy_tag_iter_fn *fn;
258 void *data;
259 bool reserved;
260};
261
262static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
f26cdc85 263{
88459642
OS
264 struct bt_tags_iter_data *iter_data = data;
265 struct blk_mq_tags *tags = iter_data->tags;
266 bool reserved = iter_data->reserved;
f26cdc85 267 struct request *rq;
f26cdc85 268
88459642
OS
269 if (!reserved)
270 bitnr += tags->nr_reserved_tags;
7f5562d5
JA
271
272 /*
273 * We can hit rq == NULL here, because the tagging functions
274 * test and set the bit before assining ->rqs[].
275 */
88459642 276 rq = tags->rqs[bitnr];
2d5ba0e2 277 if (rq && blk_mq_request_started(rq))
7f5562d5 278 iter_data->fn(rq, iter_data->data, reserved);
f26cdc85 279
88459642
OS
280 return true;
281}
282
283static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
284 busy_tag_iter_fn *fn, void *data, bool reserved)
285{
286 struct bt_tags_iter_data iter_data = {
287 .tags = tags,
288 .fn = fn,
289 .data = data,
290 .reserved = reserved,
291 };
292
293 if (tags->rqs)
294 sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
f26cdc85
KB
295}
296
e8f1e163
SG
297static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
298 busy_tag_iter_fn *fn, void *priv)
f26cdc85
KB
299{
300 if (tags->nr_reserved_tags)
88459642
OS
301 bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, true);
302 bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false);
f26cdc85 303}
f26cdc85 304
e0489487
SG
305void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
306 busy_tag_iter_fn *fn, void *priv)
307{
308 int i;
309
310 for (i = 0; i < tagset->nr_hw_queues; i++) {
311 if (tagset->tags && tagset->tags[i])
312 blk_mq_all_tag_busy_iter(tagset->tags[i], fn, priv);
313 }
314}
315EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
316
0bf6cd5b 317void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
81481eb4 318 void *priv)
320ae51f 319{
0bf6cd5b
CH
320 struct blk_mq_hw_ctx *hctx;
321 int i;
322
f5bbbbe4
JW
323 /*
324 * __blk_mq_update_nr_hw_queues will update the nr_hw_queues and
325 * queue_hw_ctx after freeze the queue. So we could use q_usage_counter
326 * to avoid race with it. __blk_mq_update_nr_hw_queues will users
327 * synchronize_rcu to ensure all of the users go out of the critical
328 * section below and see zeroed q_usage_counter.
329 */
330 rcu_read_lock();
331 if (percpu_ref_is_zero(&q->q_usage_counter)) {
332 rcu_read_unlock();
333 return;
334 }
0bf6cd5b
CH
335
336 queue_for_each_hw_ctx(q, hctx, i) {
337 struct blk_mq_tags *tags = hctx->tags;
338
339 /*
340 * If not software queues are currently mapped to this
341 * hardware queue, there's nothing to check
342 */
343 if (!blk_mq_hw_queue_mapped(hctx))
344 continue;
345
346 if (tags->nr_reserved_tags)
88459642
OS
347 bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
348 bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
4bb659b1 349 }
f5bbbbe4 350 rcu_read_unlock();
4bb659b1
JA
351}
352
f4a644db
OS
353static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
354 bool round_robin, int node)
4bb659b1 355{
f4a644db
OS
356 return sbitmap_queue_init_node(bt, depth, -1, round_robin, GFP_KERNEL,
357 node);
4bb659b1
JA
358}
359
360static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
24391c0d 361 int node, int alloc_policy)
4bb659b1
JA
362{
363 unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
f4a644db 364 bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
4bb659b1 365
f4a644db 366 if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node))
88459642 367 goto free_tags;
f4a644db
OS
368 if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin,
369 node))
88459642 370 goto free_bitmap_tags;
4bb659b1
JA
371
372 return tags;
88459642
OS
373free_bitmap_tags:
374 sbitmap_queue_free(&tags->bitmap_tags);
375free_tags:
4bb659b1
JA
376 kfree(tags);
377 return NULL;
378}
379
320ae51f 380struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
24391c0d
SL
381 unsigned int reserved_tags,
382 int node, int alloc_policy)
320ae51f 383{
320ae51f 384 struct blk_mq_tags *tags;
320ae51f
JA
385
386 if (total_tags > BLK_MQ_TAG_MAX) {
387 pr_err("blk-mq: tag depth too large\n");
388 return NULL;
389 }
390
391 tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
392 if (!tags)
393 return NULL;
394
320ae51f
JA
395 tags->nr_tags = total_tags;
396 tags->nr_reserved_tags = reserved_tags;
320ae51f 397
24391c0d 398 return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
320ae51f
JA
399}
400
401void blk_mq_free_tags(struct blk_mq_tags *tags)
402{
88459642
OS
403 sbitmap_queue_free(&tags->bitmap_tags);
404 sbitmap_queue_free(&tags->breserved_tags);
320ae51f
JA
405 kfree(tags);
406}
407
70f36b60
JA
408int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
409 struct blk_mq_tags **tagsptr, unsigned int tdepth,
410 bool can_grow)
e3a2b3f9 411{
70f36b60
JA
412 struct blk_mq_tags *tags = *tagsptr;
413
414 if (tdepth <= tags->nr_reserved_tags)
e3a2b3f9
JA
415 return -EINVAL;
416
417 /*
70f36b60
JA
418 * If we are allowed to grow beyond the original size, allocate
419 * a new set of tags before freeing the old one.
e3a2b3f9 420 */
70f36b60
JA
421 if (tdepth > tags->nr_tags) {
422 struct blk_mq_tag_set *set = hctx->queue->tag_set;
423 struct blk_mq_tags *new;
424 bool ret;
425
426 if (!can_grow)
427 return -EINVAL;
428
429 /*
430 * We need some sort of upper limit, set it high enough that
431 * no valid use cases should require more.
432 */
433 if (tdepth > 16 * BLKDEV_MAX_RQ)
434 return -EINVAL;
435
75d6e175
ML
436 new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth,
437 tags->nr_reserved_tags);
70f36b60
JA
438 if (!new)
439 return -ENOMEM;
440 ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
441 if (ret) {
442 blk_mq_free_rq_map(new);
443 return -ENOMEM;
444 }
445
446 blk_mq_free_rqs(set, *tagsptr, hctx->queue_num);
447 blk_mq_free_rq_map(*tagsptr);
448 *tagsptr = new;
449 } else {
450 /*
451 * Don't need (or can't) update reserved tags here, they
452 * remain static and should never need resizing.
453 */
75d6e175
ML
454 sbitmap_queue_resize(&tags->bitmap_tags,
455 tdepth - tags->nr_reserved_tags);
70f36b60 456 }
88459642 457
e3a2b3f9
JA
458 return 0;
459}
460
205fb5f5
BVA
461/**
462 * blk_mq_unique_tag() - return a tag that is unique queue-wide
463 * @rq: request for which to compute a unique tag
464 *
465 * The tag field in struct request is unique per hardware queue but not over
466 * all hardware queues. Hence this function that returns a tag with the
467 * hardware context index in the upper bits and the per hardware queue tag in
468 * the lower bits.
469 *
470 * Note: When called for a request that is queued on a non-multiqueue request
471 * queue, the hardware context index is set to zero.
472 */
473u32 blk_mq_unique_tag(struct request *rq)
474{
475 struct request_queue *q = rq->q;
476 struct blk_mq_hw_ctx *hctx;
477 int hwq = 0;
478
479 if (q->mq_ops) {
7d7e0f90 480 hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
205fb5f5
BVA
481 hwq = hctx->queue_num;
482 }
483
484 return (hwq << BLK_MQ_UNIQUE_TAG_BITS) |
485 (rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
486}
487EXPORT_SYMBOL(blk_mq_unique_tag);