]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - drivers/md/dm.c
dm: remove now unused bio-based io_pool and _io_cache
[mirror_ubuntu-jammy-kernel.git] / drivers / md / dm.c
CommitLineData
1da177e4
LT
1/*
2 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
784aae73 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
1da177e4
LT
4 *
5 * This file is released under the GPL.
6 */
7
4cc96131
MS
8#include "dm-core.h"
9#include "dm-rq.h"
51e5b2bd 10#include "dm-uevent.h"
1da177e4
LT
11
12#include <linux/init.h>
13#include <linux/module.h>
48c9c27b 14#include <linux/mutex.h>
174cd4b1 15#include <linux/sched/signal.h>
1da177e4
LT
16#include <linux/blkpg.h>
17#include <linux/bio.h>
1da177e4 18#include <linux/mempool.h>
f26c5719 19#include <linux/dax.h>
1da177e4
LT
20#include <linux/slab.h>
21#include <linux/idr.h>
7e026c8c 22#include <linux/uio.h>
3ac51e74 23#include <linux/hdreg.h>
3f77316d 24#include <linux/delay.h>
ffcc3936 25#include <linux/wait.h>
71cdb697 26#include <linux/pr.h>
b0b4d7c6 27#include <linux/refcount.h>
55782138 28
72d94861
AK
29#define DM_MSG_PREFIX "core"
30
60935eb2
MB
31/*
32 * Cookies are numeric values sent with CHANGE and REMOVE
33 * uevents while resuming, removing or renaming the device.
34 */
35#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
36#define DM_COOKIE_LENGTH 24
37
1da177e4
LT
38static const char *_name = DM_NAME;
39
40static unsigned int major = 0;
41static unsigned int _major = 0;
42
d15b774c
AK
43static DEFINE_IDR(_minor_idr);
44
f32c10b0 45static DEFINE_SPINLOCK(_minor_lock);
2c140a24
MP
46
47static void do_deferred_remove(struct work_struct *w);
48
49static DECLARE_WORK(deferred_remove_work, do_deferred_remove);
50
acfe0ad7
MP
51static struct workqueue_struct *deferred_remove_workqueue;
52
93e6442c
MP
53atomic_t dm_global_event_nr = ATOMIC_INIT(0);
54DECLARE_WAIT_QUEUE_HEAD(dm_global_eventq);
55
62e08243
MP
56void dm_issue_global_event(void)
57{
58 atomic_inc(&dm_global_event_nr);
59 wake_up(&dm_global_eventq);
60}
61
64f52b0e
MS
62/*
63 * One of these is allocated (on-stack) per original bio.
64 */
/*
 * Per-original-bio cloning state. alloc_tio() reads ->io (to hand out
 * the dm_target_io embedded in the dm_io first) and ->md (whose ->bs
 * bio_set backs any additional clones).
 */
65struct clone_info {
66 struct mapped_device *md;
67 struct dm_table *map;
68 struct bio *bio;
69 struct dm_io *io;
/* NOTE(review): presumably the current position / remaining length of the split — confirm against users. */
70 sector_t sector;
71 unsigned sector_count;
72};
73
74/*
75 * One of these is allocated per clone bio.
76 */
77#define DM_TIO_MAGIC 7282014
/*
 * Per-clone bookkeeping. The clone bio is the LAST member so that
 * container_of(bio, struct dm_target_io, clone) recovers this structure
 * from the bio pointer.
 */
78struct dm_target_io {
/* Set to DM_TIO_MAGIC by alloc_tio(); checked in dm_bio_from_per_bio_data(). */
79 unsigned magic;
/* Owning dm_io; alloc_io() leaves this NULL to mark the embedded tio as unused. */
80 struct dm_io *io;
/* Target this clone is issued to; its type->end_io is called from clone_endio(). */
81 struct dm_target *ti;
/* Value returned by dm_bio_get_target_bio_nr(). */
82 unsigned target_bio_nr;
/* Length counter adjusted by dm_accept_partial_bio(). */
83 unsigned *len_ptr;
/* true when this tio is the one embedded in struct dm_io (see alloc_io()). */
84 bool inside_dm_io;
85 struct bio clone;
86};
87
1da177e4 88/*
745dc570 89 * One of these is allocated per original bio.
64f52b0e 90 * It contains the first clone used for that original.
1da177e4 91 */
64f52b0e 92#define DM_IO_MAGIC 5191977
/*
 * Per-original-bio state. The embedded dm_target_io is the last member,
 * which in turn ends in its clone bio; dm_per_bio_data() and
 * dm_bio_from_per_bio_data() depend on this layout.
 */
1da177e4 93struct dm_io {
/* Set to DM_IO_MAGIC by alloc_io(). */
64f52b0e 94 unsigned magic;
1da177e4 95 struct mapped_device *md;
/* Completion status; written under endio_lock in dec_pending(). */
4e4cbee9 96 blk_status_t status;
/* Outstanding references; the original bio is completed when dec_pending() drops it to zero. */
1da177e4 97 atomic_t io_count;
745dc570 98 struct bio *orig_bio;
/* jiffies value recorded by start_io_acct(). */
3eaf840e 99 unsigned long start_time;
f88fb981 100 spinlock_t endio_lock;
/* Passed to dm_stats_account_io() by the accounting helpers. */
fd2ed4d2 101 struct dm_stats_aux stats_aux;
64f52b0e
MS
102 /* last member of dm_target_io is 'struct bio' */
103 struct dm_target_io tio;
1da177e4
LT
104};
105
64f52b0e
MS
106void *dm_per_bio_data(struct bio *bio, size_t data_size)
107{
108 struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
109 if (!tio->inside_dm_io)
110 return (char *)bio - offsetof(struct dm_target_io, clone) - data_size;
111 return (char *)bio - offsetof(struct dm_target_io, clone) - offsetof(struct dm_io, tio) - data_size;
112}
113EXPORT_SYMBOL_GPL(dm_per_bio_data);
114
115struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
116{
117 struct dm_io *io = (struct dm_io *)((char *)data + data_size);
118 if (io->magic == DM_IO_MAGIC)
119 return (struct bio *)((char *)io + offsetof(struct dm_io, tio) + offsetof(struct dm_target_io, clone));
120 BUG_ON(io->magic != DM_TIO_MAGIC);
121 return (struct bio *)((char *)io + offsetof(struct dm_target_io, clone));
122}
123EXPORT_SYMBOL_GPL(dm_bio_from_per_bio_data);
124
125unsigned dm_bio_get_target_bio_nr(const struct bio *bio)
126{
127 return container_of(bio, struct dm_target_io, clone)->target_bio_nr;
128}
129EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_nr);
130
ba61fdd1
JM
131#define MINOR_ALLOCED ((void *)-1)
132
1da177e4
LT
133/*
134 * Bits for the md->flags field.
135 */
1eb787ec 136#define DMF_BLOCK_IO_FOR_SUSPEND 0
1da177e4 137#define DMF_SUSPENDED 1
aa8d7c2f 138#define DMF_FROZEN 2
fba9f90e 139#define DMF_FREEING 3
5c6bd75d 140#define DMF_DELETING 4
2e93ccc1 141#define DMF_NOFLUSH_SUSPENDING 5
8ae12666
KO
142#define DMF_DEFERRED_REMOVE 6
143#define DMF_SUSPENDED_INTERNALLY 7
1da177e4 144
115485e8 145#define DM_NUMA_NODE NUMA_NO_NODE
115485e8 146static int dm_numa_node = DM_NUMA_NODE;
faad87df 147
e6ee8c0b
KU
148/*
149 * For mempools pre-allocation at the table loading time.
150 */
151struct dm_md_mempools {
/* NOTE(review): presumably the source of md->bs (per-clone dm_target_io bios, see alloc_tio()) — confirm. */
e6ee8c0b 152 struct bio_set *bs;
/* NOTE(review): presumably the source of md->io_bs (dm_io allocations, see alloc_io()) — confirm. */
64f52b0e 153 struct bio_set *io_bs;
e6ee8c0b
KU
154};
155
86f1152b
BM
/*
 * An opened underlying device tracked on md->table_devices.
 * dm_get_table_device() hands out &dm_dev; dm_put_table_device()
 * recovers this structure from it with container_of().
 */
156struct table_device {
/* Link in md->table_devices. */
157 struct list_head list;
/* Set to 1 on creation, incremented on every further dm_get_table_device() hit. */
b0b4d7c6 158 refcount_t count;
86f1152b
BM
159 struct dm_dev dm_dev;
160};
161
8fbf26ad 162static struct kmem_cache *_rq_tio_cache;
1ae49ea2 163static struct kmem_cache *_rq_cache;
94818742 164
e8603136
MS
165/*
166 * Bio-based DM's mempools' reserved IOs set by the user.
167 */
4cc96131 168#define RESERVED_BIO_BASED_IOS 16
e8603136
MS
169static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
170
115485e8
MS
/*
 * Read a signed module parameter and clamp it to [min, max].
 *
 * When the stored value is out of range the clamped value is written
 * back with cmpxchg() (the result of which is ignored) and returned;
 * otherwise the value read is returned unchanged.
 */
static int __dm_get_module_param_int(int *module_param, int min, int max)
{
	int seen = READ_ONCE(*module_param);
	int clamped;

	if (seen < min)
		clamped = min;
	else if (seen > max)
		clamped = max;
	else
		return seen;

	(void)cmpxchg(module_param, seen, clamped);

	return clamped;
}
191
4cc96131
MS
/*
 * Read an unsigned module parameter, substituting @def when it is zero
 * and @max when it exceeds @max.
 *
 * A substitution is only applied (and written back with cmpxchg())
 * when the replacement value is itself non-zero; otherwise the value
 * that was read is returned as-is.
 */
unsigned __dm_get_module_param(unsigned *module_param,
			       unsigned def, unsigned max)
{
	unsigned seen = READ_ONCE(*module_param);
	unsigned replacement;

	if (seen == 0)
		replacement = def;
	else if (seen > max)
		replacement = max;
	else
		replacement = 0;

	/* A zero replacement means "leave the parameter alone". */
	if (replacement == 0)
		return seen;

	(void)cmpxchg(module_param, seen, replacement);

	return replacement;
}
210
e8603136
MS
211unsigned dm_get_reserved_bio_based_ios(void)
212{
09c2d531 213 return __dm_get_module_param(&reserved_bio_based_ios,
4cc96131 214 RESERVED_BIO_BASED_IOS, DM_RESERVED_MAX_IOS);
e8603136
MS
215}
216EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);
217
115485e8
MS
218static unsigned dm_get_numa_node(void)
219{
220 return __dm_get_module_param_int(&dm_numa_node,
221 DM_NUMA_NODE, num_online_nodes() - 1);
222}
223
1da177e4
LT
224static int __init local_init(void)
225{
51157b4a 226 int r = -ENOMEM;
1da177e4 227
8fbf26ad
KU
228 _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
229 if (!_rq_tio_cache)
dde1e1ec 230 return r;
8fbf26ad 231
eca7ee6d 232 _rq_cache = kmem_cache_create("dm_old_clone_request", sizeof(struct request),
1ae49ea2
MS
233 __alignof__(struct request), 0, NULL);
234 if (!_rq_cache)
235 goto out_free_rq_tio_cache;
236
51e5b2bd 237 r = dm_uevent_init();
51157b4a 238 if (r)
1ae49ea2 239 goto out_free_rq_cache;
51e5b2bd 240
acfe0ad7
MP
241 deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
242 if (!deferred_remove_workqueue) {
243 r = -ENOMEM;
244 goto out_uevent_exit;
245 }
246
1da177e4
LT
247 _major = major;
248 r = register_blkdev(_major, _name);
51157b4a 249 if (r < 0)
acfe0ad7 250 goto out_free_workqueue;
1da177e4
LT
251
252 if (!_major)
253 _major = r;
254
255 return 0;
51157b4a 256
acfe0ad7
MP
257out_free_workqueue:
258 destroy_workqueue(deferred_remove_workqueue);
51157b4a
KU
259out_uevent_exit:
260 dm_uevent_exit();
1ae49ea2
MS
261out_free_rq_cache:
262 kmem_cache_destroy(_rq_cache);
8fbf26ad
KU
263out_free_rq_tio_cache:
264 kmem_cache_destroy(_rq_tio_cache);
51157b4a
KU
265
266 return r;
1da177e4
LT
267}
268
269static void local_exit(void)
270{
2c140a24 271 flush_scheduled_work();
acfe0ad7 272 destroy_workqueue(deferred_remove_workqueue);
2c140a24 273
1ae49ea2 274 kmem_cache_destroy(_rq_cache);
8fbf26ad 275 kmem_cache_destroy(_rq_tio_cache);
00d59405 276 unregister_blkdev(_major, _name);
51e5b2bd 277 dm_uevent_exit();
1da177e4
LT
278
279 _major = 0;
280
281 DMINFO("cleaned up");
282}
283
b9249e55 284static int (*_inits[])(void) __initdata = {
1da177e4
LT
285 local_init,
286 dm_target_init,
287 dm_linear_init,
288 dm_stripe_init,
952b3557 289 dm_io_init,
945fa4d2 290 dm_kcopyd_init,
1da177e4 291 dm_interface_init,
fd2ed4d2 292 dm_statistics_init,
1da177e4
LT
293};
294
b9249e55 295static void (*_exits[])(void) = {
1da177e4
LT
296 local_exit,
297 dm_target_exit,
298 dm_linear_exit,
299 dm_stripe_exit,
952b3557 300 dm_io_exit,
945fa4d2 301 dm_kcopyd_exit,
1da177e4 302 dm_interface_exit,
fd2ed4d2 303 dm_statistics_exit,
1da177e4
LT
304};
305
306static int __init dm_init(void)
307{
308 const int count = ARRAY_SIZE(_inits);
309
310 int r, i;
311
312 for (i = 0; i < count; i++) {
313 r = _inits[i]();
314 if (r)
315 goto bad;
316 }
317
318 return 0;
319
320 bad:
321 while (i--)
322 _exits[i]();
323
324 return r;
325}
326
327static void __exit dm_exit(void)
328{
329 int i = ARRAY_SIZE(_exits);
330
331 while (i--)
332 _exits[i]();
d15b774c
AK
333
334 /*
335 * Should be empty by this point.
336 */
d15b774c 337 idr_destroy(&_minor_idr);
1da177e4
LT
338}
339
340/*
341 * Block device functions
342 */
432a212c
MA
343int dm_deleting_md(struct mapped_device *md)
344{
345 return test_bit(DMF_DELETING, &md->flags);
346}
347
fe5f9f2c 348static int dm_blk_open(struct block_device *bdev, fmode_t mode)
1da177e4
LT
349{
350 struct mapped_device *md;
351
fba9f90e
JM
352 spin_lock(&_minor_lock);
353
fe5f9f2c 354 md = bdev->bd_disk->private_data;
fba9f90e
JM
355 if (!md)
356 goto out;
357
5c6bd75d 358 if (test_bit(DMF_FREEING, &md->flags) ||
432a212c 359 dm_deleting_md(md)) {
fba9f90e
JM
360 md = NULL;
361 goto out;
362 }
363
1da177e4 364 dm_get(md);
5c6bd75d 365 atomic_inc(&md->open_count);
fba9f90e
JM
366out:
367 spin_unlock(&_minor_lock);
368
369 return md ? 0 : -ENXIO;
1da177e4
LT
370}
371
db2a144b 372static void dm_blk_close(struct gendisk *disk, fmode_t mode)
1da177e4 373{
63a4f065 374 struct mapped_device *md;
6e9624b8 375
4a1aeb98
MB
376 spin_lock(&_minor_lock);
377
63a4f065
MS
378 md = disk->private_data;
379 if (WARN_ON(!md))
380 goto out;
381
2c140a24
MP
382 if (atomic_dec_and_test(&md->open_count) &&
383 (test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
acfe0ad7 384 queue_work(deferred_remove_workqueue, &deferred_remove_work);
2c140a24 385
1da177e4 386 dm_put(md);
63a4f065 387out:
4a1aeb98 388 spin_unlock(&_minor_lock);
1da177e4
LT
389}
390
5c6bd75d
AK
391int dm_open_count(struct mapped_device *md)
392{
393 return atomic_read(&md->open_count);
394}
395
396/*
397 * Guarantees nothing is using the device before it's deleted.
398 */
2c140a24 399int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only_deferred)
5c6bd75d
AK
400{
401 int r = 0;
402
403 spin_lock(&_minor_lock);
404
2c140a24 405 if (dm_open_count(md)) {
5c6bd75d 406 r = -EBUSY;
2c140a24
MP
407 if (mark_deferred)
408 set_bit(DMF_DEFERRED_REMOVE, &md->flags);
409 } else if (only_deferred && !test_bit(DMF_DEFERRED_REMOVE, &md->flags))
410 r = -EEXIST;
5c6bd75d
AK
411 else
412 set_bit(DMF_DELETING, &md->flags);
413
414 spin_unlock(&_minor_lock);
415
416 return r;
417}
418
2c140a24
MP
419int dm_cancel_deferred_remove(struct mapped_device *md)
420{
421 int r = 0;
422
423 spin_lock(&_minor_lock);
424
425 if (test_bit(DMF_DELETING, &md->flags))
426 r = -EBUSY;
427 else
428 clear_bit(DMF_DEFERRED_REMOVE, &md->flags);
429
430 spin_unlock(&_minor_lock);
431
432 return r;
433}
434
/* Work handler for deferred_remove_work; @w is not used. */
static void do_deferred_remove(struct work_struct *w)
{
	dm_deferred_remove();
}
439
fd2ed4d2
MP
440sector_t dm_get_size(struct mapped_device *md)
441{
442 return get_capacity(md->disk);
443}
444
9974fa2c
MS
445struct request_queue *dm_get_md_queue(struct mapped_device *md)
446{
447 return md->queue;
448}
449
fd2ed4d2
MP
450struct dm_stats *dm_get_stats(struct mapped_device *md)
451{
452 return &md->stats;
453}
454
3ac51e74
DW
455static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
456{
457 struct mapped_device *md = bdev->bd_disk->private_data;
458
459 return dm_get_geometry(md, geo);
460}
461
956a4025
MS
462static int dm_grab_bdev_for_ioctl(struct mapped_device *md,
463 struct block_device **bdev,
464 fmode_t *mode)
aa129a22 465{
66482026 466 struct dm_target *tgt;
6c182cd8 467 struct dm_table *map;
956a4025 468 int srcu_idx, r;
aa129a22 469
6c182cd8 470retry:
e56f81e0 471 r = -ENOTTY;
956a4025 472 map = dm_get_live_table(md, &srcu_idx);
aa129a22
MB
473 if (!map || !dm_table_get_size(map))
474 goto out;
475
476 /* We only support devices that have a single target */
477 if (dm_table_get_num_targets(map) != 1)
478 goto out;
479
66482026
MS
480 tgt = dm_table_get_target(map, 0);
481 if (!tgt->type->prepare_ioctl)
4d341d82 482 goto out;
aa129a22 483
4f186f8b 484 if (dm_suspended_md(md)) {
aa129a22
MB
485 r = -EAGAIN;
486 goto out;
487 }
488
66482026 489 r = tgt->type->prepare_ioctl(tgt, bdev, mode);
e56f81e0
CH
490 if (r < 0)
491 goto out;
aa129a22 492
956a4025
MS
493 bdgrab(*bdev);
494 dm_put_live_table(md, srcu_idx);
e56f81e0 495 return r;
aa129a22 496
aa129a22 497out:
956a4025 498 dm_put_live_table(md, srcu_idx);
5bbbfdf6 499 if (r == -ENOTCONN && !fatal_signal_pending(current)) {
6c182cd8
HR
500 msleep(10);
501 goto retry;
502 }
e56f81e0
CH
503 return r;
504}
505
506static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
507 unsigned int cmd, unsigned long arg)
508{
509 struct mapped_device *md = bdev->bd_disk->private_data;
956a4025 510 int r;
e56f81e0 511
956a4025 512 r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
e56f81e0
CH
513 if (r < 0)
514 return r;
6c182cd8 515
e56f81e0
CH
516 if (r > 0) {
517 /*
e980f623
CH
518 * Target determined this ioctl is being issued against a
519 * subset of the parent bdev; require extra privileges.
e56f81e0 520 */
e980f623
CH
521 if (!capable(CAP_SYS_RAWIO)) {
522 DMWARN_LIMIT(
523 "%s: sending ioctl %x to DM device without required privilege.",
524 current->comm, cmd);
525 r = -ENOIOCTLCMD;
e56f81e0 526 goto out;
e980f623 527 }
e56f81e0 528 }
6c182cd8 529
66482026 530 r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
e56f81e0 531out:
956a4025 532 bdput(bdev);
aa129a22
MB
533 return r;
534}
535
028867ac 536static struct dm_io *alloc_io(struct mapped_device *md)
1da177e4 537{
64f52b0e
MS
538 struct dm_io *io;
539 struct dm_target_io *tio;
540 struct bio *clone;
541
542 clone = bio_alloc_bioset(GFP_NOIO, 0, md->io_bs);
543 if (!clone)
544 return NULL;
545
546 tio = container_of(clone, struct dm_target_io, clone);
547 tio->inside_dm_io = true;
548 tio->io = NULL;
549
550 io = container_of(tio, struct dm_io, tio);
551 io->magic = DM_IO_MAGIC;
552
553 return io;
1da177e4
LT
554}
555
028867ac 556static void free_io(struct mapped_device *md, struct dm_io *io)
1da177e4 557{
64f52b0e
MS
558 bio_put(&io->tio.clone);
559}
560
561static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *ti,
562 unsigned target_bio_nr, gfp_t gfp_mask)
563{
564 struct dm_target_io *tio;
565
566 if (!ci->io->tio.io) {
567 /* the dm_target_io embedded in ci->io is available */
568 tio = &ci->io->tio;
569 } else {
570 struct bio *clone = bio_alloc_bioset(gfp_mask, 0, ci->md->bs);
571 if (!clone)
572 return NULL;
573
574 tio = container_of(clone, struct dm_target_io, clone);
575 tio->inside_dm_io = false;
576 }
577
578 tio->magic = DM_TIO_MAGIC;
579 tio->io = ci->io;
580 tio->ti = ti;
581 tio->target_bio_nr = target_bio_nr;
582
583 return tio;
1da177e4
LT
584}
585
cfae7529 586static void free_tio(struct dm_target_io *tio)
1da177e4 587{
64f52b0e
MS
588 if (tio->inside_dm_io)
589 return;
dba14160 590 bio_put(&tio->clone);
1da177e4
LT
591}
592
4cc96131 593int md_in_flight(struct mapped_device *md)
90abb8c4
KU
594{
595 return atomic_read(&md->pending[READ]) +
596 atomic_read(&md->pending[WRITE]);
597}
598
3eaf840e
JNN
599static void start_io_acct(struct dm_io *io)
600{
601 struct mapped_device *md = io->md;
745dc570 602 struct bio *bio = io->orig_bio;
c9959059 603 int cpu;
fd2ed4d2 604 int rw = bio_data_dir(bio);
3eaf840e
JNN
605
606 io->start_time = jiffies;
607
074a7aca 608 cpu = part_stat_lock();
d62e26b3 609 part_round_stats(md->queue, cpu, &dm_disk(md)->part0);
074a7aca 610 part_stat_unlock();
1e9bb880
SL
611 atomic_set(&dm_disk(md)->part0.in_flight[rw],
612 atomic_inc_return(&md->pending[rw]));
fd2ed4d2
MP
613
614 if (unlikely(dm_stats_used(&md->stats)))
528ec5ab
MC
615 dm_stats_account_io(&md->stats, bio_data_dir(bio),
616 bio->bi_iter.bi_sector, bio_sectors(bio),
617 false, 0, &io->stats_aux);
3eaf840e
JNN
618}
619
d221d2e7 620static void end_io_acct(struct dm_io *io)
3eaf840e
JNN
621{
622 struct mapped_device *md = io->md;
745dc570 623 struct bio *bio = io->orig_bio;
3eaf840e 624 unsigned long duration = jiffies - io->start_time;
18c0b223 625 int pending;
3eaf840e
JNN
626 int rw = bio_data_dir(bio);
627
d62e26b3 628 generic_end_io_acct(md->queue, rw, &dm_disk(md)->part0, io->start_time);
3eaf840e 629
fd2ed4d2 630 if (unlikely(dm_stats_used(&md->stats)))
528ec5ab
MC
631 dm_stats_account_io(&md->stats, bio_data_dir(bio),
632 bio->bi_iter.bi_sector, bio_sectors(bio),
633 true, duration, &io->stats_aux);
fd2ed4d2 634
af7e466a
MP
635 /*
636 * After this is decremented the bio must not be touched if it is
d87f4c14 637 * a flush.
af7e466a 638 */
1e9bb880
SL
639 pending = atomic_dec_return(&md->pending[rw]);
640 atomic_set(&dm_disk(md)->part0.in_flight[rw], pending);
316d315b 641 pending += atomic_read(&md->pending[rw^0x1]);
3eaf840e 642
d221d2e7
MP
643 /* nudge anyone waiting on suspend queue */
644 if (!pending)
645 wake_up(&md->wait);
3eaf840e
JNN
646}
647
1da177e4
LT
648/*
649 * Add the bio to the list of deferred io.
650 */
92c63902 651static void queue_io(struct mapped_device *md, struct bio *bio)
1da177e4 652{
05447420 653 unsigned long flags;
1da177e4 654
05447420 655 spin_lock_irqsave(&md->deferred_lock, flags);
1da177e4 656 bio_list_add(&md->deferred, bio);
05447420 657 spin_unlock_irqrestore(&md->deferred_lock, flags);
6a8736d1 658 queue_work(md->wq, &md->work);
1da177e4
LT
659}
660
661/*
662 * Everyone (including functions in this file), should use this
663 * function to access the md->map field, and make sure they call
83d5e5b0 664 * dm_put_live_table() when finished.
1da177e4 665 */
83d5e5b0 666struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx) __acquires(md->io_barrier)
1da177e4 667{
83d5e5b0
MP
668 *srcu_idx = srcu_read_lock(&md->io_barrier);
669
670 return srcu_dereference(md->map, &md->io_barrier);
671}
1da177e4 672
83d5e5b0
MP
673void dm_put_live_table(struct mapped_device *md, int srcu_idx) __releases(md->io_barrier)
674{
675 srcu_read_unlock(&md->io_barrier, srcu_idx);
676}
677
678void dm_sync_table(struct mapped_device *md)
679{
680 synchronize_srcu(&md->io_barrier);
681 synchronize_rcu_expedited();
682}
683
684/*
685 * A fast alternative to dm_get_live_table/dm_put_live_table.
686 * The caller must not block between these two functions.
687 */
688static struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU)
689{
690 rcu_read_lock();
691 return rcu_dereference(md->map);
692}
1da177e4 693
83d5e5b0
MP
694static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
695{
696 rcu_read_unlock();
1da177e4
LT
697}
698
86f1152b
BM
699/*
700 * Open a table device so we can use it as a map destination.
701 */
702static int open_table_device(struct table_device *td, dev_t dev,
703 struct mapped_device *md)
704{
705 static char *_claim_ptr = "I belong to device-mapper";
706 struct block_device *bdev;
707
708 int r;
709
710 BUG_ON(td->dm_dev.bdev);
711
712 bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
713 if (IS_ERR(bdev))
714 return PTR_ERR(bdev);
715
716 r = bd_link_disk_holder(bdev, dm_disk(md));
717 if (r) {
718 blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL);
719 return r;
720 }
721
722 td->dm_dev.bdev = bdev;
817bf402 723 td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
86f1152b
BM
724 return 0;
725}
726
727/*
728 * Close a table device that we've been using.
729 */
730static void close_table_device(struct table_device *td, struct mapped_device *md)
731{
732 if (!td->dm_dev.bdev)
733 return;
734
735 bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
736 blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
817bf402 737 put_dax(td->dm_dev.dax_dev);
86f1152b 738 td->dm_dev.bdev = NULL;
817bf402 739 td->dm_dev.dax_dev = NULL;
86f1152b
BM
740}
741
742static struct table_device *find_table_device(struct list_head *l, dev_t dev,
743 fmode_t mode) {
744 struct table_device *td;
745
746 list_for_each_entry(td, l, list)
747 if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode)
748 return td;
749
750 return NULL;
751}
752
753int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
754 struct dm_dev **result) {
755 int r;
756 struct table_device *td;
757
758 mutex_lock(&md->table_devices_lock);
759 td = find_table_device(&md->table_devices, dev, mode);
760 if (!td) {
115485e8 761 td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id);
86f1152b
BM
762 if (!td) {
763 mutex_unlock(&md->table_devices_lock);
764 return -ENOMEM;
765 }
766
767 td->dm_dev.mode = mode;
768 td->dm_dev.bdev = NULL;
769
770 if ((r = open_table_device(td, dev, md))) {
771 mutex_unlock(&md->table_devices_lock);
772 kfree(td);
773 return r;
774 }
775
776 format_dev_t(td->dm_dev.name, dev);
777
b0b4d7c6 778 refcount_set(&td->count, 1);
86f1152b 779 list_add(&td->list, &md->table_devices);
b0b4d7c6
ER
780 } else {
781 refcount_inc(&td->count);
86f1152b 782 }
86f1152b
BM
783 mutex_unlock(&md->table_devices_lock);
784
785 *result = &td->dm_dev;
786 return 0;
787}
788EXPORT_SYMBOL_GPL(dm_get_table_device);
789
790void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
791{
792 struct table_device *td = container_of(d, struct table_device, dm_dev);
793
794 mutex_lock(&md->table_devices_lock);
b0b4d7c6 795 if (refcount_dec_and_test(&td->count)) {
86f1152b
BM
796 close_table_device(td, md);
797 list_del(&td->list);
798 kfree(td);
799 }
800 mutex_unlock(&md->table_devices_lock);
801}
802EXPORT_SYMBOL(dm_put_table_device);
803
804static void free_table_devices(struct list_head *devices)
805{
806 struct list_head *tmp, *next;
807
808 list_for_each_safe(tmp, next, devices) {
809 struct table_device *td = list_entry(tmp, struct table_device, list);
810
811 DMWARN("dm_destroy: %s still exists with %d references",
b0b4d7c6 812 td->dm_dev.name, refcount_read(&td->count));
86f1152b
BM
813 kfree(td);
814 }
815}
816
3ac51e74
DW
817/*
818 * Get the geometry associated with a dm device
819 */
820int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
821{
822 *geo = md->geometry;
823
824 return 0;
825}
826
827/*
828 * Set the geometry of a device.
829 */
830int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
831{
832 sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;
833
834 if (geo->start > sz) {
835 DMWARN("Start sector is beyond the geometry limits.");
836 return -EINVAL;
837 }
838
839 md->geometry = *geo;
840
841 return 0;
842}
843
2e93ccc1
KU
844static int __noflush_suspending(struct mapped_device *md)
845{
846 return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
847}
848
1da177e4
LT
849/*
850 * Decrements the number of outstanding ios that a bio has been
851 * cloned into, completing the original io if necessary.
852 */
4e4cbee9 853static void dec_pending(struct dm_io *io, blk_status_t error)
1da177e4 854{
2e93ccc1 855 unsigned long flags;
4e4cbee9 856 blk_status_t io_error;
b35f8caa
MB
857 struct bio *bio;
858 struct mapped_device *md = io->md;
2e93ccc1
KU
859
860 /* Push-back supersedes any I/O errors */
f88fb981
KU
861 if (unlikely(error)) {
862 spin_lock_irqsave(&io->endio_lock, flags);
745dc570 863 if (!(io->status == BLK_STS_DM_REQUEUE && __noflush_suspending(md)))
4e4cbee9 864 io->status = error;
f88fb981
KU
865 spin_unlock_irqrestore(&io->endio_lock, flags);
866 }
1da177e4
LT
867
868 if (atomic_dec_and_test(&io->io_count)) {
4e4cbee9 869 if (io->status == BLK_STS_DM_REQUEUE) {
2e93ccc1
KU
870 /*
871 * Target requested pushing back the I/O.
2e93ccc1 872 */
022c2611 873 spin_lock_irqsave(&md->deferred_lock, flags);
6a8736d1 874 if (__noflush_suspending(md))
745dc570
MS
875 /* NOTE early return due to BLK_STS_DM_REQUEUE below */
876 bio_list_add_head(&md->deferred, io->orig_bio);
6a8736d1 877 else
2e93ccc1 878 /* noflush suspend was interrupted. */
4e4cbee9 879 io->status = BLK_STS_IOERR;
022c2611 880 spin_unlock_irqrestore(&md->deferred_lock, flags);
2e93ccc1
KU
881 }
882
4e4cbee9 883 io_error = io->status;
745dc570 884 bio = io->orig_bio;
6a8736d1
TH
885 end_io_acct(io);
886 free_io(md, io);
887
4e4cbee9 888 if (io_error == BLK_STS_DM_REQUEUE)
6a8736d1 889 return;
2e93ccc1 890
1eff9d32 891 if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
af7e466a 892 /*
6a8736d1 893 * Preflush done for flush with data, reissue
28a8f0d3 894 * without REQ_PREFLUSH.
af7e466a 895 */
1eff9d32 896 bio->bi_opf &= ~REQ_PREFLUSH;
6a8736d1 897 queue_io(md, bio);
af7e466a 898 } else {
b372d360 899 /* done with normal IO or empty flush */
4e4cbee9 900 bio->bi_status = io_error;
4246a0b6 901 bio_endio(bio);
b35f8caa 902 }
1da177e4
LT
903 }
904}
905
4cc96131 906void disable_write_same(struct mapped_device *md)
7eee4ae2
MS
907{
908 struct queue_limits *limits = dm_get_queue_limits(md);
909
910 /* device doesn't really support WRITE SAME, disable it */
911 limits->max_write_same_sectors = 0;
912}
913
ac62d620
CH
914void disable_write_zeroes(struct mapped_device *md)
915{
916 struct queue_limits *limits = dm_get_queue_limits(md);
917
918 /* device doesn't really support WRITE ZEROES, disable it */
919 limits->max_write_zeroes_sectors = 0;
920}
921
4246a0b6 922static void clone_endio(struct bio *bio)
1da177e4 923{
4e4cbee9 924 blk_status_t error = bio->bi_status;
bfc6d41c 925 struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
b35f8caa 926 struct dm_io *io = tio->io;
9faf400f 927 struct mapped_device *md = tio->io->md;
1da177e4
LT
928 dm_endio_fn endio = tio->ti->type->end_io;
929
4e4cbee9 930 if (unlikely(error == BLK_STS_TARGET)) {
ac62d620 931 if (bio_op(bio) == REQ_OP_WRITE_SAME &&
74d46992 932 !bio->bi_disk->queue->limits.max_write_same_sectors)
ac62d620
CH
933 disable_write_same(md);
934 if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
74d46992 935 !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
ac62d620
CH
936 disable_write_zeroes(md);
937 }
7eee4ae2 938
1be56909 939 if (endio) {
4e4cbee9 940 int r = endio(tio->ti, bio, &error);
1be56909
CH
941 switch (r) {
942 case DM_ENDIO_REQUEUE:
4e4cbee9 943 error = BLK_STS_DM_REQUEUE;
1be56909
CH
944 /*FALLTHRU*/
945 case DM_ENDIO_DONE:
946 break;
947 case DM_ENDIO_INCOMPLETE:
948 /* The target will handle the io */
949 return;
950 default:
951 DMWARN("unimplemented target endio return value: %d", r);
952 BUG();
953 }
954 }
955
cfae7529 956 free_tio(tio);
b35f8caa 957 dec_pending(io, error);
1da177e4
LT
958}
959
56a67df7
MS
960/*
961 * Return maximum size of I/O possible at the supplied sector up to the current
962 * target boundary.
963 */
964static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti)
965{
966 sector_t target_offset = dm_target_offset(ti, sector);
967
968 return ti->len - target_offset;
969}
970
971static sector_t max_io_len(sector_t sector, struct dm_target *ti)
1da177e4 972{
56a67df7 973 sector_t len = max_io_len_target_boundary(sector, ti);
542f9038 974 sector_t offset, max_len;
1da177e4
LT
975
976 /*
542f9038 977 * Does the target need to split even further?
1da177e4 978 */
542f9038
MS
979 if (ti->max_io_len) {
980 offset = dm_target_offset(ti, sector);
981 if (unlikely(ti->max_io_len & (ti->max_io_len - 1)))
982 max_len = sector_div(offset, ti->max_io_len);
983 else
984 max_len = offset & (ti->max_io_len - 1);
985 max_len = ti->max_io_len - max_len;
986
987 if (len > max_len)
988 len = max_len;
1da177e4
LT
989 }
990
991 return len;
992}
993
542f9038
MS
994int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
995{
996 if (len > UINT_MAX) {
997 DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)",
998 (unsigned long long)len, UINT_MAX);
999 ti->error = "Maximum size of target IO is too large";
1000 return -EINVAL;
1001 }
1002
1003 ti->max_io_len = (uint32_t) len;
1004
1005 return 0;
1006}
1007EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
1008
f26c5719
DW
1009static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
1010 sector_t sector, int *srcu_idx)
545ed20e 1011{
545ed20e
TK
1012 struct dm_table *map;
1013 struct dm_target *ti;
545ed20e 1014
f26c5719 1015 map = dm_get_live_table(md, srcu_idx);
545ed20e 1016 if (!map)
f26c5719 1017 return NULL;
545ed20e
TK
1018
1019 ti = dm_table_find_target(map, sector);
1020 if (!dm_target_is_valid(ti))
f26c5719 1021 return NULL;
545ed20e 1022
f26c5719
DW
1023 return ti;
1024}
545ed20e 1025
f26c5719
DW
1026static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
1027 long nr_pages, void **kaddr, pfn_t *pfn)
1028{
1029 struct mapped_device *md = dax_get_private(dax_dev);
1030 sector_t sector = pgoff * PAGE_SECTORS;
1031 struct dm_target *ti;
1032 long len, ret = -EIO;
1033 int srcu_idx;
545ed20e 1034
f26c5719 1035 ti = dm_dax_get_live_target(md, sector, &srcu_idx);
545ed20e 1036
f26c5719
DW
1037 if (!ti)
1038 goto out;
1039 if (!ti->type->direct_access)
1040 goto out;
1041 len = max_io_len(sector, ti) / PAGE_SECTORS;
1042 if (len < 1)
1043 goto out;
1044 nr_pages = min(len, nr_pages);
545ed20e 1045 if (ti->type->direct_access)
817bf402
DW
1046 ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn);
1047
f26c5719 1048 out:
545ed20e 1049 dm_put_live_table(md, srcu_idx);
f26c5719
DW
1050
1051 return ret;
545ed20e
TK
1052}
1053
7e026c8c
DW
1054static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
1055 void *addr, size_t bytes, struct iov_iter *i)
1056{
1057 struct mapped_device *md = dax_get_private(dax_dev);
1058 sector_t sector = pgoff * PAGE_SECTORS;
1059 struct dm_target *ti;
1060 long ret = 0;
1061 int srcu_idx;
1062
1063 ti = dm_dax_get_live_target(md, sector, &srcu_idx);
1064
1065 if (!ti)
1066 goto out;
1067 if (!ti->type->dax_copy_from_iter) {
1068 ret = copy_from_iter(addr, bytes, i);
1069 goto out;
1070 }
1071 ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i);
1072 out:
1073 dm_put_live_table(md, srcu_idx);
1074
1075 return ret;
1076}
1077
1dd40c3e
MP
1078/*
1079 * A target may call dm_accept_partial_bio only from the map routine. It is
c06b3e58 1080 * allowed for all bio types except REQ_PREFLUSH and REQ_OP_ZONE_RESET.
1dd40c3e
MP
1081 *
1082 * dm_accept_partial_bio informs the dm that the target only wants to process
1083 * additional n_sectors sectors of the bio and the rest of the data should be
1084 * sent in a next bio.
1085 *
1086 * A diagram that explains the arithmetics:
1087 * +--------------------+---------------+-------+
1088 * | 1 | 2 | 3 |
1089 * +--------------------+---------------+-------+
1090 *
1091 * <-------------- *tio->len_ptr --------------->
1092 * <------- bi_size ------->
1093 * <-- n_sectors -->
1094 *
1095 * Region 1 was already iterated over with bio_advance or similar function.
1096 * (it may be empty if the target doesn't use bio_advance)
1097 * Region 2 is the remaining bio size that the target wants to process.
1098 * (it may be empty if region 1 is non-empty, although there is no reason
1099 * to make it empty)
1100 * The target requires that region 3 is to be sent in the next bio.
1101 *
1102 * If the target wants to receive multiple copies of the bio (via num_*bios, etc),
1103 * the partially processed part (the sum of regions 1+2) must be the same for all
1104 * copies of the bio.
1105 */
1106void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
1107{
1108 struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
1109 unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
1eff9d32 1110 BUG_ON(bio->bi_opf & REQ_PREFLUSH);
1dd40c3e
MP
1111 BUG_ON(bi_size > *tio->len_ptr);
1112 BUG_ON(n_sectors > bi_size);
1113 *tio->len_ptr -= bi_size - n_sectors;
1114 bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
1115}
1116EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
1117
10999307
DLM
1118/*
1119 * The zone descriptors obtained with a zone report indicate
1120 * zone positions within the target device. The zone descriptors
1121 * must be remapped to match their position within the dm device.
1122 * A target may call dm_remap_zone_report after completion of a
1123 * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained
1124 * from the target device mapping to the dm device.
1125 */
1126void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start)
1127{
1128#ifdef CONFIG_BLK_DEV_ZONED
1129 struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
745dc570 1130 struct bio *report_bio = tio->io->orig_bio;
10999307
DLM
1131 struct blk_zone_report_hdr *hdr = NULL;
1132 struct blk_zone *zone;
1133 unsigned int nr_rep = 0;
1134 unsigned int ofst;
1135 struct bio_vec bvec;
1136 struct bvec_iter iter;
1137 void *addr;
1138
1139 if (bio->bi_status)
1140 return;
1141
1142 /*
1143 * Remap the start sector of the reported zones. For sequential zones,
1144 * also remap the write pointer position.
1145 */
1146 bio_for_each_segment(bvec, report_bio, iter) {
1147 addr = kmap_atomic(bvec.bv_page);
1148
1149 /* Remember the report header in the first page */
1150 if (!hdr) {
1151 hdr = addr;
1152 ofst = sizeof(struct blk_zone_report_hdr);
1153 } else
1154 ofst = 0;
1155
1156 /* Set zones start sector */
1157 while (hdr->nr_zones && ofst < bvec.bv_len) {
1158 zone = addr + ofst;
1159 if (zone->start >= start + ti->len) {
1160 hdr->nr_zones = 0;
1161 break;
1162 }
1163 zone->start = zone->start + ti->begin - start;
1164 if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
1165 if (zone->cond == BLK_ZONE_COND_FULL)
1166 zone->wp = zone->start + zone->len;
1167 else if (zone->cond == BLK_ZONE_COND_EMPTY)
1168 zone->wp = zone->start;
1169 else
1170 zone->wp = zone->wp + ti->begin - start;
1171 }
1172 ofst += sizeof(struct blk_zone);
1173 hdr->nr_zones--;
1174 nr_rep++;
1175 }
1176
1177 if (addr != hdr)
1178 kunmap_atomic(addr);
1179
1180 if (!hdr->nr_zones)
1181 break;
1182 }
1183
1184 if (hdr) {
1185 hdr->nr_zones = nr_rep;
1186 kunmap_atomic(hdr);
1187 }
1188
1189 bio_advance(report_bio, report_bio->bi_iter.bi_size);
1190
1191#else /* !CONFIG_BLK_DEV_ZONED */
1192 bio->bi_status = BLK_STS_NOTSUPP;
1193#endif
1194}
1195EXPORT_SYMBOL_GPL(dm_remap_zone_report);
1196
bd2a49b8 1197static void __map_bio(struct dm_target_io *tio)
1da177e4
LT
1198{
1199 int r;
2056a782 1200 sector_t sector;
dba14160 1201 struct bio *clone = &tio->clone;
64f52b0e 1202 struct dm_io *io = tio->io;
bd2a49b8 1203 struct dm_target *ti = tio->ti;
1da177e4 1204
1da177e4 1205 clone->bi_end_io = clone_endio;
1da177e4
LT
1206
1207 /*
1208 * Map the clone. If r == 0 we don't need to do
1209 * anything, the target has assumed ownership of
1210 * this io.
1211 */
64f52b0e 1212 atomic_inc(&io->io_count);
4f024f37 1213 sector = clone->bi_iter.bi_sector;
d67a5f4b 1214
7de3ee57 1215 r = ti->type->map(ti, clone);
846785e6
CH
1216 switch (r) {
1217 case DM_MAPIO_SUBMITTED:
1218 break;
1219 case DM_MAPIO_REMAPPED:
1da177e4 1220 /* the bio has been remapped so dispatch it */
74d46992 1221 trace_block_bio_remap(clone->bi_disk->queue, clone,
64f52b0e 1222 bio_dev(io->orig_bio), sector);
1da177e4 1223 generic_make_request(clone);
846785e6
CH
1224 break;
1225 case DM_MAPIO_KILL:
4e4cbee9 1226 free_tio(tio);
64f52b0e 1227 dec_pending(io, BLK_STS_IOERR);
4e4cbee9 1228 break;
846785e6 1229 case DM_MAPIO_REQUEUE:
cfae7529 1230 free_tio(tio);
64f52b0e 1231 dec_pending(io, BLK_STS_DM_REQUEUE);
846785e6
CH
1232 break;
1233 default:
45cbcd79
KU
1234 DMWARN("unimplemented target map return value: %d", r);
1235 BUG();
1da177e4
LT
1236 }
1237}
1238
e0d6609a 1239static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
bd2a49b8 1240{
4f024f37
KO
1241 bio->bi_iter.bi_sector = sector;
1242 bio->bi_iter.bi_size = to_bytes(len);
1da177e4
LT
1243}
1244
1245/*
1246 * Creates a bio that consists of range of complete bvecs.
1247 */
c80914e8
MS
1248static int clone_bio(struct dm_target_io *tio, struct bio *bio,
1249 sector_t sector, unsigned len)
1da177e4 1250{
dba14160 1251 struct bio *clone = &tio->clone;
1da177e4 1252
1c3b13e6
KO
1253 __bio_clone_fast(clone, bio);
1254
e2460f2a
MP
1255 if (unlikely(bio_integrity(bio) != NULL)) {
1256 int r;
1257
1258 if (unlikely(!dm_target_has_integrity(tio->ti->type) &&
1259 !dm_target_passes_integrity(tio->ti->type))) {
1260 DMWARN("%s: the target %s doesn't support integrity data.",
1261 dm_device_name(tio->io->md),
1262 tio->ti->type->name);
1263 return -EIO;
1264 }
1265
1266 r = bio_integrity_clone(clone, bio, GFP_NOIO);
c80914e8
MS
1267 if (r < 0)
1268 return r;
1269 }
bd2a49b8 1270
264c869d
DLM
1271 if (bio_op(bio) != REQ_OP_ZONE_REPORT)
1272 bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
1c3b13e6
KO
1273 clone->bi_iter.bi_size = to_bytes(len);
1274
e2460f2a 1275 if (unlikely(bio_integrity(bio) != NULL))
fbd08e76 1276 bio_integrity_trim(clone);
c80914e8
MS
1277
1278 return 0;
1da177e4
LT
1279}
1280
318716dd
MS
1281static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
1282 struct dm_target *ti, unsigned num_bios)
1283{
1284 struct dm_target_io *tio;
1285 int try;
1286
1287 if (!num_bios)
1288 return;
1289
1290 if (num_bios == 1) {
1291 tio = alloc_tio(ci, ti, 0, GFP_NOIO);
1292 bio_list_add(blist, &tio->clone);
1293 return;
1294 }
1295
1296 for (try = 0; try < 2; try++) {
1297 int bio_nr;
1298 struct bio *bio;
1299
1300 if (try)
1301 mutex_lock(&ci->md->table_devices_lock);
1302 for (bio_nr = 0; bio_nr < num_bios; bio_nr++) {
1303 tio = alloc_tio(ci, ti, bio_nr, try ? GFP_NOIO : GFP_NOWAIT);
1304 if (!tio)
1305 break;
1306
1307 bio_list_add(blist, &tio->clone);
1308 }
1309 if (try)
1310 mutex_unlock(&ci->md->table_devices_lock);
1311 if (bio_nr == num_bios)
1312 return;
1313
1314 while ((bio = bio_list_pop(blist))) {
1315 tio = container_of(bio, struct dm_target_io, clone);
1316 free_tio(tio);
1317 }
1318 }
1319}
1320
14fe594d 1321static void __clone_and_map_simple_bio(struct clone_info *ci,
318716dd 1322 struct dm_target_io *tio, unsigned *len)
9015df24 1323{
dba14160 1324 struct bio *clone = &tio->clone;
9015df24 1325
1dd40c3e
MP
1326 tio->len_ptr = len;
1327
99778273 1328 __bio_clone_fast(clone, ci->bio);
bd2a49b8 1329 if (len)
1dd40c3e 1330 bio_setup_sector(clone, ci->sector, *len);
f9ab94ce 1331
bd2a49b8 1332 __map_bio(tio);
f9ab94ce
MP
1333}
1334
14fe594d 1335static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
1dd40c3e 1336 unsigned num_bios, unsigned *len)
06a426ce 1337{
318716dd
MS
1338 struct bio_list blist = BIO_EMPTY_LIST;
1339 struct bio *bio;
1340 struct dm_target_io *tio;
1341
1342 alloc_multiple_bios(&blist, ci, ti, num_bios);
06a426ce 1343
318716dd
MS
1344 while ((bio = bio_list_pop(&blist))) {
1345 tio = container_of(bio, struct dm_target_io, clone);
1346 __clone_and_map_simple_bio(ci, tio, len);
1347 }
06a426ce
MS
1348}
1349
14fe594d 1350static int __send_empty_flush(struct clone_info *ci)
f9ab94ce 1351{
06a426ce 1352 unsigned target_nr = 0;
f9ab94ce
MP
1353 struct dm_target *ti;
1354
b372d360 1355 BUG_ON(bio_has_data(ci->bio));
f9ab94ce 1356 while ((ti = dm_table_get_target(ci->map, target_nr++)))
1dd40c3e 1357 __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
f9ab94ce 1358
f9ab94ce
MP
1359 return 0;
1360}
1361
c80914e8 1362static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
f31c21e4 1363 sector_t sector, unsigned *len)
5ae89a87 1364{
dba14160 1365 struct bio *bio = ci->bio;
5ae89a87 1366 struct dm_target_io *tio;
f31c21e4 1367 int r;
5ae89a87 1368
318716dd 1369 tio = alloc_tio(ci, ti, 0, GFP_NOIO);
f31c21e4
N
1370 tio->len_ptr = len;
1371 r = clone_bio(tio, bio, sector, *len);
1372 if (r < 0) {
1373 free_tio(tio);
1374 return r;
b0d8ed4d 1375 }
f31c21e4 1376 __map_bio(tio);
c80914e8 1377
f31c21e4 1378 return 0;
5ae89a87
MS
1379}
1380
55a62eef 1381typedef unsigned (*get_num_bios_fn)(struct dm_target *ti);
23508a96 1382
55a62eef 1383static unsigned get_num_discard_bios(struct dm_target *ti)
23508a96 1384{
55a62eef 1385 return ti->num_discard_bios;
23508a96
MS
1386}
1387
55a62eef 1388static unsigned get_num_write_same_bios(struct dm_target *ti)
23508a96 1389{
55a62eef 1390 return ti->num_write_same_bios;
23508a96
MS
1391}
1392
ac62d620
CH
1393static unsigned get_num_write_zeroes_bios(struct dm_target *ti)
1394{
1395 return ti->num_write_zeroes_bios;
1396}
1397
23508a96 1398typedef bool (*is_split_required_fn)(struct dm_target *ti);
9eef87da 1399
23508a96
MS
1400static bool is_split_required_for_discard(struct dm_target *ti)
1401{
55a62eef 1402 return ti->split_discard_bios;
cec47e3d
KU
1403}
1404
3d7f4562 1405static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
14fe594d
AK
1406 get_num_bios_fn get_num_bios,
1407 is_split_required_fn is_split_required)
ba1cbad9 1408{
e0d6609a 1409 unsigned len;
55a62eef 1410 unsigned num_bios;
ba1cbad9 1411
3d7f4562
MS
1412 /*
1413 * Even though the device advertised support for this type of
1414 * request, that does not mean every target supports it, and
1415 * reconfiguration might also have changed that since the
1416 * check was performed.
1417 */
1418 num_bios = get_num_bios ? get_num_bios(ti) : 0;
1419 if (!num_bios)
1420 return -EOPNOTSUPP;
ba1cbad9 1421
3d7f4562
MS
1422 if (is_split_required && !is_split_required(ti))
1423 len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
1424 else
1425 len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
de3ec86d 1426
3d7f4562 1427 __send_duplicate_bios(ci, ti, num_bios, &len);
e262f347 1428
3d7f4562
MS
1429 ci->sector += len;
1430 ci->sector_count -= len;
5ae89a87
MS
1431
1432 return 0;
ba1cbad9
MS
1433}
1434
3d7f4562 1435static int __send_discard(struct clone_info *ci, struct dm_target *ti)
23508a96 1436{
3d7f4562 1437 return __send_changing_extent_only(ci, ti, get_num_discard_bios,
14fe594d 1438 is_split_required_for_discard);
23508a96 1439}
0ce65797 1440
3d7f4562 1441static int __send_write_same(struct clone_info *ci, struct dm_target *ti)
0ce65797 1442{
3d7f4562 1443 return __send_changing_extent_only(ci, ti, get_num_write_same_bios, NULL);
0ce65797
MS
1444}
1445
3d7f4562 1446static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
ac62d620 1447{
3d7f4562 1448 return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios, NULL);
ac62d620
CH
1449}
1450
e4c93811
AK
1451/*
1452 * Select the correct strategy for processing a non-flush bio.
1453 */
14fe594d 1454static int __split_and_process_non_flush(struct clone_info *ci)
0ce65797 1455{
dba14160 1456 struct bio *bio = ci->bio;
512875bd 1457 struct dm_target *ti;
1c3b13e6 1458 unsigned len;
c80914e8 1459 int r;
0ce65797 1460
512875bd
JN
1461 ti = dm_table_find_target(ci->map, ci->sector);
1462 if (!dm_target_is_valid(ti))
1463 return -EIO;
1464
3d7f4562
MS
1465 if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
1466 return __send_discard(ci, ti);
1467 else if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
1468 return __send_write_same(ci, ti);
1469 else if (unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES))
1470 return __send_write_zeroes(ci, ti);
1471
264c869d
DLM
1472 if (bio_op(bio) == REQ_OP_ZONE_REPORT)
1473 len = ci->sector_count;
1474 else
1475 len = min_t(sector_t, max_io_len(ci->sector, ti),
1476 ci->sector_count);
0ce65797 1477
c80914e8
MS
1478 r = __clone_and_map_data_bio(ci, ti, ci->sector, &len);
1479 if (r < 0)
1480 return r;
0ce65797 1481
1c3b13e6
KO
1482 ci->sector += len;
1483 ci->sector_count -= len;
0ce65797 1484
1c3b13e6 1485 return 0;
0ce65797
MS
1486}
1487
1da177e4 1488/*
14fe594d 1489 * Entry point to split a bio into clones and submit them to the targets.
1da177e4 1490 */
83d5e5b0
MP
1491static void __split_and_process_bio(struct mapped_device *md,
1492 struct dm_table *map, struct bio *bio)
0ce65797 1493{
1da177e4 1494 struct clone_info ci;
512875bd 1495 int error = 0;
1da177e4 1496
83d5e5b0 1497 if (unlikely(!map)) {
6a8736d1 1498 bio_io_error(bio);
f0b9a450
MP
1499 return;
1500 }
692d0eb9 1501
83d5e5b0 1502 ci.map = map;
1da177e4 1503 ci.md = md;
1da177e4 1504 ci.io = alloc_io(md);
4e4cbee9 1505 ci.io->status = 0;
1da177e4 1506 atomic_set(&ci.io->io_count, 1);
745dc570 1507 ci.io->orig_bio = bio;
1da177e4 1508 ci.io->md = md;
f88fb981 1509 spin_lock_init(&ci.io->endio_lock);
4f024f37 1510 ci.sector = bio->bi_iter.bi_sector;
0ce65797 1511
3eaf840e 1512 start_io_acct(ci.io);
0ce65797 1513
1eff9d32 1514 if (bio->bi_opf & REQ_PREFLUSH) {
b372d360
MS
1515 ci.bio = &ci.md->flush_bio;
1516 ci.sector_count = 0;
14fe594d 1517 error = __send_empty_flush(&ci);
b372d360 1518 /* dec_pending submits any data associated with flush */
a4aa5e56
DLM
1519 } else if (bio_op(bio) == REQ_OP_ZONE_RESET) {
1520 ci.bio = bio;
1521 ci.sector_count = 0;
1522 error = __split_and_process_non_flush(&ci);
b372d360 1523 } else {
6a8736d1 1524 ci.bio = bio;
d87f4c14 1525 ci.sector_count = bio_sectors(bio);
18a25da8 1526 while (ci.sector_count && !error) {
14fe594d 1527 error = __split_and_process_non_flush(&ci);
18a25da8
N
1528 if (current->bio_list && ci.sector_count && !error) {
1529 /*
1530 * Remainder must be passed to generic_make_request()
1531 * so that it gets handled *after* bios already submitted
1532 * have been completely processed.
1533 * We take a clone of the original to store in
745dc570 1534 * ci.io->orig_bio to be used by end_io_acct() and
18a25da8
N
1535 * for dec_pending to use for completion handling.
1536 * As this path is not used for REQ_OP_ZONE_REPORT,
745dc570 1537 * the usage of io->orig_bio in dm_remap_zone_report()
18a25da8
N
1538 * won't be affected by this reassignment.
1539 */
1540 struct bio *b = bio_clone_bioset(bio, GFP_NOIO,
1541 md->queue->bio_split);
745dc570 1542 ci.io->orig_bio = b;
18a25da8
N
1543 bio_advance(bio, (bio_sectors(bio) - ci.sector_count) << 9);
1544 bio_chain(b, bio);
1545 generic_make_request(bio);
1546 break;
1547 }
1548 }
d87f4c14 1549 }
0ce65797 1550
1da177e4 1551 /* drop the extra reference count */
54385bf7 1552 dec_pending(ci.io, errno_to_blk_status(error));
0ce65797
MS
1553}
1554
cec47e3d 1555/*
18a25da8
N
1556 * The request function that remaps the bio to one target and
1557 * splits off any remainder.
cec47e3d 1558 */
dece1635 1559static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
cec47e3d 1560{
12f03a49 1561 int rw = bio_data_dir(bio);
cec47e3d 1562 struct mapped_device *md = q->queuedata;
83d5e5b0
MP
1563 int srcu_idx;
1564 struct dm_table *map;
cec47e3d 1565
83d5e5b0 1566 map = dm_get_live_table(md, &srcu_idx);
29e4013d 1567
d62e26b3 1568 generic_start_io_acct(q, rw, bio_sectors(bio), &dm_disk(md)->part0);
d0bcb878 1569
6a8736d1
TH
1570 /* if we're suspended, we have to queue this io for later */
1571 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
83d5e5b0 1572 dm_put_live_table(md, srcu_idx);
9eef87da 1573
1eff9d32 1574 if (!(bio->bi_opf & REQ_RAHEAD))
6a8736d1
TH
1575 queue_io(md, bio);
1576 else
54d9a1b4 1577 bio_io_error(bio);
dece1635 1578 return BLK_QC_T_NONE;
cec47e3d 1579 }
1da177e4 1580
83d5e5b0
MP
1581 __split_and_process_bio(md, map, bio);
1582 dm_put_live_table(md, srcu_idx);
dece1635 1583 return BLK_QC_T_NONE;
cec47e3d
KU
1584}
1585
1da177e4
LT
1586static int dm_any_congested(void *congested_data, int bdi_bits)
1587{
8a57dfc6
CS
1588 int r = bdi_bits;
1589 struct mapped_device *md = congested_data;
1590 struct dm_table *map;
1da177e4 1591
1eb787ec 1592 if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
e522c039 1593 if (dm_request_based(md)) {
cec47e3d 1594 /*
e522c039
MS
1595 * With request-based DM we only need to check the
1596 * top-level queue for congestion.
cec47e3d 1597 */
dc3b17cc 1598 r = md->queue->backing_dev_info->wb.state & bdi_bits;
e522c039
MS
1599 } else {
1600 map = dm_get_live_table_fast(md);
1601 if (map)
cec47e3d 1602 r = dm_table_any_congested(map, bdi_bits);
e522c039 1603 dm_put_live_table_fast(md);
8a57dfc6
CS
1604 }
1605 }
1606
1da177e4
LT
1607 return r;
1608}
1609
1610/*-----------------------------------------------------------------
1611 * An IDR is used to keep track of allocated minor numbers.
1612 *---------------------------------------------------------------*/
2b06cfff 1613static void free_minor(int minor)
1da177e4 1614{
f32c10b0 1615 spin_lock(&_minor_lock);
1da177e4 1616 idr_remove(&_minor_idr, minor);
f32c10b0 1617 spin_unlock(&_minor_lock);
1da177e4
LT
1618}
1619
1620/*
1621 * See if the device with a specific minor # is free.
1622 */
cf13ab8e 1623static int specific_minor(int minor)
1da177e4 1624{
c9d76be6 1625 int r;
1da177e4
LT
1626
1627 if (minor >= (1 << MINORBITS))
1628 return -EINVAL;
1629
c9d76be6 1630 idr_preload(GFP_KERNEL);
f32c10b0 1631 spin_lock(&_minor_lock);
1da177e4 1632
c9d76be6 1633 r = idr_alloc(&_minor_idr, MINOR_ALLOCED, minor, minor + 1, GFP_NOWAIT);
1da177e4 1634
f32c10b0 1635 spin_unlock(&_minor_lock);
c9d76be6
TH
1636 idr_preload_end();
1637 if (r < 0)
1638 return r == -ENOSPC ? -EBUSY : r;
1639 return 0;
1da177e4
LT
1640}
1641
cf13ab8e 1642static int next_free_minor(int *minor)
1da177e4 1643{
c9d76be6 1644 int r;
62f75c2f 1645
c9d76be6 1646 idr_preload(GFP_KERNEL);
f32c10b0 1647 spin_lock(&_minor_lock);
1da177e4 1648
c9d76be6 1649 r = idr_alloc(&_minor_idr, MINOR_ALLOCED, 0, 1 << MINORBITS, GFP_NOWAIT);
1da177e4 1650
f32c10b0 1651 spin_unlock(&_minor_lock);
c9d76be6
TH
1652 idr_preload_end();
1653 if (r < 0)
1654 return r;
1655 *minor = r;
1656 return 0;
1da177e4
LT
1657}
1658
83d5cde4 1659static const struct block_device_operations dm_blk_dops;
f26c5719 1660static const struct dax_operations dm_dax_ops;
1da177e4 1661
53d5914f
MP
1662static void dm_wq_work(struct work_struct *work);
1663
4cc96131 1664void dm_init_md_queue(struct mapped_device *md)
4a0b4ddf 1665{
ad5f498f
MP
1666 /*
1667 * Initialize data that will only be used by a non-blk-mq DM queue
1668 * - must do so here (in alloc_dev callchain) before queue is used
1669 */
1670 md->queue->queuedata = md;
dc3b17cc 1671 md->queue->backing_dev_info->congested_data = md;
bfebd1cd 1672}
4a0b4ddf 1673
4cc96131 1674void dm_init_normal_md_queue(struct mapped_device *md)
bfebd1cd 1675{
17e149b8 1676 md->use_blk_mq = false;
bfebd1cd
MS
1677 dm_init_md_queue(md);
1678
1679 /*
1680 * Initialize aspects of queue that aren't relevant for blk-mq
1681 */
dc3b17cc 1682 md->queue->backing_dev_info->congested_fn = dm_any_congested;
4a0b4ddf
MS
1683}
1684
0f20972f
MS
1685static void cleanup_mapped_device(struct mapped_device *md)
1686{
0f20972f
MS
1687 if (md->wq)
1688 destroy_workqueue(md->wq);
1689 if (md->kworker_task)
1690 kthread_stop(md->kworker_task);
0f20972f
MS
1691 if (md->bs)
1692 bioset_free(md->bs);
64f52b0e
MS
1693 if (md->io_bs)
1694 bioset_free(md->io_bs);
0f20972f 1695
f26c5719
DW
1696 if (md->dax_dev) {
1697 kill_dax(md->dax_dev);
1698 put_dax(md->dax_dev);
1699 md->dax_dev = NULL;
1700 }
1701
0f20972f
MS
1702 if (md->disk) {
1703 spin_lock(&_minor_lock);
1704 md->disk->private_data = NULL;
1705 spin_unlock(&_minor_lock);
0f20972f
MS
1706 del_gendisk(md->disk);
1707 put_disk(md->disk);
1708 }
1709
1710 if (md->queue)
1711 blk_cleanup_queue(md->queue);
1712
d09960b0
TE
1713 cleanup_srcu_struct(&md->io_barrier);
1714
0f20972f
MS
1715 if (md->bdev) {
1716 bdput(md->bdev);
1717 md->bdev = NULL;
1718 }
4cc96131
MS
1719
1720 dm_mq_cleanup_mapped_device(md);
0f20972f
MS
1721}
1722
1da177e4
LT
1723/*
1724 * Allocate and initialise a blank device with a given minor.
1725 */
2b06cfff 1726static struct mapped_device *alloc_dev(int minor)
1da177e4 1727{
115485e8 1728 int r, numa_node_id = dm_get_numa_node();
f26c5719 1729 struct dax_device *dax_dev;
115485e8 1730 struct mapped_device *md;
ba61fdd1 1731 void *old_md;
1da177e4 1732
856eb091 1733 md = kvzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id);
1da177e4
LT
1734 if (!md) {
1735 DMWARN("unable to allocate device, out of memory.");
1736 return NULL;
1737 }
1738
10da4f79 1739 if (!try_module_get(THIS_MODULE))
6ed7ade8 1740 goto bad_module_get;
10da4f79 1741
1da177e4 1742 /* get a minor number for the dev */
2b06cfff 1743 if (minor == DM_ANY_MINOR)
cf13ab8e 1744 r = next_free_minor(&minor);
2b06cfff 1745 else
cf13ab8e 1746 r = specific_minor(minor);
1da177e4 1747 if (r < 0)
6ed7ade8 1748 goto bad_minor;
1da177e4 1749
83d5e5b0
MP
1750 r = init_srcu_struct(&md->io_barrier);
1751 if (r < 0)
1752 goto bad_io_barrier;
1753
115485e8 1754 md->numa_node_id = numa_node_id;
4cc96131 1755 md->use_blk_mq = dm_use_blk_mq_default();
591ddcfc 1756 md->init_tio_pdu = false;
a5664dad 1757 md->type = DM_TYPE_NONE;
e61290a4 1758 mutex_init(&md->suspend_lock);
a5664dad 1759 mutex_init(&md->type_lock);
86f1152b 1760 mutex_init(&md->table_devices_lock);
022c2611 1761 spin_lock_init(&md->deferred_lock);
1da177e4 1762 atomic_set(&md->holders, 1);
5c6bd75d 1763 atomic_set(&md->open_count, 0);
1da177e4 1764 atomic_set(&md->event_nr, 0);
7a8c3d3b
MA
1765 atomic_set(&md->uevent_seq, 0);
1766 INIT_LIST_HEAD(&md->uevent_list);
86f1152b 1767 INIT_LIST_HEAD(&md->table_devices);
7a8c3d3b 1768 spin_lock_init(&md->uevent_lock);
1da177e4 1769
115485e8 1770 md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
1da177e4 1771 if (!md->queue)
0f20972f 1772 goto bad;
1da177e4 1773
4a0b4ddf 1774 dm_init_md_queue(md);
9faf400f 1775
115485e8 1776 md->disk = alloc_disk_node(1, numa_node_id);
1da177e4 1777 if (!md->disk)
0f20972f 1778 goto bad;
1da177e4 1779
316d315b
NK
1780 atomic_set(&md->pending[0], 0);
1781 atomic_set(&md->pending[1], 0);
f0b04115 1782 init_waitqueue_head(&md->wait);
53d5914f 1783 INIT_WORK(&md->work, dm_wq_work);
f0b04115 1784 init_waitqueue_head(&md->eventq);
2995fa78 1785 init_completion(&md->kobj_holder.completion);
2eb6e1e3 1786 md->kworker_task = NULL;
f0b04115 1787
1da177e4
LT
1788 md->disk->major = _major;
1789 md->disk->first_minor = minor;
1790 md->disk->fops = &dm_blk_dops;
1791 md->disk->queue = md->queue;
1792 md->disk->private_data = md;
1793 sprintf(md->disk->disk_name, "dm-%d", minor);
f26c5719
DW
1794
1795 dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
1796 if (!dax_dev)
1797 goto bad;
1798 md->dax_dev = dax_dev;
1799
1da177e4 1800 add_disk(md->disk);
7e51f257 1801 format_dev_t(md->name, MKDEV(_major, minor));
1da177e4 1802
670368a8 1803 md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
304f3f6a 1804 if (!md->wq)
0f20972f 1805 goto bad;
304f3f6a 1806
32a926da
MP
1807 md->bdev = bdget_disk(md->disk, 0);
1808 if (!md->bdev)
0f20972f 1809 goto bad;
32a926da 1810
3a83f467 1811 bio_init(&md->flush_bio, NULL, 0);
74d46992 1812 bio_set_dev(&md->flush_bio, md->bdev);
ff0361b3 1813 md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
6a8736d1 1814
fd2ed4d2
MP
1815 dm_stats_init(&md->stats);
1816
ba61fdd1 1817 /* Populate the mapping, nobody knows we exist yet */
f32c10b0 1818 spin_lock(&_minor_lock);
ba61fdd1 1819 old_md = idr_replace(&_minor_idr, md, minor);
f32c10b0 1820 spin_unlock(&_minor_lock);
ba61fdd1
JM
1821
1822 BUG_ON(old_md != MINOR_ALLOCED);
1823
1da177e4
LT
1824 return md;
1825
0f20972f
MS
1826bad:
1827 cleanup_mapped_device(md);
83d5e5b0 1828bad_io_barrier:
1da177e4 1829 free_minor(minor);
6ed7ade8 1830bad_minor:
10da4f79 1831 module_put(THIS_MODULE);
6ed7ade8 1832bad_module_get:
856eb091 1833 kvfree(md);
1da177e4
LT
1834 return NULL;
1835}
1836
ae9da83f
JN
1837static void unlock_fs(struct mapped_device *md);
1838
1da177e4
LT
1839static void free_dev(struct mapped_device *md)
1840{
f331c029 1841 int minor = MINOR(disk_devt(md->disk));
63d94e48 1842
32a926da 1843 unlock_fs(md);
2eb6e1e3 1844
0f20972f 1845 cleanup_mapped_device(md);
63a4f065 1846
86f1152b 1847 free_table_devices(&md->table_devices);
63a4f065 1848 dm_stats_cleanup(&md->stats);
63a4f065
MS
1849 free_minor(minor);
1850
10da4f79 1851 module_put(THIS_MODULE);
856eb091 1852 kvfree(md);
1da177e4
LT
1853}
1854
e6ee8c0b
KU
1855static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
1856{
c0820cf5 1857 struct dm_md_mempools *p = dm_table_get_md_mempools(t);
e6ee8c0b 1858
0776aa0e 1859 if (dm_table_bio_based(t)) {
64f52b0e
MS
1860 /*
1861 * The md may already have mempools that need changing.
1862 * If so, reload bioset because front_pad may have changed
1863 * because a different table was loaded.
1864 */
0776aa0e 1865 if (md->bs) {
16245bdc 1866 bioset_free(md->bs);
0776aa0e 1867 md->bs = NULL;
16245bdc 1868 }
64f52b0e
MS
1869 if (md->io_bs) {
1870 bioset_free(md->io_bs);
1871 md->io_bs = NULL;
1872 }
0776aa0e
MS
1873
1874 } else if (md->bs) {
4e6e36c3
MS
1875 /*
1876 * There's no need to reload with request-based dm
1877 * because the size of front_pad doesn't change.
1878 * Note for future: If you are to reload bioset,
1879 * prep-ed requests in the queue may refer
1880 * to bio from the old bioset, so you must walk
1881 * through the queue to unprep.
1882 */
1883 goto out;
c0820cf5 1884 }
e6ee8c0b 1885
dde1e1ec 1886 BUG_ON(!p || md->bs || md->io_bs);
cbc4e3c1 1887
e6ee8c0b
KU
1888 md->bs = p->bs;
1889 p->bs = NULL;
64f52b0e
MS
1890 md->io_bs = p->io_bs;
1891 p->io_bs = NULL;
e6ee8c0b 1892out:
02233342 1893 /* mempool bind completed, no longer need any mempools in the table */
e6ee8c0b
KU
1894 dm_table_free_md_mempools(t);
1895}
1896
1da177e4
LT
1897/*
1898 * Bind a table to the device.
1899 */
1900static void event_callback(void *context)
1901{
7a8c3d3b
MA
1902 unsigned long flags;
1903 LIST_HEAD(uevents);
1da177e4
LT
1904 struct mapped_device *md = (struct mapped_device *) context;
1905
7a8c3d3b
MA
1906 spin_lock_irqsave(&md->uevent_lock, flags);
1907 list_splice_init(&md->uevent_list, &uevents);
1908 spin_unlock_irqrestore(&md->uevent_lock, flags);
1909
ed9e1982 1910 dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
7a8c3d3b 1911
1da177e4
LT
1912 atomic_inc(&md->event_nr);
1913 wake_up(&md->eventq);
62e08243 1914 dm_issue_global_event();
1da177e4
LT
1915}
1916
c217649b
MS
1917/*
1918 * Protected by md->suspend_lock obtained by dm_swap_table().
1919 */
4e90188b 1920static void __set_size(struct mapped_device *md, sector_t size)
1da177e4 1921{
1ea0654e
BVA
1922 lockdep_assert_held(&md->suspend_lock);
1923
4e90188b 1924 set_capacity(md->disk, size);
1da177e4 1925
db8fef4f 1926 i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
1da177e4
LT
1927}
1928
042d2a9b
AK
1929/*
1930 * Returns old map, which caller must destroy.
1931 */
1932static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
1933 struct queue_limits *limits)
1da177e4 1934{
042d2a9b 1935 struct dm_table *old_map;
165125e1 1936 struct request_queue *q = md->queue;
1da177e4
LT
1937 sector_t size;
1938
5a8f1f80
BVA
1939 lockdep_assert_held(&md->suspend_lock);
1940
1da177e4 1941 size = dm_table_get_size(t);
3ac51e74
DW
1942
1943 /*
1944 * Wipe any geometry if the size of the table changed.
1945 */
fd2ed4d2 1946 if (size != dm_get_size(md))
3ac51e74
DW
1947 memset(&md->geometry, 0, sizeof(md->geometry));
1948
32a926da 1949 __set_size(md, size);
d5816876 1950
2ca3310e
AK
1951 dm_table_event_callback(t, event_callback, md);
1952
e6ee8c0b
KU
1953 /*
1954 * The queue hasn't been stopped yet, if the old table type wasn't
1955 * for request-based during suspension. So stop it to prevent
1956 * I/O mapping before resume.
1957 * This must be done before setting the queue restrictions,
1958 * because request-based dm may be run just after the setting.
1959 */
16f12266 1960 if (dm_table_request_based(t)) {
eca7ee6d 1961 dm_stop_queue(q);
16f12266
MS
1962 /*
1963 * Leverage the fact that request-based DM targets are
1964 * immutable singletons and establish md->immutable_target
1965 * - used to optimize both dm_request_fn and dm_mq_queue_rq
1966 */
1967 md->immutable_target = dm_table_get_immutable_target(t);
1968 }
e6ee8c0b
KU
1969
1970 __bind_mempools(md, t);
1971
a12f5d48 1972 old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
1d3aa6f6 1973 rcu_assign_pointer(md->map, (void *)t);
36a0456f
AK
1974 md->immutable_target_type = dm_table_get_immutable_target_type(t);
1975
754c5fc7 1976 dm_table_set_restrictions(t, q, limits);
41abc4e1
HR
1977 if (old_map)
1978 dm_sync_table(md);
1da177e4 1979
042d2a9b 1980 return old_map;
1da177e4
LT
1981}
1982
a7940155
AK
1983/*
1984 * Returns unbound table for the caller to free.
1985 */
1986static struct dm_table *__unbind(struct mapped_device *md)
1da177e4 1987{
a12f5d48 1988 struct dm_table *map = rcu_dereference_protected(md->map, 1);
1da177e4
LT
1989
1990 if (!map)
a7940155 1991 return NULL;
1da177e4
LT
1992
1993 dm_table_event_callback(map, NULL, NULL);
9cdb8520 1994 RCU_INIT_POINTER(md->map, NULL);
83d5e5b0 1995 dm_sync_table(md);
a7940155
AK
1996
1997 return map;
1da177e4
LT
1998}
1999
2000/*
2001 * Constructor for a new device.
2002 */
2b06cfff 2003int dm_create(int minor, struct mapped_device **result)
1da177e4
LT
2004{
2005 struct mapped_device *md;
2006
2b06cfff 2007 md = alloc_dev(minor);
1da177e4
LT
2008 if (!md)
2009 return -ENXIO;
2010
784aae73
MB
2011 dm_sysfs_init(md);
2012
1da177e4
LT
2013 *result = md;
2014 return 0;
2015}
2016
a5664dad
MS
2017/*
2018 * Functions to manage md->type.
2019 * All are required to hold md->type_lock.
2020 */
2021void dm_lock_md_type(struct mapped_device *md)
2022{
2023 mutex_lock(&md->type_lock);
2024}
2025
2026void dm_unlock_md_type(struct mapped_device *md)
2027{
2028 mutex_unlock(&md->type_lock);
2029}
2030
7e0d574f 2031void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type)
a5664dad 2032{
00c4fc3b 2033 BUG_ON(!mutex_is_locked(&md->type_lock));
a5664dad
MS
2034 md->type = type;
2035}
2036
7e0d574f 2037enum dm_queue_mode dm_get_md_type(struct mapped_device *md)
a5664dad
MS
2038{
2039 return md->type;
2040}
2041
36a0456f
AK
2042struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
2043{
2044 return md->immutable_target_type;
2045}
2046
f84cb8a4
MS
2047/*
2048 * The queue_limits are only valid as long as you have a reference
2049 * count on 'md'.
2050 */
2051struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
2052{
2053 BUG_ON(!atomic_read(&md->holders));
2054 return &md->queue->limits;
2055}
2056EXPORT_SYMBOL_GPL(dm_get_queue_limits);
2057
4a0b4ddf
MS
2058/*
2059 * Setup the DM device's queue based on md's type
2060 */
591ddcfc 2061int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
4a0b4ddf 2062{
bfebd1cd 2063 int r;
7e0d574f 2064 enum dm_queue_mode type = dm_get_md_type(md);
bfebd1cd 2065
545ed20e 2066 switch (type) {
bfebd1cd 2067 case DM_TYPE_REQUEST_BASED:
eb8db831 2068 r = dm_old_init_request_queue(md, t);
bfebd1cd 2069 if (r) {
eca7ee6d 2070 DMERR("Cannot initialize queue for request-based mapped device");
bfebd1cd 2071 return r;
ff36ab34 2072 }
bfebd1cd
MS
2073 break;
2074 case DM_TYPE_MQ_REQUEST_BASED:
e83068a5 2075 r = dm_mq_init_request_queue(md, t);
bfebd1cd 2076 if (r) {
eca7ee6d 2077 DMERR("Cannot initialize queue for request-based dm-mq mapped device");
bfebd1cd
MS
2078 return r;
2079 }
2080 break;
2081 case DM_TYPE_BIO_BASED:
545ed20e 2082 case DM_TYPE_DAX_BIO_BASED:
eca7ee6d 2083 dm_init_normal_md_queue(md);
ff36ab34 2084 blk_queue_make_request(md->queue, dm_make_request);
bfebd1cd 2085 break;
7e0d574f
BVA
2086 case DM_TYPE_NONE:
2087 WARN_ON_ONCE(true);
2088 break;
4a0b4ddf
MS
2089 }
2090
2091 return 0;
2092}
2093
2bec1f4a 2094struct mapped_device *dm_get_md(dev_t dev)
1da177e4
LT
2095{
2096 struct mapped_device *md;
1da177e4
LT
2097 unsigned minor = MINOR(dev);
2098
2099 if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
2100 return NULL;
2101
f32c10b0 2102 spin_lock(&_minor_lock);
1da177e4
LT
2103
2104 md = idr_find(&_minor_idr, minor);
49de5769
MS
2105 if (!md || md == MINOR_ALLOCED || (MINOR(disk_devt(dm_disk(md))) != minor) ||
2106 test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) {
2107 md = NULL;
2108 goto out;
fba9f90e 2109 }
49de5769 2110 dm_get(md);
fba9f90e 2111out:
f32c10b0 2112 spin_unlock(&_minor_lock);
1da177e4 2113
637842cf
DT
2114 return md;
2115}
3cf2e4ba 2116EXPORT_SYMBOL_GPL(dm_get_md);
d229a958 2117
9ade92a9 2118void *dm_get_mdptr(struct mapped_device *md)
637842cf 2119{
9ade92a9 2120 return md->interface_ptr;
1da177e4
LT
2121}
2122
2123void dm_set_mdptr(struct mapped_device *md, void *ptr)
2124{
2125 md->interface_ptr = ptr;
2126}
2127
2128void dm_get(struct mapped_device *md)
2129{
2130 atomic_inc(&md->holders);
3f77316d 2131 BUG_ON(test_bit(DMF_FREEING, &md->flags));
1da177e4
LT
2132}
2133
09ee96b2
MP
2134int dm_hold(struct mapped_device *md)
2135{
2136 spin_lock(&_minor_lock);
2137 if (test_bit(DMF_FREEING, &md->flags)) {
2138 spin_unlock(&_minor_lock);
2139 return -EBUSY;
2140 }
2141 dm_get(md);
2142 spin_unlock(&_minor_lock);
2143 return 0;
2144}
2145EXPORT_SYMBOL_GPL(dm_hold);
2146
72d94861
AK
2147const char *dm_device_name(struct mapped_device *md)
2148{
2149 return md->name;
2150}
2151EXPORT_SYMBOL_GPL(dm_device_name);
2152
3f77316d 2153static void __dm_destroy(struct mapped_device *md, bool wait)
1da177e4 2154{
3b785fbc 2155 struct request_queue *q = dm_get_md_queue(md);
1134e5ae 2156 struct dm_table *map;
83d5e5b0 2157 int srcu_idx;
1da177e4 2158
3f77316d 2159 might_sleep();
fba9f90e 2160
63a4f065 2161 spin_lock(&_minor_lock);
3f77316d
KU
2162 idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
2163 set_bit(DMF_FREEING, &md->flags);
2164 spin_unlock(&_minor_lock);
3b785fbc 2165
2e91c369 2166 blk_set_queue_dying(q);
3f77316d 2167
02233342 2168 if (dm_request_based(md) && md->kworker_task)
3989144f 2169 kthread_flush_worker(&md->kworker);
2eb6e1e3 2170
ab7c7bb6
MP
2171 /*
2172 * Take suspend_lock so that presuspend and postsuspend methods
2173 * do not race with internal suspend.
2174 */
2175 mutex_lock(&md->suspend_lock);
2a708cff 2176 map = dm_get_live_table(md, &srcu_idx);
3f77316d
KU
2177 if (!dm_suspended_md(md)) {
2178 dm_table_presuspend_targets(map);
2179 dm_table_postsuspend_targets(map);
1da177e4 2180 }
83d5e5b0
MP
2181 /* dm_put_live_table must be before msleep, otherwise deadlock is possible */
2182 dm_put_live_table(md, srcu_idx);
2a708cff 2183 mutex_unlock(&md->suspend_lock);
83d5e5b0 2184
3f77316d
KU
2185 /*
2186 * Rare, but there may be I/O requests still going to complete,
2187 * for example. Wait for all references to disappear.
2188 * No one should increment the reference count of the mapped_device,
2189 * after the mapped_device state becomes DMF_FREEING.
2190 */
2191 if (wait)
2192 while (atomic_read(&md->holders))
2193 msleep(1);
2194 else if (atomic_read(&md->holders))
2195 DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
2196 dm_device_name(md), atomic_read(&md->holders));
2197
2198 dm_sysfs_exit(md);
3f77316d
KU
2199 dm_table_destroy(__unbind(md));
2200 free_dev(md);
2201}
2202
2203void dm_destroy(struct mapped_device *md)
2204{
2205 __dm_destroy(md, true);
2206}
2207
2208void dm_destroy_immediate(struct mapped_device *md)
2209{
2210 __dm_destroy(md, false);
2211}
2212
2213void dm_put(struct mapped_device *md)
2214{
2215 atomic_dec(&md->holders);
1da177e4 2216}
79eb885c 2217EXPORT_SYMBOL_GPL(dm_put);
1da177e4 2218
b48633f8 2219static int dm_wait_for_completion(struct mapped_device *md, long task_state)
46125c1c
MB
2220{
2221 int r = 0;
9f4c3f87 2222 DEFINE_WAIT(wait);
46125c1c
MB
2223
2224 while (1) {
9f4c3f87 2225 prepare_to_wait(&md->wait, &wait, task_state);
46125c1c 2226
b4324fee 2227 if (!md_in_flight(md))
46125c1c
MB
2228 break;
2229
e3fabdfd 2230 if (signal_pending_state(task_state, current)) {
46125c1c
MB
2231 r = -EINTR;
2232 break;
2233 }
2234
2235 io_schedule();
2236 }
9f4c3f87 2237 finish_wait(&md->wait, &wait);
b44ebeb0 2238
46125c1c
MB
2239 return r;
2240}
2241
1da177e4
LT
2242/*
2243 * Process the deferred bios
2244 */
ef208587 2245static void dm_wq_work(struct work_struct *work)
1da177e4 2246{
ef208587
MP
2247 struct mapped_device *md = container_of(work, struct mapped_device,
2248 work);
6d6f10df 2249 struct bio *c;
83d5e5b0
MP
2250 int srcu_idx;
2251 struct dm_table *map;
1da177e4 2252
83d5e5b0 2253 map = dm_get_live_table(md, &srcu_idx);
ef208587 2254
3b00b203 2255 while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
df12ee99
AK
2256 spin_lock_irq(&md->deferred_lock);
2257 c = bio_list_pop(&md->deferred);
2258 spin_unlock_irq(&md->deferred_lock);
2259
6a8736d1 2260 if (!c)
df12ee99 2261 break;
022c2611 2262
e6ee8c0b
KU
2263 if (dm_request_based(md))
2264 generic_make_request(c);
6a8736d1 2265 else
83d5e5b0 2266 __split_and_process_bio(md, map, c);
022c2611 2267 }
73d410c0 2268
83d5e5b0 2269 dm_put_live_table(md, srcu_idx);
1da177e4
LT
2270}
2271
9a1fb464 2272static void dm_queue_flush(struct mapped_device *md)
304f3f6a 2273{
3b00b203 2274 clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
4e857c58 2275 smp_mb__after_atomic();
53d5914f 2276 queue_work(md->wq, &md->work);
304f3f6a
MB
2277}
2278
1da177e4 2279/*
042d2a9b 2280 * Swap in a new table, returning the old one for the caller to destroy.
1da177e4 2281 */
042d2a9b 2282struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
1da177e4 2283{
87eb5b21 2284 struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL);
754c5fc7 2285 struct queue_limits limits;
042d2a9b 2286 int r;
1da177e4 2287
e61290a4 2288 mutex_lock(&md->suspend_lock);
1da177e4
LT
2289
2290 /* device must be suspended */
4f186f8b 2291 if (!dm_suspended_md(md))
93c534ae 2292 goto out;
1da177e4 2293
3ae70656
MS
2294 /*
2295 * If the new table has no data devices, retain the existing limits.
2296 * This helps multipath with queue_if_no_path if all paths disappear,
2297 * then new I/O is queued based on these limits, and then some paths
2298 * reappear.
2299 */
2300 if (dm_table_has_no_data_devices(table)) {
83d5e5b0 2301 live_map = dm_get_live_table_fast(md);
3ae70656
MS
2302 if (live_map)
2303 limits = md->queue->limits;
83d5e5b0 2304 dm_put_live_table_fast(md);
3ae70656
MS
2305 }
2306
87eb5b21
MC
2307 if (!live_map) {
2308 r = dm_calculate_queue_limits(table, &limits);
2309 if (r) {
2310 map = ERR_PTR(r);
2311 goto out;
2312 }
042d2a9b 2313 }
754c5fc7 2314
042d2a9b 2315 map = __bind(md, table, &limits);
62e08243 2316 dm_issue_global_event();
1da177e4 2317
93c534ae 2318out:
e61290a4 2319 mutex_unlock(&md->suspend_lock);
042d2a9b 2320 return map;
1da177e4
LT
2321}
2322
2323/*
2324 * Functions to lock and unlock any filesystem running on the
2325 * device.
2326 */
2ca3310e 2327static int lock_fs(struct mapped_device *md)
1da177e4 2328{
e39e2e95 2329 int r;
1da177e4
LT
2330
2331 WARN_ON(md->frozen_sb);
dfbe03f6 2332
db8fef4f 2333 md->frozen_sb = freeze_bdev(md->bdev);
dfbe03f6 2334 if (IS_ERR(md->frozen_sb)) {
cf222b37 2335 r = PTR_ERR(md->frozen_sb);
e39e2e95
AK
2336 md->frozen_sb = NULL;
2337 return r;
dfbe03f6
AK
2338 }
2339
aa8d7c2f
AK
2340 set_bit(DMF_FROZEN, &md->flags);
2341
1da177e4
LT
2342 return 0;
2343}
2344
2ca3310e 2345static void unlock_fs(struct mapped_device *md)
1da177e4 2346{
aa8d7c2f
AK
2347 if (!test_bit(DMF_FROZEN, &md->flags))
2348 return;
2349
db8fef4f 2350 thaw_bdev(md->bdev, md->frozen_sb);
1da177e4 2351 md->frozen_sb = NULL;
aa8d7c2f 2352 clear_bit(DMF_FROZEN, &md->flags);
1da177e4
LT
2353}
2354
2355/*
b48633f8
BVA
2356 * @suspend_flags: DM_SUSPEND_LOCKFS_FLAG and/or DM_SUSPEND_NOFLUSH_FLAG
2357 * @task_state: e.g. TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE
2358 * @dmf_suspended_flag: DMF_SUSPENDED or DMF_SUSPENDED_INTERNALLY
2359 *
ffcc3936
MS
2360 * If __dm_suspend returns 0, the device is completely quiescent
2361 * now. There is no request-processing activity. All new requests
2362 * are being added to md->deferred list.
cec47e3d 2363 */
ffcc3936 2364static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
b48633f8 2365 unsigned suspend_flags, long task_state,
eaf9a736 2366 int dmf_suspended_flag)
1da177e4 2367{
ffcc3936
MS
2368 bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
2369 bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
2370 int r;
1da177e4 2371
5a8f1f80
BVA
2372 lockdep_assert_held(&md->suspend_lock);
2373
2e93ccc1
KU
2374 /*
2375 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
2376 * This flag is cleared before dm_suspend returns.
2377 */
2378 if (noflush)
2379 set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
86331f39
BVA
2380 else
2381 pr_debug("%s: suspending with flush\n", dm_device_name(md));
2e93ccc1 2382
d67ee213
MS
2383 /*
2384 * This gets reverted if there's an error later and the targets
2385 * provide the .presuspend_undo hook.
2386 */
cf222b37
AK
2387 dm_table_presuspend_targets(map);
2388
32a926da 2389 /*
9f518b27
KU
2390 * Flush I/O to the device.
2391 * Any I/O submitted after lock_fs() may not be flushed.
2392 * noflush takes precedence over do_lockfs.
2393 * (lock_fs() flushes I/Os and waits for them to complete.)
32a926da
MP
2394 */
2395 if (!noflush && do_lockfs) {
2396 r = lock_fs(md);
d67ee213
MS
2397 if (r) {
2398 dm_table_presuspend_undo_targets(map);
ffcc3936 2399 return r;
d67ee213 2400 }
aa8d7c2f 2401 }
1da177e4
LT
2402
2403 /*
3b00b203
MP
2404 * Here we must make sure that no processes are submitting requests
2405 * to target drivers i.e. no one may be executing
2406 * __split_and_process_bio. This is called from dm_request and
2407 * dm_wq_work.
2408 *
2409 * To get all processes out of __split_and_process_bio in dm_request,
2410 * we take the write lock. To prevent any process from reentering
6a8736d1
TH
2411 * __split_and_process_bio from dm_request and quiesce the thread
2412 * (dm_wq_work), we set BMF_BLOCK_IO_FOR_SUSPEND and call
2413 * flush_workqueue(md->wq).
1da177e4 2414 */
1eb787ec 2415 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
41abc4e1
HR
2416 if (map)
2417 synchronize_srcu(&md->io_barrier);
1da177e4 2418
d0bcb878 2419 /*
29e4013d
TH
2420 * Stop md->queue before flushing md->wq in case request-based
2421 * dm defers requests to md->wq from md->queue.
d0bcb878 2422 */
2eb6e1e3 2423 if (dm_request_based(md)) {
eca7ee6d 2424 dm_stop_queue(md->queue);
02233342 2425 if (md->kworker_task)
3989144f 2426 kthread_flush_worker(&md->kworker);
2eb6e1e3 2427 }
cec47e3d 2428
d0bcb878
KU
2429 flush_workqueue(md->wq);
2430
1da177e4 2431 /*
3b00b203
MP
2432 * At this point no more requests are entering target request routines.
2433 * We call dm_wait_for_completion to wait for all existing requests
2434 * to finish.
1da177e4 2435 */
b48633f8 2436 r = dm_wait_for_completion(md, task_state);
eaf9a736
MS
2437 if (!r)
2438 set_bit(dmf_suspended_flag, &md->flags);
1da177e4 2439
6d6f10df 2440 if (noflush)
022c2611 2441 clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
41abc4e1
HR
2442 if (map)
2443 synchronize_srcu(&md->io_barrier);
2e93ccc1 2444
1da177e4 2445 /* were we interrupted ? */
46125c1c 2446 if (r < 0) {
9a1fb464 2447 dm_queue_flush(md);
73d410c0 2448
cec47e3d 2449 if (dm_request_based(md))
eca7ee6d 2450 dm_start_queue(md->queue);
cec47e3d 2451
2ca3310e 2452 unlock_fs(md);
d67ee213 2453 dm_table_presuspend_undo_targets(map);
ffcc3936 2454 /* pushback list is already flushed, so skip flush */
2ca3310e 2455 }
1da177e4 2456
ffcc3936
MS
2457 return r;
2458}
2459
2460/*
2461 * We need to be able to change a mapping table under a mounted
2462 * filesystem. For example we might want to move some data in
2463 * the background. Before the table can be swapped with
2464 * dm_bind_table, dm_suspend must be called to flush any in
2465 * flight bios and ensure that any further io gets deferred.
2466 */
2467/*
2468 * Suspend mechanism in request-based dm.
2469 *
2470 * 1. Flush all I/Os by lock_fs() if needed.
2471 * 2. Stop dispatching any I/O by stopping the request_queue.
2472 * 3. Wait for all in-flight I/Os to be completed or requeued.
2473 *
2474 * To abort suspend, start the request_queue.
2475 */
2476int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
2477{
2478 struct dm_table *map = NULL;
2479 int r = 0;
2480
2481retry:
2482 mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
2483
2484 if (dm_suspended_md(md)) {
2485 r = -EINVAL;
2486 goto out_unlock;
2487 }
2488
2489 if (dm_suspended_internally_md(md)) {
2490 /* already internally suspended, wait for internal resume */
2491 mutex_unlock(&md->suspend_lock);
2492 r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
2493 if (r)
2494 return r;
2495 goto retry;
2496 }
2497
a12f5d48 2498 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
ffcc3936 2499
eaf9a736 2500 r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED);
ffcc3936
MS
2501 if (r)
2502 goto out_unlock;
3b00b203 2503
4d4471cb
KU
2504 dm_table_postsuspend_targets(map);
2505
d287483d 2506out_unlock:
e61290a4 2507 mutex_unlock(&md->suspend_lock);
cf222b37 2508 return r;
1da177e4
LT
2509}
2510
ffcc3936
MS
2511static int __dm_resume(struct mapped_device *md, struct dm_table *map)
2512{
2513 if (map) {
2514 int r = dm_table_resume_targets(map);
2515 if (r)
2516 return r;
2517 }
2518
2519 dm_queue_flush(md);
2520
2521 /*
2522 * Flushing deferred I/Os must be done after targets are resumed
2523 * so that mapping of targets can work correctly.
2524 * Request-based dm is queueing the deferred I/Os in its request_queue.
2525 */
2526 if (dm_request_based(md))
eca7ee6d 2527 dm_start_queue(md->queue);
ffcc3936
MS
2528
2529 unlock_fs(md);
2530
2531 return 0;
2532}
2533
1da177e4
LT
2534int dm_resume(struct mapped_device *md)
2535{
8dc23658 2536 int r;
cf222b37 2537 struct dm_table *map = NULL;
1da177e4 2538
ffcc3936 2539retry:
8dc23658 2540 r = -EINVAL;
ffcc3936
MS
2541 mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
2542
4f186f8b 2543 if (!dm_suspended_md(md))
cf222b37 2544 goto out;
cf222b37 2545
ffcc3936
MS
2546 if (dm_suspended_internally_md(md)) {
2547 /* already internally suspended, wait for internal resume */
2548 mutex_unlock(&md->suspend_lock);
2549 r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
2550 if (r)
2551 return r;
2552 goto retry;
2553 }
2554
a12f5d48 2555 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
2ca3310e 2556 if (!map || !dm_table_get_size(map))
cf222b37 2557 goto out;
1da177e4 2558
ffcc3936 2559 r = __dm_resume(md, map);
8757b776
MB
2560 if (r)
2561 goto out;
2ca3310e 2562
2ca3310e 2563 clear_bit(DMF_SUSPENDED, &md->flags);
cf222b37 2564out:
e61290a4 2565 mutex_unlock(&md->suspend_lock);
2ca3310e 2566
cf222b37 2567 return r;
1da177e4
LT
2568}
2569
fd2ed4d2
MP
2570/*
2571 * Internal suspend/resume works like userspace-driven suspend. It waits
2572 * until all bios finish and prevents issuing new bios to the target drivers.
2573 * It may be used only from the kernel.
fd2ed4d2
MP
2574 */
2575
ffcc3936 2576static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags)
fd2ed4d2 2577{
ffcc3936
MS
2578 struct dm_table *map = NULL;
2579
1ea0654e
BVA
2580 lockdep_assert_held(&md->suspend_lock);
2581
96b26c8c 2582 if (md->internal_suspend_count++)
ffcc3936
MS
2583 return; /* nested internal suspend */
2584
2585 if (dm_suspended_md(md)) {
2586 set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
2587 return; /* nest suspend */
2588 }
2589
a12f5d48 2590 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
ffcc3936
MS
2591
2592 /*
2593 * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is
2594 * supported. Properly supporting a TASK_INTERRUPTIBLE internal suspend
2595 * would require changing .presuspend to return an error -- avoid this
2596 * until there is a need for more elaborate variants of internal suspend.
2597 */
eaf9a736
MS
2598 (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE,
2599 DMF_SUSPENDED_INTERNALLY);
ffcc3936
MS
2600
2601 dm_table_postsuspend_targets(map);
2602}
2603
2604static void __dm_internal_resume(struct mapped_device *md)
2605{
96b26c8c
MP
2606 BUG_ON(!md->internal_suspend_count);
2607
2608 if (--md->internal_suspend_count)
ffcc3936
MS
2609 return; /* resume from nested internal suspend */
2610
fd2ed4d2 2611 if (dm_suspended_md(md))
ffcc3936
MS
2612 goto done; /* resume from nested suspend */
2613
2614 /*
2615 * NOTE: existing callers don't need to call dm_table_resume_targets
2616 * (which may fail -- so best to avoid it for now by passing NULL map)
2617 */
2618 (void) __dm_resume(md, NULL);
2619
2620done:
2621 clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
2622 smp_mb__after_atomic();
2623 wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY);
2624}
2625
2626void dm_internal_suspend_noflush(struct mapped_device *md)
2627{
2628 mutex_lock(&md->suspend_lock);
2629 __dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);
2630 mutex_unlock(&md->suspend_lock);
2631}
2632EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush);
2633
2634void dm_internal_resume(struct mapped_device *md)
2635{
2636 mutex_lock(&md->suspend_lock);
2637 __dm_internal_resume(md);
2638 mutex_unlock(&md->suspend_lock);
2639}
2640EXPORT_SYMBOL_GPL(dm_internal_resume);
2641
2642/*
2643 * Fast variants of internal suspend/resume hold md->suspend_lock,
2644 * which prevents interaction with userspace-driven suspend.
2645 */
2646
2647void dm_internal_suspend_fast(struct mapped_device *md)
2648{
2649 mutex_lock(&md->suspend_lock);
2650 if (dm_suspended_md(md) || dm_suspended_internally_md(md))
fd2ed4d2
MP
2651 return;
2652
2653 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
2654 synchronize_srcu(&md->io_barrier);
2655 flush_workqueue(md->wq);
2656 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2657}
b735fede 2658EXPORT_SYMBOL_GPL(dm_internal_suspend_fast);
fd2ed4d2 2659
ffcc3936 2660void dm_internal_resume_fast(struct mapped_device *md)
fd2ed4d2 2661{
ffcc3936 2662 if (dm_suspended_md(md) || dm_suspended_internally_md(md))
fd2ed4d2
MP
2663 goto done;
2664
2665 dm_queue_flush(md);
2666
2667done:
2668 mutex_unlock(&md->suspend_lock);
2669}
b735fede 2670EXPORT_SYMBOL_GPL(dm_internal_resume_fast);
fd2ed4d2 2671
1da177e4
LT
2672/*-----------------------------------------------------------------
2673 * Event notification.
2674 *---------------------------------------------------------------*/
3abf85b5 2675int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
60935eb2 2676 unsigned cookie)
69267a30 2677{
60935eb2
MB
2678 char udev_cookie[DM_COOKIE_LENGTH];
2679 char *envp[] = { udev_cookie, NULL };
2680
2681 if (!cookie)
3abf85b5 2682 return kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
60935eb2
MB
2683 else {
2684 snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
2685 DM_COOKIE_ENV_VAR_NAME, cookie);
3abf85b5
PR
2686 return kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
2687 action, envp);
60935eb2 2688 }
69267a30
AK
2689}
2690
7a8c3d3b
MA
2691uint32_t dm_next_uevent_seq(struct mapped_device *md)
2692{
2693 return atomic_add_return(1, &md->uevent_seq);
2694}
2695
1da177e4
LT
2696uint32_t dm_get_event_nr(struct mapped_device *md)
2697{
2698 return atomic_read(&md->event_nr);
2699}
2700
2701int dm_wait_event(struct mapped_device *md, int event_nr)
2702{
2703 return wait_event_interruptible(md->eventq,
2704 (event_nr != atomic_read(&md->event_nr)));
2705}
2706
7a8c3d3b
MA
2707void dm_uevent_add(struct mapped_device *md, struct list_head *elist)
2708{
2709 unsigned long flags;
2710
2711 spin_lock_irqsave(&md->uevent_lock, flags);
2712 list_add(elist, &md->uevent_list);
2713 spin_unlock_irqrestore(&md->uevent_lock, flags);
2714}
2715
1da177e4
LT
2716/*
2717 * The gendisk is only valid as long as you have a reference
2718 * count on 'md'.
2719 */
2720struct gendisk *dm_disk(struct mapped_device *md)
2721{
2722 return md->disk;
2723}
65ff5b7d 2724EXPORT_SYMBOL_GPL(dm_disk);
1da177e4 2725
784aae73
MB
2726struct kobject *dm_kobject(struct mapped_device *md)
2727{
2995fa78 2728 return &md->kobj_holder.kobj;
784aae73
MB
2729}
2730
784aae73
MB
2731struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
2732{
2733 struct mapped_device *md;
2734
2995fa78 2735 md = container_of(kobj, struct mapped_device, kobj_holder.kobj);
784aae73 2736
b9a41d21
HT
2737 spin_lock(&_minor_lock);
2738 if (test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) {
2739 md = NULL;
2740 goto out;
2741 }
784aae73 2742 dm_get(md);
b9a41d21
HT
2743out:
2744 spin_unlock(&_minor_lock);
2745
784aae73
MB
2746 return md;
2747}
2748
4f186f8b 2749int dm_suspended_md(struct mapped_device *md)
1da177e4
LT
2750{
2751 return test_bit(DMF_SUSPENDED, &md->flags);
2752}
2753
ffcc3936
MS
2754int dm_suspended_internally_md(struct mapped_device *md)
2755{
2756 return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
2757}
2758
2c140a24
MP
2759int dm_test_deferred_remove_flag(struct mapped_device *md)
2760{
2761 return test_bit(DMF_DEFERRED_REMOVE, &md->flags);
2762}
2763
64dbce58
KU
2764int dm_suspended(struct dm_target *ti)
2765{
ecdb2e25 2766 return dm_suspended_md(dm_table_get_md(ti->table));
64dbce58
KU
2767}
2768EXPORT_SYMBOL_GPL(dm_suspended);
2769
2e93ccc1
KU
2770int dm_noflush_suspending(struct dm_target *ti)
2771{
ecdb2e25 2772 return __noflush_suspending(dm_table_get_md(ti->table));
2e93ccc1
KU
2773}
2774EXPORT_SYMBOL_GPL(dm_noflush_suspending);
2775
7e0d574f 2776struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type,
0776aa0e
MS
2777 unsigned integrity, unsigned per_io_data_size,
2778 unsigned min_pool_size)
e6ee8c0b 2779{
115485e8 2780 struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
78d8e58a 2781 unsigned int pool_size = 0;
64f52b0e 2782 unsigned int front_pad, io_front_pad;
e6ee8c0b
KU
2783
2784 if (!pools)
4e6e36c3 2785 return NULL;
e6ee8c0b 2786
78d8e58a
MS
2787 switch (type) {
2788 case DM_TYPE_BIO_BASED:
545ed20e 2789 case DM_TYPE_DAX_BIO_BASED:
0776aa0e 2790 pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
30187e1d 2791 front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
64f52b0e
MS
2792 io_front_pad = roundup(front_pad, __alignof__(struct dm_io)) + offsetof(struct dm_io, tio);
2793 pools->io_bs = bioset_create(pool_size, io_front_pad, 0);
2794 if (!pools->io_bs)
2795 goto out;
2796 if (integrity && bioset_integrity_create(pools->io_bs, pool_size))
2797 goto out;
78d8e58a
MS
2798 break;
2799 case DM_TYPE_REQUEST_BASED:
78d8e58a 2800 case DM_TYPE_MQ_REQUEST_BASED:
0776aa0e 2801 pool_size = max(dm_get_reserved_rq_based_ios(), min_pool_size);
78d8e58a 2802 front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
591ddcfc 2803 /* per_io_data_size is used for blk-mq pdu at queue allocation */
78d8e58a
MS
2804 break;
2805 default:
2806 BUG();
2807 }
2808
4a3f54d9 2809 pools->bs = bioset_create(pool_size, front_pad, 0);
e6ee8c0b 2810 if (!pools->bs)
5f015204 2811 goto out;
e6ee8c0b 2812
a91a2785 2813 if (integrity && bioset_integrity_create(pools->bs, pool_size))
5f015204 2814 goto out;
a91a2785 2815
e6ee8c0b 2816 return pools;
5f1b670d 2817
5f1b670d
CH
2818out:
2819 dm_free_md_mempools(pools);
78d8e58a 2820
4e6e36c3 2821 return NULL;
e6ee8c0b
KU
2822}
2823
2824void dm_free_md_mempools(struct dm_md_mempools *pools)
2825{
2826 if (!pools)
2827 return;
2828
e6ee8c0b
KU
2829 if (pools->bs)
2830 bioset_free(pools->bs);
64f52b0e
MS
2831 if (pools->io_bs)
2832 bioset_free(pools->io_bs);
e6ee8c0b
KU
2833
2834 kfree(pools);
2835}
2836
9c72bad1
CH
2837struct dm_pr {
2838 u64 old_key;
2839 u64 new_key;
2840 u32 flags;
2841 bool fail_early;
2842};
2843
2844static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn,
2845 void *data)
71cdb697
CH
2846{
2847 struct mapped_device *md = bdev->bd_disk->private_data;
9c72bad1
CH
2848 struct dm_table *table;
2849 struct dm_target *ti;
2850 int ret = -ENOTTY, srcu_idx;
71cdb697 2851
9c72bad1
CH
2852 table = dm_get_live_table(md, &srcu_idx);
2853 if (!table || !dm_table_get_size(table))
2854 goto out;
71cdb697 2855
9c72bad1
CH
2856 /* We only support devices that have a single target */
2857 if (dm_table_get_num_targets(table) != 1)
2858 goto out;
2859 ti = dm_table_get_target(table, 0);
71cdb697 2860
9c72bad1
CH
2861 ret = -EINVAL;
2862 if (!ti->type->iterate_devices)
2863 goto out;
2864
2865 ret = ti->type->iterate_devices(ti, fn, data);
2866out:
2867 dm_put_live_table(md, srcu_idx);
2868 return ret;
2869}
2870
2871/*
2872 * For register / unregister we need to manually call out to every path.
2873 */
2874static int __dm_pr_register(struct dm_target *ti, struct dm_dev *dev,
2875 sector_t start, sector_t len, void *data)
2876{
2877 struct dm_pr *pr = data;
2878 const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
2879
2880 if (!ops || !ops->pr_register)
2881 return -EOPNOTSUPP;
2882 return ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags);
2883}
2884
2885static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
2886 u32 flags)
2887{
2888 struct dm_pr pr = {
2889 .old_key = old_key,
2890 .new_key = new_key,
2891 .flags = flags,
2892 .fail_early = true,
2893 };
2894 int ret;
2895
2896 ret = dm_call_pr(bdev, __dm_pr_register, &pr);
2897 if (ret && new_key) {
2898 /* unregister all paths if we failed to register any path */
2899 pr.old_key = new_key;
2900 pr.new_key = 0;
2901 pr.flags = 0;
2902 pr.fail_early = false;
2903 dm_call_pr(bdev, __dm_pr_register, &pr);
2904 }
2905
2906 return ret;
71cdb697
CH
2907}
2908
2909static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
956a4025 2910 u32 flags)
71cdb697
CH
2911{
2912 struct mapped_device *md = bdev->bd_disk->private_data;
2913 const struct pr_ops *ops;
71cdb697 2914 fmode_t mode;
956a4025 2915 int r;
71cdb697 2916
956a4025 2917 r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
71cdb697
CH
2918 if (r < 0)
2919 return r;
2920
2921 ops = bdev->bd_disk->fops->pr_ops;
2922 if (ops && ops->pr_reserve)
2923 r = ops->pr_reserve(bdev, key, type, flags);
2924 else
2925 r = -EOPNOTSUPP;
2926
956a4025 2927 bdput(bdev);
71cdb697
CH
2928 return r;
2929}
2930
2931static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
2932{
2933 struct mapped_device *md = bdev->bd_disk->private_data;
2934 const struct pr_ops *ops;
71cdb697 2935 fmode_t mode;
956a4025 2936 int r;
71cdb697 2937
956a4025 2938 r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
71cdb697
CH
2939 if (r < 0)
2940 return r;
2941
2942 ops = bdev->bd_disk->fops->pr_ops;
2943 if (ops && ops->pr_release)
2944 r = ops->pr_release(bdev, key, type);
2945 else
2946 r = -EOPNOTSUPP;
2947
956a4025 2948 bdput(bdev);
71cdb697
CH
2949 return r;
2950}
2951
2952static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
956a4025 2953 enum pr_type type, bool abort)
71cdb697
CH
2954{
2955 struct mapped_device *md = bdev->bd_disk->private_data;
2956 const struct pr_ops *ops;
71cdb697 2957 fmode_t mode;
956a4025 2958 int r;
71cdb697 2959
956a4025 2960 r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
71cdb697
CH
2961 if (r < 0)
2962 return r;
2963
2964 ops = bdev->bd_disk->fops->pr_ops;
2965 if (ops && ops->pr_preempt)
2966 r = ops->pr_preempt(bdev, old_key, new_key, type, abort);
2967 else
2968 r = -EOPNOTSUPP;
2969
956a4025 2970 bdput(bdev);
71cdb697
CH
2971 return r;
2972}
2973
2974static int dm_pr_clear(struct block_device *bdev, u64 key)
2975{
2976 struct mapped_device *md = bdev->bd_disk->private_data;
2977 const struct pr_ops *ops;
71cdb697 2978 fmode_t mode;
956a4025 2979 int r;
71cdb697 2980
956a4025 2981 r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
71cdb697
CH
2982 if (r < 0)
2983 return r;
2984
2985 ops = bdev->bd_disk->fops->pr_ops;
2986 if (ops && ops->pr_clear)
2987 r = ops->pr_clear(bdev, key);
2988 else
2989 r = -EOPNOTSUPP;
2990
956a4025 2991 bdput(bdev);
71cdb697
CH
2992 return r;
2993}
2994
2995static const struct pr_ops dm_pr_ops = {
2996 .pr_register = dm_pr_register,
2997 .pr_reserve = dm_pr_reserve,
2998 .pr_release = dm_pr_release,
2999 .pr_preempt = dm_pr_preempt,
3000 .pr_clear = dm_pr_clear,
3001};
3002
83d5cde4 3003static const struct block_device_operations dm_blk_dops = {
1da177e4
LT
3004 .open = dm_blk_open,
3005 .release = dm_blk_close,
aa129a22 3006 .ioctl = dm_blk_ioctl,
3ac51e74 3007 .getgeo = dm_blk_getgeo,
71cdb697 3008 .pr_ops = &dm_pr_ops,
1da177e4
LT
3009 .owner = THIS_MODULE
3010};
3011
f26c5719
DW
3012static const struct dax_operations dm_dax_ops = {
3013 .direct_access = dm_dax_direct_access,
7e026c8c 3014 .copy_from_iter = dm_dax_copy_from_iter,
f26c5719
DW
3015};
3016
1da177e4
LT
3017/*
3018 * module hooks
3019 */
3020module_init(dm_init);
3021module_exit(dm_exit);
3022
3023module_param(major, uint, 0);
3024MODULE_PARM_DESC(major, "The major number of the device mapper");
f4790826 3025
e8603136
MS
3026module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR);
3027MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");
3028
115485e8
MS
3029module_param(dm_numa_node, int, S_IRUGO | S_IWUSR);
3030MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations");
3031
1da177e4
LT
3032MODULE_DESCRIPTION(DM_NAME " driver");
3033MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
3034MODULE_LICENSE("GPL");