]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | linear.c : Multiple Devices driver for Linux | |
3 | Copyright (C) 1994-96 Marc ZYNGIER | |
4 | <zyngier@ufr-info-p7.ibp.fr> or | |
5 | <maz@gloups.fdn.fr> | |
6 | ||
7 | Linear mode management functions. | |
8 | ||
9 | This program is free software; you can redistribute it and/or modify | |
10 | it under the terms of the GNU General Public License as published by | |
11 | the Free Software Foundation; either version 2, or (at your option) | |
12 | any later version. | |
f72ffdd6 | 13 | |
1da177e4 LT |
14 | You should have received a copy of the GNU General Public License |
15 | (for example /usr/src/linux/COPYING); if not, write to the Free | |
f72ffdd6 | 16 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
1da177e4 LT |
17 | */ |
18 | ||
bff61975 N |
19 | #include <linux/blkdev.h> |
20 | #include <linux/raid/md_u.h> | |
bff61975 | 21 | #include <linux/seq_file.h> |
056075c7 | 22 | #include <linux/module.h> |
5a0e3ad6 | 23 | #include <linux/slab.h> |
109e3765 | 24 | #include <trace/events/block.h> |
43b2e5d8 | 25 | #include "md.h" |
ef740c37 | 26 | #include "linear.h" |
1da177e4 | 27 | |
1da177e4 | 28 | /* |
f72ffdd6 | 29 | * find which device holds a particular offset |
1da177e4 | 30 | */ |
a7120771 | 31 | static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector) |
1da177e4 | 32 | { |
aece3d1f | 33 | int lo, mid, hi; |
e849b938 | 34 | struct linear_conf *conf; |
1da177e4 | 35 | |
aece3d1f SS |
36 | lo = 0; |
37 | hi = mddev->raid_disks - 1; | |
3be260cc | 38 | conf = mddev->private; |
1da177e4 | 39 | |
aece3d1f SS |
40 | /* |
41 | * Binary Search | |
42 | */ | |
43 | ||
44 | while (hi > lo) { | |
45 | ||
46 | mid = (hi + lo) / 2; | |
47 | if (sector < conf->disks[mid].end_sector) | |
48 | hi = mid; | |
49 | else | |
50 | lo = mid + 1; | |
51 | } | |
52 | ||
53 | return conf->disks + lo; | |
1da177e4 LT |
54 | } |
55 | ||
03a9e24e | 56 | /* |
57 | * In linear_congested() conf->raid_disks is used as a copy of | |
58 | * mddev->raid_disks to iterate conf->disks[], because conf->raid_disks | |
59 | * and conf->disks[] are created in linear_conf(), they are always | |
60 | * consitent with each other, but mddev->raid_disks does not. | |
61 | */ | |
5c675f83 | 62 | static int linear_congested(struct mddev *mddev, int bits) |
26be34dc | 63 | { |
e849b938 | 64 | struct linear_conf *conf; |
26be34dc N |
65 | int i, ret = 0; |
66 | ||
03a9e24e | 67 | rcu_read_lock(); |
68 | conf = rcu_dereference(mddev->private); | |
af11c397 | 69 | |
03a9e24e | 70 | for (i = 0; i < conf->raid_disks && !ret ; i++) { |
165125e1 | 71 | struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev); |
dc3b17cc | 72 | ret |= bdi_congested(q->backing_dev_info, bits); |
26be34dc | 73 | } |
af11c397 | 74 | |
03a9e24e | 75 | rcu_read_unlock(); |
26be34dc N |
76 | return ret; |
77 | } | |
78 | ||
fd01b88c | 79 | static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks) |
80c3a6ce | 80 | { |
e849b938 | 81 | struct linear_conf *conf; |
af11c397 | 82 | sector_t array_sectors; |
80c3a6ce | 83 | |
3be260cc | 84 | conf = mddev->private; |
80c3a6ce DW |
85 | WARN_ONCE(sectors || raid_disks, |
86 | "%s does not support generic reshape\n", __func__); | |
af11c397 | 87 | array_sectors = conf->array_sectors; |
80c3a6ce | 88 | |
af11c397 | 89 | return array_sectors; |
80c3a6ce DW |
90 | } |
91 | ||
e849b938 | 92 | static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) |
1da177e4 | 93 | { |
e849b938 | 94 | struct linear_conf *conf; |
3cb03002 | 95 | struct md_rdev *rdev; |
45d4582f | 96 | int i, cnt; |
f1cad2b6 | 97 | bool discard_supported = false; |
1da177e4 | 98 | |
a7120771 | 99 | conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(struct dev_info), |
1da177e4 LT |
100 | GFP_KERNEL); |
101 | if (!conf) | |
7c7546cc N |
102 | return NULL; |
103 | ||
1da177e4 | 104 | cnt = 0; |
d6e22150 | 105 | conf->array_sectors = 0; |
1da177e4 | 106 | |
dafb20fa | 107 | rdev_for_each(rdev, mddev) { |
1da177e4 | 108 | int j = rdev->raid_disk; |
a7120771 | 109 | struct dev_info *disk = conf->disks + j; |
13f2682b | 110 | sector_t sectors; |
1da177e4 | 111 | |
13864515 | 112 | if (j < 0 || j >= raid_disks || disk->rdev) { |
a2e202af N |
113 | pr_warn("md/linear:%s: disk numbering problem. Aborting!\n", |
114 | mdname(mddev)); | |
1da177e4 LT |
115 | goto out; |
116 | } | |
117 | ||
118 | disk->rdev = rdev; | |
13f2682b N |
119 | if (mddev->chunk_sectors) { |
120 | sectors = rdev->sectors; | |
121 | sector_div(sectors, mddev->chunk_sectors); | |
122 | rdev->sectors = sectors * mddev->chunk_sectors; | |
123 | } | |
1da177e4 | 124 | |
8f6c2e4b MP |
125 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
126 | rdev->data_offset << 9); | |
1da177e4 | 127 | |
dd8ac336 | 128 | conf->array_sectors += rdev->sectors; |
1da177e4 | 129 | cnt++; |
4db7cdc8 | 130 | |
f1cad2b6 SL |
131 | if (blk_queue_discard(bdev_get_queue(rdev->bdev))) |
132 | discard_supported = true; | |
1da177e4 | 133 | } |
7c7546cc | 134 | if (cnt != raid_disks) { |
a2e202af N |
135 | pr_warn("md/linear:%s: not enough drives present. Aborting!\n", |
136 | mdname(mddev)); | |
1da177e4 LT |
137 | goto out; |
138 | } | |
139 | ||
f1cad2b6 SL |
140 | if (!discard_supported) |
141 | queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); | |
142 | else | |
143 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); | |
144 | ||
1da177e4 | 145 | /* |
45d4582f | 146 | * Here we calculate the device offsets. |
1da177e4 | 147 | */ |
4db7cdc8 SS |
148 | conf->disks[0].end_sector = conf->disks[0].rdev->sectors; |
149 | ||
a778b73f | 150 | for (i = 1; i < raid_disks; i++) |
4db7cdc8 SS |
151 | conf->disks[i].end_sector = |
152 | conf->disks[i-1].end_sector + | |
153 | conf->disks[i].rdev->sectors; | |
15945fee | 154 | |
03a9e24e | 155 | /* |
156 | * conf->raid_disks is copy of mddev->raid_disks. The reason to | |
157 | * keep a copy of mddev->raid_disks in struct linear_conf is, | |
158 | * mddev->raid_disks may not be consistent with pointers number of | |
159 | * conf->disks[] when it is updated in linear_add() and used to | |
160 | * iterate old conf->disks[] earray in linear_congested(). | |
161 | * Here conf->raid_disks is always consitent with number of | |
162 | * pointers in conf->disks[] array, and mddev->private is updated | |
163 | * with rcu_assign_pointer() in linear_addr(), such race can be | |
164 | * avoided. | |
165 | */ | |
166 | conf->raid_disks = raid_disks; | |
167 | ||
7c7546cc N |
168 | return conf; |
169 | ||
170 | out: | |
171 | kfree(conf); | |
172 | return NULL; | |
173 | } | |
174 | ||
fd01b88c | 175 | static int linear_run (struct mddev *mddev) |
7c7546cc | 176 | { |
e849b938 | 177 | struct linear_conf *conf; |
98d5561b | 178 | int ret; |
7c7546cc | 179 | |
0894cc30 AN |
180 | if (md_check_no_bitmap(mddev)) |
181 | return -EINVAL; | |
7c7546cc N |
182 | conf = linear_conf(mddev, mddev->raid_disks); |
183 | ||
184 | if (!conf) | |
185 | return 1; | |
186 | mddev->private = conf; | |
1f403624 | 187 | md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); |
7c7546cc | 188 | |
98d5561b | 189 | ret = md_integrity_register(mddev); |
190 | if (ret) { | |
191 | kfree(conf); | |
192 | mddev->private = NULL; | |
193 | } | |
194 | return ret; | |
7c7546cc | 195 | } |
1da177e4 | 196 | |
fd01b88c | 197 | static int linear_add(struct mddev *mddev, struct md_rdev *rdev) |
7c7546cc N |
198 | { |
199 | /* Adding a drive to a linear array allows the array to grow. | |
200 | * It is permitted if the new drive has a matching superblock | |
201 | * already on it, with raid_disk equal to raid_disks. | |
202 | * It is achieved by creating a new linear_private_data structure | |
203 | * and swapping it in in-place of the current one. | |
204 | * The current one is never freed until the array is stopped. | |
205 | * This avoids races. | |
206 | */ | |
e849b938 | 207 | struct linear_conf *newconf, *oldconf; |
7c7546cc | 208 | |
a778b73f | 209 | if (rdev->saved_raid_disk != mddev->raid_disks) |
7c7546cc N |
210 | return -EINVAL; |
211 | ||
a778b73f | 212 | rdev->raid_disk = rdev->saved_raid_disk; |
09cd9270 | 213 | rdev->saved_raid_disk = -1; |
a778b73f | 214 | |
7c7546cc N |
215 | newconf = linear_conf(mddev,mddev->raid_disks+1); |
216 | ||
217 | if (!newconf) | |
218 | return -ENOMEM; | |
219 | ||
03a9e24e | 220 | /* newconf->raid_disks already keeps a copy of * the increased |
221 | * value of mddev->raid_disks, WARN_ONCE() is just used to make | |
222 | * sure of this. It is possible that oldconf is still referenced | |
223 | * in linear_congested(), therefore kfree_rcu() is used to free | |
224 | * oldconf until no one uses it anymore. | |
225 | */ | |
3be260cc | 226 | mddev_suspend(mddev); |
d939cdfd SL |
227 | oldconf = rcu_dereference_protected(mddev->private, |
228 | lockdep_is_held(&mddev->reconfig_mutex)); | |
7c7546cc | 229 | mddev->raid_disks++; |
03a9e24e | 230 | WARN_ONCE(mddev->raid_disks != newconf->raid_disks, |
231 | "copied raid_disks doesn't match mddev->raid_disks"); | |
232 | rcu_assign_pointer(mddev->private, newconf); | |
1f403624 | 233 | md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); |
f233ea5c | 234 | set_capacity(mddev->gendisk, mddev->array_sectors); |
3be260cc | 235 | mddev_resume(mddev); |
449aad3e | 236 | revalidate_disk(mddev->gendisk); |
03a9e24e | 237 | kfree_rcu(oldconf, rcu); |
7c7546cc | 238 | return 0; |
1da177e4 LT |
239 | } |
240 | ||
/* Release the linear_conf passed in as @priv; @mddev is not used. */
static void linear_free(struct mddev *mddev, void *priv)
{
	kfree(priv);
}
247 | ||
b4fdcb02 | 248 | static void linear_make_request(struct mddev *mddev, struct bio *bio) |
1da177e4 | 249 | { |
20d0189b | 250 | char b[BDEVNAME_SIZE]; |
a7120771 | 251 | struct dev_info *tmp_dev; |
20d0189b KO |
252 | struct bio *split; |
253 | sector_t start_sector, end_sector, data_offset; | |
1da177e4 | 254 | |
1eff9d32 | 255 | if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { |
e9c7469b | 256 | md_flush_request(mddev, bio); |
5a7bbad2 | 257 | return; |
e5dcdd80 N |
258 | } |
259 | ||
20d0189b | 260 | do { |
109e3765 N |
261 | sector_t bio_sector = bio->bi_iter.bi_sector; |
262 | tmp_dev = which_dev(mddev, bio_sector); | |
20d0189b KO |
263 | start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; |
264 | end_sector = tmp_dev->end_sector; | |
265 | data_offset = tmp_dev->rdev->data_offset; | |
266 | bio->bi_bdev = tmp_dev->rdev->bdev; | |
6283815d | 267 | |
109e3765 N |
268 | if (unlikely(bio_sector >= end_sector || |
269 | bio_sector < start_sector)) | |
20d0189b KO |
270 | goto out_of_bounds; |
271 | ||
272 | if (unlikely(bio_end_sector(bio) > end_sector)) { | |
273 | /* This bio crosses a device boundary, so we have to | |
274 | * split it. | |
275 | */ | |
109e3765 | 276 | split = bio_split(bio, end_sector - bio_sector, |
20d0189b KO |
277 | GFP_NOIO, fs_bio_set); |
278 | bio_chain(split, bio); | |
279 | } else { | |
280 | split = bio; | |
281 | } | |
f1cad2b6 | 282 | |
20d0189b KO |
283 | split->bi_iter.bi_sector = split->bi_iter.bi_sector - |
284 | start_sector + data_offset; | |
285 | ||
796a5cf0 | 286 | if (unlikely((bio_op(split) == REQ_OP_DISCARD) && |
20d0189b KO |
287 | !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) { |
288 | /* Just ignore it */ | |
4246a0b6 | 289 | bio_endio(split); |
109e3765 N |
290 | } else { |
291 | if (mddev->gendisk) | |
292 | trace_block_bio_remap(bdev_get_queue(split->bi_bdev), | |
293 | split, disk_devt(mddev->gendisk), | |
294 | bio_sector); | |
26483819 | 295 | mddev_check_writesame(mddev, split); |
20d0189b | 296 | generic_make_request(split); |
109e3765 | 297 | } |
20d0189b KO |
298 | } while (split != bio); |
299 | return; | |
300 | ||
301 | out_of_bounds: | |
a2e202af | 302 | pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %s: %llu sectors, offset %llu\n", |
20d0189b KO |
303 | mdname(mddev), |
304 | (unsigned long long)bio->bi_iter.bi_sector, | |
305 | bdevname(tmp_dev->rdev->bdev, b), | |
306 | (unsigned long long)tmp_dev->rdev->sectors, | |
307 | (unsigned long long)start_sector); | |
308 | bio_io_error(bio); | |
1da177e4 LT |
309 | } |
310 | ||
fd01b88c | 311 | static void linear_status (struct seq_file *seq, struct mddev *mddev) |
1da177e4 | 312 | { |
9d8f0363 | 313 | seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2); |
1da177e4 LT |
314 | } |
315 | ||
3be260cc N |
/* Quiesce callback of the linear personality; it performs no work. */
static void linear_quiesce(struct mddev *mddev, int state)
{
	/* Intentionally empty. */
}
319 | ||
84fc4b56 | 320 | static struct md_personality linear_personality = |
1da177e4 LT |
321 | { |
322 | .name = "linear", | |
2604b703 | 323 | .level = LEVEL_LINEAR, |
1da177e4 LT |
324 | .owner = THIS_MODULE, |
325 | .make_request = linear_make_request, | |
326 | .run = linear_run, | |
afa0f557 | 327 | .free = linear_free, |
1da177e4 | 328 | .status = linear_status, |
7c7546cc | 329 | .hot_add_disk = linear_add, |
80c3a6ce | 330 | .size = linear_size, |
3be260cc | 331 | .quiesce = linear_quiesce, |
5c675f83 | 332 | .congested = linear_congested, |
1da177e4 LT |
333 | }; |
334 | ||
335 | static int __init linear_init (void) | |
336 | { | |
2604b703 | 337 | return register_md_personality (&linear_personality); |
1da177e4 LT |
338 | } |
339 | ||
340 | static void linear_exit (void) | |
341 | { | |
2604b703 | 342 | unregister_md_personality (&linear_personality); |
1da177e4 LT |
343 | } |
344 | ||
1da177e4 LT |
345 | module_init(linear_init); |
346 | module_exit(linear_exit); | |
347 | MODULE_LICENSE("GPL"); | |
0efb9e61 | 348 | MODULE_DESCRIPTION("Linear device concatenation personality for MD"); |
d9d166c2 N |
349 | MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/ |
350 | MODULE_ALIAS("md-linear"); | |
2604b703 | 351 | MODULE_ALIAS("md-level--1"); |