]>
git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - block/badblocks.c
4 * - Heavily based on MD badblocks code from Neil Brown
6 * Copyright (c) 2015, Intel Corporation.
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 #include <linux/badblocks.h>
19 #include <linux/seqlock.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/stddef.h>
23 #include <linux/types.h>
24 #include <linux/slab.h>
27 * badblocks_check() - check a given range for bad sectors
28 * @bb: the badblocks structure that holds all badblock information
29 * @s: sector (start) at which to check for badblocks
30 * @sectors: number of sectors to check for badblocks
31 * @first_bad: pointer to store location of the first badblock
32 * @bad_sectors: pointer to store number of badblocks after @first_bad
34 * We can record which blocks on each device are 'bad' and so just
35 * fail those blocks, or that stripe, rather than the whole device.
36 * Entries in the bad-block table are 64bits wide. This comprises:
37 * Length of bad-range, in sectors: 0-511 for lengths 1-512
38 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
39 * A 'shift' can be set so that larger blocks are tracked and
40 * consequently larger devices can be covered.
41 * 'Acknowledged' flag - 1 bit. - the most significant bit.
43 * Locking of the bad-block table uses a seqlock so badblocks_check
44 * might need to retry if it is very unlucky.
45 * We will sometimes want to check for bad blocks in a bi_end_io function,
46 * so we use the write_seqlock_irq variant.
48 * When looking for a bad block we specify a range and want to
49 * know if any block in the range is bad. So we binary-search
50 * to the last range that starts at-or-before the given endpoint,
51 * (or "before the sector after the target range")
52 * then see if it ends after the given start.
55 * 0: there are no known bad blocks in the range
56 * 1: there are known bad blocks, all of which are acknowledged
57 * -1: there are bad blocks which have not yet been acknowledged in metadata.
58 * plus the start/length of the first bad section we overlap.
60 int badblocks_check(struct badblocks
*bb
, sector_t s
, int sectors
,
61 sector_t
*first_bad
, int *bad_sectors
)
67 sector_t target
= s
+ sectors
;
71 /* round the start down, and the end up */
73 target
+= (1<<bb
->shift
) - 1;
77 /* 'target' is now the first block after the bad range */
80 seq
= read_seqbegin(&bb
->lock
);
85 /* Binary search between lo and hi for 'target'
86 * i.e. for the last range that starts before 'target'
88 /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
89 * are known not to be the last range before target.
90 * VARIANT: hi-lo is the number of possible
91 * ranges, and decreases until it reaches 1
94 int mid
= (lo
+ hi
) / 2;
95 sector_t a
= BB_OFFSET(p
[mid
]);
98 /* This could still be the one, earlier ranges
103 /* This and later ranges are definitely out. */
106 /* 'lo' might be the last that started before target, but 'hi' isn't */
108 /* need to check all ranges that end after 's' to see if
109 * any are unacknowledged.
112 BB_OFFSET(p
[lo
]) + BB_LEN(p
[lo
]) > s
) {
113 if (BB_OFFSET(p
[lo
]) < target
) {
114 /* starts before the end, and finishes after
115 * the start, so they must overlap
117 if (rv
!= -1 && BB_ACK(p
[lo
]))
121 *first_bad
= BB_OFFSET(p
[lo
]);
122 *bad_sectors
= BB_LEN(p
[lo
]);
128 if (read_seqretry(&bb
->lock
, seq
))
133 EXPORT_SYMBOL_GPL(badblocks_check
);
136 * badblocks_set() - Add a range of bad blocks to the table.
137 * @bb: the badblocks structure that holds all badblock information
138 * @s: first sector to mark as bad
139 * @sectors: number of sectors to mark as bad
140 * @acknowledged: whether to mark the bad sectors as acknowledged
142 * This might extend the table, or might contract it if two adjacent ranges
143 * can be merged. We binary-search to find the 'insertion' point, then
144 * decide how best to handle it.
148 * 1: failed to set badblocks (out of space)
150 int badblocks_set(struct badblocks
*bb
, sector_t s
, int sectors
,
159 /* badblocks are disabled */
163 /* round the start down, and the end up */
164 sector_t next
= s
+ sectors
;
167 next
+= (1<<bb
->shift
) - 1;
172 write_seqlock_irqsave(&bb
->lock
, flags
);
177 /* Find the last range that starts at-or-before 's' */
178 while (hi
- lo
> 1) {
179 int mid
= (lo
+ hi
) / 2;
180 sector_t a
= BB_OFFSET(p
[mid
]);
187 if (hi
> lo
&& BB_OFFSET(p
[lo
]) > s
)
191 /* we found a range that might merge with the start
194 sector_t a
= BB_OFFSET(p
[lo
]);
195 sector_t e
= a
+ BB_LEN(p
[lo
]);
196 int ack
= BB_ACK(p
[lo
]);
199 /* Yes, we can merge with a previous range */
200 if (s
== a
&& s
+ sectors
>= e
)
201 /* new range covers old */
204 ack
= ack
&& acknowledged
;
208 if (e
- a
<= BB_MAX_LEN
) {
209 p
[lo
] = BB_MAKE(a
, e
-a
, ack
);
212 /* does not all fit in one range,
215 if (BB_LEN(p
[lo
]) != BB_MAX_LEN
)
216 p
[lo
] = BB_MAKE(a
, BB_MAX_LEN
, ack
);
222 if (sectors
&& hi
< bb
->count
) {
223 /* 'hi' points to the first range that starts after 's'.
224 * Maybe we can merge with the start of that range
226 sector_t a
= BB_OFFSET(p
[hi
]);
227 sector_t e
= a
+ BB_LEN(p
[hi
]);
228 int ack
= BB_ACK(p
[hi
]);
230 if (a
<= s
+ sectors
) {
231 /* merging is possible */
232 if (e
<= s
+ sectors
) {
237 ack
= ack
&& acknowledged
;
240 if (e
- a
<= BB_MAX_LEN
) {
241 p
[hi
] = BB_MAKE(a
, e
-a
, ack
);
244 p
[hi
] = BB_MAKE(a
, BB_MAX_LEN
, ack
);
252 if (sectors
== 0 && hi
< bb
->count
) {
253 /* we might be able to combine lo and hi */
254 /* Note: 's' is at the end of 'lo' */
255 sector_t a
= BB_OFFSET(p
[hi
]);
256 int lolen
= BB_LEN(p
[lo
]);
257 int hilen
= BB_LEN(p
[hi
]);
258 int newlen
= lolen
+ hilen
- (s
- a
);
260 if (s
>= a
&& newlen
< BB_MAX_LEN
) {
261 /* yes, we can combine them */
262 int ack
= BB_ACK(p
[lo
]) && BB_ACK(p
[hi
]);
264 p
[lo
] = BB_MAKE(BB_OFFSET(p
[lo
]), newlen
, ack
);
265 memmove(p
+ hi
, p
+ hi
+ 1,
266 (bb
->count
- hi
- 1) * 8);
271 /* didn't merge (it all).
272 * Need to add a range just before 'hi'
274 if (bb
->count
>= MAX_BADBLOCKS
) {
275 /* No room for more */
279 int this_sectors
= sectors
;
281 memmove(p
+ hi
+ 1, p
+ hi
,
282 (bb
->count
- hi
) * 8);
285 if (this_sectors
> BB_MAX_LEN
)
286 this_sectors
= BB_MAX_LEN
;
287 p
[hi
] = BB_MAKE(s
, this_sectors
, acknowledged
);
288 sectors
-= this_sectors
;
295 bb
->unacked_exist
= 1;
296 write_sequnlock_irqrestore(&bb
->lock
, flags
);
300 EXPORT_SYMBOL_GPL(badblocks_set
);
303 * badblocks_clear() - Remove a range of bad blocks from the table.
304 * @bb: the badblocks structure that holds all badblock information
305 * @s: first sector to clear (mark as no longer bad)
306 * @sectors: number of sectors to clear
308 * This may involve extending the table if we split a region,
309 * but it must not fail. So if the table becomes full, we just
310 * drop the remove request.
314 * 1: failed to clear badblocks
316 int badblocks_clear(struct badblocks
*bb
, sector_t s
, int sectors
)
320 sector_t target
= s
+ sectors
;
324 /* When clearing we round the start up and the end down.
325 * This should not matter as the shift should align with
326 * the block size and no rounding should ever be needed.
327 * However it is better to think a block is bad when it
328 * isn't than to think a block is not bad when it is.
330 s
+= (1<<bb
->shift
) - 1;
332 target
>>= bb
->shift
;
333 sectors
= target
- s
;
336 write_seqlock_irq(&bb
->lock
);
341 /* Find the last range that starts before 'target' */
342 while (hi
- lo
> 1) {
343 int mid
= (lo
+ hi
) / 2;
344 sector_t a
= BB_OFFSET(p
[mid
]);
352 /* p[lo] is the last range that could overlap the
353 * current range. Earlier ranges could also overlap,
354 * but only this one can overlap the end of the range.
356 if (BB_OFFSET(p
[lo
]) + BB_LEN(p
[lo
]) > target
) {
357 /* Partial overlap, leave the tail of this range */
358 int ack
= BB_ACK(p
[lo
]);
359 sector_t a
= BB_OFFSET(p
[lo
]);
360 sector_t end
= a
+ BB_LEN(p
[lo
]);
363 /* we need to split this range */
364 if (bb
->count
>= MAX_BADBLOCKS
) {
368 memmove(p
+lo
+1, p
+lo
, (bb
->count
- lo
) * 8);
370 p
[lo
] = BB_MAKE(a
, s
-a
, ack
);
373 p
[lo
] = BB_MAKE(target
, end
- target
, ack
);
374 /* there is no longer an overlap */
379 BB_OFFSET(p
[lo
]) + BB_LEN(p
[lo
]) > s
) {
380 /* This range does overlap */
381 if (BB_OFFSET(p
[lo
]) < s
) {
382 /* Keep the early parts of this range. */
383 int ack
= BB_ACK(p
[lo
]);
384 sector_t start
= BB_OFFSET(p
[lo
]);
386 p
[lo
] = BB_MAKE(start
, s
- start
, ack
);
387 /* now low doesn't overlap, so.. */
392 /* 'lo' is strictly before, 'hi' is strictly after,
393 * anything between needs to be discarded
396 memmove(p
+lo
+1, p
+hi
, (bb
->count
- hi
) * 8);
397 bb
->count
-= (hi
- lo
- 1);
403 write_sequnlock_irq(&bb
->lock
);
406 EXPORT_SYMBOL_GPL(badblocks_clear
);
409 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
410 * @bb: the badblocks structure that holds all badblock information
412 * This only succeeds if ->changed is clear. It is used by
413 * in-kernel metadata updates
415 void ack_all_badblocks(struct badblocks
*bb
)
417 if (bb
->page
== NULL
|| bb
->changed
)
418 /* no point even trying */
420 write_seqlock_irq(&bb
->lock
);
422 if (bb
->changed
== 0 && bb
->unacked_exist
) {
426 for (i
= 0; i
< bb
->count
; i
++) {
428 sector_t start
= BB_OFFSET(p
[i
]);
429 int len
= BB_LEN(p
[i
]);
431 p
[i
] = BB_MAKE(start
, len
, 1);
434 bb
->unacked_exist
= 0;
436 write_sequnlock_irq(&bb
->lock
);
438 EXPORT_SYMBOL_GPL(ack_all_badblocks
);
441 * badblocks_show() - sysfs access to bad-blocks list
442 * @bb: the badblocks structure that holds all badblock information
443 * @page: buffer received from sysfs
444 * @unack: whether to show unacknowledged badblocks
447 * Length of returned data
449 ssize_t
badblocks_show(struct badblocks
*bb
, char *page
, int unack
)
460 seq
= read_seqbegin(&bb
->lock
);
465 while (len
< PAGE_SIZE
&& i
< bb
->count
) {
466 sector_t s
= BB_OFFSET(p
[i
]);
467 unsigned int length
= BB_LEN(p
[i
]);
468 int ack
= BB_ACK(p
[i
]);
475 len
+= snprintf(page
+len
, PAGE_SIZE
-len
, "%llu %u\n",
476 (unsigned long long)s
<< bb
->shift
,
477 length
<< bb
->shift
);
479 if (unack
&& len
== 0)
480 bb
->unacked_exist
= 0;
482 if (read_seqretry(&bb
->lock
, seq
))
487 EXPORT_SYMBOL_GPL(badblocks_show
);
490 * badblocks_store() - sysfs access to bad-blocks list
491 * @bb: the badblocks structure that holds all badblock information
492 * @page: buffer received from sysfs
493 * @len: length of data received from sysfs
494 * @unack: whether to record the new bad blocks as unacknowledged
497 * Length of the buffer processed or -ve error.
499 ssize_t
badblocks_store(struct badblocks
*bb
, const char *page
, size_t len
,
502 unsigned long long sector
;
506 switch (sscanf(page
, "%llu %d%c", &sector
, &length
, &newline
)) {
518 if (badblocks_set(bb
, sector
, length
, !unack
))
523 EXPORT_SYMBOL_GPL(badblocks_store
);
526 * badblocks_init() - initialize the badblocks structure
527 * @bb: the badblocks structure that holds all badblock information
528 * @enable: whether to enable badblocks accounting
532 * -ve errno: on error
534 int badblocks_init(struct badblocks
*bb
, int enable
)
541 bb
->page
= kzalloc(PAGE_SIZE
, GFP_KERNEL
);
542 if (bb
->page
== (u64
*)0) {
546 seqlock_init(&bb
->lock
);
550 EXPORT_SYMBOL_GPL(badblocks_init
);
553 * badblocks_free() - free the badblocks structure
554 * @bb: the badblocks structure that holds all badblock information
556 void badblocks_free(struct badblocks
*bb
)
561 EXPORT_SYMBOL_GPL(badblocks_free
);