]>
Commit | Line | Data |
---|---|---|
a4b75251 TL |
1 | // SPDX-License-Identifier: BSD-3-Clause |
2 | /* Copyright 2017-2020, Intel Corporation */ | |
3 | ||
4 | /* | |
5 | * badblocks_ndctl.c -- implementation of DIMMs API based on the ndctl library | |
6 | */ | |
7 | #define _GNU_SOURCE | |
8 | ||
9 | #include <sys/types.h> | |
10 | #include <libgen.h> | |
11 | #include <linux/falloc.h> | |
12 | #include <limits.h> | |
13 | #include <string.h> | |
14 | #include <stdio.h> | |
15 | #include <stdlib.h> | |
16 | #include <sys/sysmacros.h> | |
17 | #include <fcntl.h> | |
18 | #include <ndctl/libndctl.h> | |
19 | #include <ndctl/libdaxctl.h> | |
20 | ||
21 | #include "libpmem2.h" | |
22 | #include "pmem2_utils.h" | |
23 | #include "source.h" | |
24 | #include "region_namespace_ndctl.h" | |
25 | ||
26 | #include "file.h" | |
27 | #include "out.h" | |
28 | #include "badblocks.h" | |
29 | #include "set_badblocks.h" | |
30 | #include "extent.h" | |
31 | ||
/*
 * pmem2_badblock_next_type -- signature shared by
 * pmem2_badblock_next_namespace() and pmem2_badblock_next_region()
 */
typedef int pmem2_badblock_next_type(struct pmem2_badblock_context *bbctx,
		struct pmem2_badblock *bb);
35 | ||
/*
 * pmem2_badblock_get_next_type -- signature shared by the wrappers around
 * the ndctl "get first/next bad block" calls
 */
typedef void *
pmem2_badblock_get_next_type(struct pmem2_badblock_context *bbctx);
38 | ||
39 | struct pmem2_badblock_context { | |
40 | /* file descriptor */ | |
41 | int fd; | |
42 | ||
43 | /* pmem2 file type */ | |
44 | enum pmem2_file_type file_type; | |
45 | ||
46 | /* ndctl context */ | |
47 | struct ndctl_ctx *ctx; | |
48 | ||
49 | /* | |
50 | * Function pointer to: | |
51 | * - pmem2_badblock_next_namespace() or | |
52 | * - pmem2_badblock_next_region() | |
53 | */ | |
54 | pmem2_badblock_next_type *pmem2_badblock_next_func; | |
55 | ||
56 | /* | |
57 | * Function pointer to: | |
58 | * - pmem2_namespace_get_first_badblock() or | |
59 | * - pmem2_namespace_get_next_badblock() or | |
60 | * - pmem2_region_get_first_badblock() or | |
61 | * - pmem2_region_get_next_badblock() | |
62 | */ | |
63 | pmem2_badblock_get_next_type *pmem2_badblock_get_next_func; | |
64 | ||
65 | /* needed only by the ndctl namespace badblock iterator */ | |
66 | struct ndctl_namespace *ndns; | |
67 | ||
68 | /* needed only by the ndctl region badblock iterator */ | |
69 | struct { | |
70 | struct ndctl_bus *bus; | |
71 | struct ndctl_region *region; | |
72 | unsigned long long ns_res; /* address of the namespace */ | |
73 | unsigned long long ns_beg; /* the begining of the namespace */ | |
74 | unsigned long long ns_end; /* the end of the namespace */ | |
75 | } rgn; | |
76 | ||
77 | /* file's extents */ | |
78 | struct extents *exts; | |
79 | unsigned first_extent; | |
80 | struct pmem2_badblock last_bb; | |
81 | }; | |
82 | ||
/* prototypes of the static routines that are referenced before definition */
static int pmem2_badblock_next_namespace(struct pmem2_badblock_context *bbctx,
		struct pmem2_badblock *bb);
static int pmem2_badblock_next_region(struct pmem2_badblock_context *bbctx,
		struct pmem2_badblock *bb);
static void *
pmem2_namespace_get_first_badblock(struct pmem2_badblock_context *bbctx);
static void *
pmem2_region_get_first_badblock(struct pmem2_badblock_context *bbctx);
94 | ||
95 | /* | |
96 | * badblocks_get_namespace_bounds -- (internal) returns the bounds | |
97 | * (offset and size) of the given namespace | |
98 | * relative to the beginning of its region | |
99 | */ | |
100 | static int | |
101 | badblocks_get_namespace_bounds(struct ndctl_region *region, | |
102 | struct ndctl_namespace *ndns, | |
103 | unsigned long long *ns_offset, | |
104 | unsigned long long *ns_size) | |
105 | { | |
106 | LOG(3, "region %p namespace %p ns_offset %p ns_size %p", | |
107 | region, ndns, ns_offset, ns_size); | |
108 | ||
109 | struct ndctl_pfn *pfn = ndctl_namespace_get_pfn(ndns); | |
110 | struct ndctl_dax *dax = ndctl_namespace_get_dax(ndns); | |
111 | ||
112 | ASSERTne(ns_offset, NULL); | |
113 | ASSERTne(ns_size, NULL); | |
114 | ||
115 | if (pfn) { | |
116 | *ns_offset = ndctl_pfn_get_resource(pfn); | |
117 | if (*ns_offset == ULLONG_MAX) { | |
118 | ERR("(pfn) cannot read offset of the namespace"); | |
119 | return PMEM2_E_CANNOT_READ_BOUNDS; | |
120 | } | |
121 | ||
122 | *ns_size = ndctl_pfn_get_size(pfn); | |
123 | if (*ns_size == ULLONG_MAX) { | |
124 | ERR("(pfn) cannot read size of the namespace"); | |
125 | return PMEM2_E_CANNOT_READ_BOUNDS; | |
126 | } | |
127 | ||
128 | LOG(10, "(pfn) ns_offset 0x%llx ns_size %llu", | |
129 | *ns_offset, *ns_size); | |
130 | } else if (dax) { | |
131 | *ns_offset = ndctl_dax_get_resource(dax); | |
132 | if (*ns_offset == ULLONG_MAX) { | |
133 | ERR("(dax) cannot read offset of the namespace"); | |
134 | return PMEM2_E_CANNOT_READ_BOUNDS; | |
135 | } | |
136 | ||
137 | *ns_size = ndctl_dax_get_size(dax); | |
138 | if (*ns_size == ULLONG_MAX) { | |
139 | ERR("(dax) cannot read size of the namespace"); | |
140 | return PMEM2_E_CANNOT_READ_BOUNDS; | |
141 | } | |
142 | ||
143 | LOG(10, "(dax) ns_offset 0x%llx ns_size %llu", | |
144 | *ns_offset, *ns_size); | |
145 | } else { /* raw or btt */ | |
146 | *ns_offset = ndctl_namespace_get_resource(ndns); | |
147 | if (*ns_offset == ULLONG_MAX) { | |
148 | ERR("(raw/btt) cannot read offset of the namespace"); | |
149 | return PMEM2_E_CANNOT_READ_BOUNDS; | |
150 | } | |
151 | ||
152 | *ns_size = ndctl_namespace_get_size(ndns); | |
153 | if (*ns_size == ULLONG_MAX) { | |
154 | ERR("(raw/btt) cannot read size of the namespace"); | |
155 | return PMEM2_E_CANNOT_READ_BOUNDS; | |
156 | } | |
157 | ||
158 | LOG(10, "(raw/btt) ns_offset 0x%llx ns_size %llu", | |
159 | *ns_offset, *ns_size); | |
160 | } | |
161 | ||
162 | unsigned long long region_offset = ndctl_region_get_resource(region); | |
163 | if (region_offset == ULLONG_MAX) { | |
164 | ERR("!cannot read offset of the region"); | |
165 | return PMEM2_E_ERRNO; | |
166 | } | |
167 | ||
168 | LOG(10, "region_offset 0x%llx", region_offset); | |
169 | *ns_offset -= region_offset; | |
170 | ||
171 | return 0; | |
172 | } | |
173 | ||
174 | /* | |
175 | * badblocks_devdax_clear_one_badblock -- (internal) clear one bad block | |
176 | * in the dax device | |
177 | */ | |
178 | static int | |
179 | badblocks_devdax_clear_one_badblock(struct ndctl_bus *bus, | |
180 | unsigned long long address, | |
181 | unsigned long long length) | |
182 | { | |
183 | LOG(3, "bus %p address 0x%llx length %llu (bytes)", | |
184 | bus, address, length); | |
185 | ||
186 | int ret; | |
187 | ||
188 | struct ndctl_cmd *cmd_ars_cap = ndctl_bus_cmd_new_ars_cap(bus, | |
189 | address, length); | |
190 | if (cmd_ars_cap == NULL) { | |
191 | ERR("ndctl_bus_cmd_new_ars_cap() failed (bus '%s')", | |
192 | ndctl_bus_get_provider(bus)); | |
193 | return PMEM2_E_ERRNO; | |
194 | } | |
195 | ||
196 | ret = ndctl_cmd_submit(cmd_ars_cap); | |
197 | if (ret) { | |
198 | ERR("ndctl_cmd_submit() failed (bus '%s')", | |
199 | ndctl_bus_get_provider(bus)); | |
200 | /* ndctl_cmd_submit() returns -errno */ | |
201 | goto out_ars_cap; | |
202 | } | |
203 | ||
204 | struct ndctl_range range; | |
205 | ret = ndctl_cmd_ars_cap_get_range(cmd_ars_cap, &range); | |
206 | if (ret) { | |
207 | ERR("ndctl_cmd_ars_cap_get_range() failed"); | |
208 | /* ndctl_cmd_ars_cap_get_range() returns -errno */ | |
209 | goto out_ars_cap; | |
210 | } | |
211 | ||
212 | struct ndctl_cmd *cmd_clear_error = ndctl_bus_cmd_new_clear_error( | |
213 | range.address, range.length, cmd_ars_cap); | |
214 | ||
215 | ret = ndctl_cmd_submit(cmd_clear_error); | |
216 | if (ret) { | |
217 | ERR("ndctl_cmd_submit() failed (bus '%s')", | |
218 | ndctl_bus_get_provider(bus)); | |
219 | /* ndctl_cmd_submit() returns -errno */ | |
220 | goto out_clear_error; | |
221 | } | |
222 | ||
223 | size_t cleared = ndctl_cmd_clear_error_get_cleared(cmd_clear_error); | |
224 | ||
225 | LOG(4, "cleared %zu out of %llu bad blocks", cleared, length); | |
226 | ||
227 | ASSERT(cleared <= length); | |
228 | ||
229 | if (cleared < length) { | |
230 | ERR("failed to clear %llu out of %llu bad blocks", | |
231 | length - cleared, length); | |
232 | errno = ENXIO; /* ndctl handles such error in this way */ | |
233 | ret = PMEM2_E_ERRNO; | |
234 | } else { | |
235 | ret = 0; | |
236 | } | |
237 | ||
238 | out_clear_error: | |
239 | ndctl_cmd_unref(cmd_clear_error); | |
240 | out_ars_cap: | |
241 | ndctl_cmd_unref(cmd_ars_cap); | |
242 | ||
243 | return ret; | |
244 | } | |
245 | ||
246 | /* | |
247 | * pmem2_badblock_context_new -- allocate and create a new bad block context | |
248 | */ | |
249 | int | |
250 | pmem2_badblock_context_new(struct pmem2_badblock_context **bbctx, | |
251 | const struct pmem2_source *src) | |
252 | { | |
253 | LOG(3, "src %p bbctx %p", src, bbctx); | |
254 | PMEM2_ERR_CLR(); | |
255 | ||
256 | ASSERTne(bbctx, NULL); | |
257 | ||
258 | if (src->type == PMEM2_SOURCE_ANON) { | |
259 | ERR("Anonymous source does not support bad blocks"); | |
260 | return PMEM2_E_NOSUPP; | |
261 | } | |
262 | ||
263 | ASSERTeq(src->type, PMEM2_SOURCE_FD); | |
264 | ||
265 | struct ndctl_ctx *ctx; | |
266 | struct ndctl_region *region; | |
267 | struct ndctl_namespace *ndns; | |
268 | struct pmem2_badblock_context *tbbctx = NULL; | |
269 | enum pmem2_file_type pmem2_type; | |
270 | int ret = PMEM2_E_UNKNOWN; | |
271 | *bbctx = NULL; | |
272 | ||
273 | errno = ndctl_new(&ctx) * (-1); | |
274 | if (errno) { | |
275 | ERR("!ndctl_new"); | |
276 | return PMEM2_E_ERRNO; | |
277 | } | |
278 | ||
279 | pmem2_type = src->value.ftype; | |
280 | ||
281 | ret = pmem2_region_namespace(ctx, src, ®ion, &ndns); | |
282 | if (ret) { | |
283 | LOG(1, "getting region and namespace failed"); | |
284 | goto exit_ndctl_unref; | |
285 | } | |
286 | ||
287 | tbbctx = pmem2_zalloc(sizeof(struct pmem2_badblock_context), &ret); | |
288 | if (ret) | |
289 | goto exit_ndctl_unref; | |
290 | ||
291 | tbbctx->fd = src->value.fd; | |
292 | tbbctx->file_type = pmem2_type; | |
293 | tbbctx->ctx = ctx; | |
294 | ||
295 | if (region == NULL || ndns == NULL) { | |
296 | /* did not found any matching device */ | |
297 | *bbctx = tbbctx; | |
298 | return 0; | |
299 | } | |
300 | ||
301 | if (ndctl_namespace_get_mode(ndns) == NDCTL_NS_MODE_FSDAX) { | |
302 | tbbctx->ndns = ndns; | |
303 | tbbctx->pmem2_badblock_next_func = | |
304 | pmem2_badblock_next_namespace; | |
305 | tbbctx->pmem2_badblock_get_next_func = | |
306 | pmem2_namespace_get_first_badblock; | |
307 | } else { | |
308 | unsigned long long ns_beg, ns_size, ns_end; | |
309 | ret = badblocks_get_namespace_bounds( | |
310 | region, ndns, | |
311 | &ns_beg, &ns_size); | |
312 | if (ret) { | |
313 | LOG(1, "cannot read namespace's bounds"); | |
314 | goto error_free_all; | |
315 | } | |
316 | ||
317 | ns_end = ns_beg + ns_size - 1; | |
318 | ||
319 | LOG(10, | |
320 | "namespace: begin %llu, end %llu size %llu (in 512B sectors)", | |
321 | B2SEC(ns_beg), B2SEC(ns_end + 1) - 1, B2SEC(ns_size)); | |
322 | ||
323 | tbbctx->rgn.bus = ndctl_region_get_bus(region); | |
324 | tbbctx->rgn.region = region; | |
325 | tbbctx->rgn.ns_beg = ns_beg; | |
326 | tbbctx->rgn.ns_end = ns_end; | |
327 | tbbctx->rgn.ns_res = ns_beg + ndctl_region_get_resource(region); | |
328 | tbbctx->pmem2_badblock_next_func = | |
329 | pmem2_badblock_next_region; | |
330 | tbbctx->pmem2_badblock_get_next_func = | |
331 | pmem2_region_get_first_badblock; | |
332 | } | |
333 | ||
334 | if (pmem2_type == PMEM2_FTYPE_REG) { | |
335 | /* only regular files have extents */ | |
336 | ret = pmem2_extents_create_get(src->value.fd, &tbbctx->exts); | |
337 | if (ret) { | |
338 | LOG(1, "getting extents of fd %i failed", | |
339 | src->value.fd); | |
340 | goto error_free_all; | |
341 | } | |
342 | } | |
343 | ||
344 | /* set the context */ | |
345 | *bbctx = tbbctx; | |
346 | ||
347 | return 0; | |
348 | ||
349 | error_free_all: | |
350 | pmem2_extents_destroy(&tbbctx->exts); | |
351 | Free(tbbctx); | |
352 | ||
353 | exit_ndctl_unref: | |
354 | ndctl_unref(ctx); | |
355 | ||
356 | return ret; | |
357 | } | |
358 | ||
359 | /* | |
360 | * pmem2_badblock_context_delete -- delete and free the bad block context | |
361 | */ | |
362 | void | |
363 | pmem2_badblock_context_delete(struct pmem2_badblock_context **bbctx) | |
364 | { | |
365 | LOG(3, "bbctx %p", bbctx); | |
366 | PMEM2_ERR_CLR(); | |
367 | ||
368 | ASSERTne(bbctx, NULL); | |
369 | ||
370 | if (*bbctx == NULL) | |
371 | return; | |
372 | ||
373 | struct pmem2_badblock_context *tbbctx = *bbctx; | |
374 | ||
375 | pmem2_extents_destroy(&tbbctx->exts); | |
376 | ndctl_unref(tbbctx->ctx); | |
377 | Free(tbbctx); | |
378 | ||
379 | *bbctx = NULL; | |
380 | } | |
381 | ||
382 | /* | |
383 | * pmem2_namespace_get_next_badblock -- (internal) wrapper for | |
384 | * ndctl_namespace_get_next_badblock | |
385 | */ | |
386 | static void * | |
387 | pmem2_namespace_get_next_badblock(struct pmem2_badblock_context *bbctx) | |
388 | { | |
389 | LOG(3, "bbctx %p", bbctx); | |
390 | ||
391 | return ndctl_namespace_get_next_badblock(bbctx->ndns); | |
392 | } | |
393 | ||
394 | /* | |
395 | * pmem2_namespace_get_first_badblock -- (internal) wrapper for | |
396 | * ndctl_namespace_get_first_badblock | |
397 | */ | |
398 | static void * | |
399 | pmem2_namespace_get_first_badblock(struct pmem2_badblock_context *bbctx) | |
400 | { | |
401 | LOG(3, "bbctx %p", bbctx); | |
402 | ||
403 | bbctx->pmem2_badblock_get_next_func = pmem2_namespace_get_next_badblock; | |
404 | return ndctl_namespace_get_first_badblock(bbctx->ndns); | |
405 | } | |
406 | ||
407 | /* | |
408 | * pmem2_region_get_next_badblock -- (internal) wrapper for | |
409 | * ndctl_region_get_next_badblock | |
410 | */ | |
411 | static void * | |
412 | pmem2_region_get_next_badblock(struct pmem2_badblock_context *bbctx) | |
413 | { | |
414 | LOG(3, "bbctx %p", bbctx); | |
415 | ||
416 | return ndctl_region_get_next_badblock(bbctx->rgn.region); | |
417 | } | |
418 | ||
419 | /* | |
420 | * pmem2_region_get_first_badblock -- (internal) wrapper for | |
421 | * ndctl_region_get_first_badblock | |
422 | */ | |
423 | static void * | |
424 | pmem2_region_get_first_badblock(struct pmem2_badblock_context *bbctx) | |
425 | { | |
426 | LOG(3, "bbctx %p", bbctx); | |
427 | ||
428 | bbctx->pmem2_badblock_get_next_func = pmem2_region_get_next_badblock; | |
429 | return ndctl_region_get_first_badblock(bbctx->rgn.region); | |
430 | } | |
431 | ||
432 | /* | |
433 | * pmem2_badblock_next_namespace -- (internal) version of pmem2_badblock_next() | |
434 | * called for ndctl with namespace badblock | |
435 | * iterator | |
436 | * | |
437 | * This function works only for fsdax, but does not require any special | |
438 | * permissions. | |
439 | */ | |
440 | static int | |
441 | pmem2_badblock_next_namespace(struct pmem2_badblock_context *bbctx, | |
442 | struct pmem2_badblock *bb) | |
443 | { | |
444 | LOG(3, "bbctx %p bb %p", bbctx, bb); | |
445 | ||
446 | ASSERTne(bbctx, NULL); | |
447 | ASSERTne(bb, NULL); | |
448 | ||
449 | struct badblock *bbn; | |
450 | ||
451 | bbn = bbctx->pmem2_badblock_get_next_func(bbctx); | |
452 | if (bbn == NULL) | |
453 | return PMEM2_E_NO_BAD_BLOCK_FOUND; | |
454 | ||
455 | /* | |
456 | * libndctl returns offset and length of a bad block | |
457 | * both expressed in 512B sectors. Offset is relative | |
458 | * to the beginning of the namespace. | |
459 | */ | |
460 | bb->offset = SEC2B(bbn->offset); | |
461 | bb->length = SEC2B(bbn->len); | |
462 | ||
463 | return 0; | |
464 | } | |
465 | ||
466 | /* | |
467 | * pmem2_badblock_next_region -- (internal) version of pmem2_badblock_next() | |
468 | * called for ndctl with region badblock iterator | |
469 | * | |
470 | * This function works for all types of namespaces, but requires read access to | |
471 | * privileged device information. | |
472 | */ | |
473 | static int | |
474 | pmem2_badblock_next_region(struct pmem2_badblock_context *bbctx, | |
475 | struct pmem2_badblock *bb) | |
476 | { | |
477 | LOG(3, "bbctx %p bb %p", bbctx, bb); | |
478 | ||
479 | ASSERTne(bbctx, NULL); | |
480 | ASSERTne(bb, NULL); | |
481 | ||
482 | unsigned long long bb_beg, bb_end; | |
483 | unsigned long long beg, end; | |
484 | struct badblock *bbn; | |
485 | ||
486 | unsigned long long ns_beg = bbctx->rgn.ns_beg; | |
487 | unsigned long long ns_end = bbctx->rgn.ns_end; | |
488 | ||
489 | do { | |
490 | bbn = bbctx->pmem2_badblock_get_next_func(bbctx); | |
491 | if (bbn == NULL) | |
492 | return PMEM2_E_NO_BAD_BLOCK_FOUND; | |
493 | ||
494 | LOG(10, | |
495 | "region bad block: begin %llu end %llu length %u (in 512B sectors)", | |
496 | bbn->offset, bbn->offset + bbn->len - 1, bbn->len); | |
497 | ||
498 | /* | |
499 | * libndctl returns offset and length of a bad block | |
500 | * both expressed in 512B sectors. Offset is relative | |
501 | * to the beginning of the region. | |
502 | */ | |
503 | bb_beg = SEC2B(bbn->offset); | |
504 | bb_end = bb_beg + SEC2B(bbn->len) - 1; | |
505 | ||
506 | } while (bb_beg > ns_end || ns_beg > bb_end); | |
507 | ||
508 | beg = (bb_beg > ns_beg) ? bb_beg : ns_beg; | |
509 | end = (bb_end < ns_end) ? bb_end : ns_end; | |
510 | ||
511 | /* | |
512 | * Form a new bad block structure with offset and length | |
513 | * expressed in bytes and offset relative to the beginning | |
514 | * of the namespace. | |
515 | */ | |
516 | bb->offset = beg - ns_beg; | |
517 | bb->length = end - beg + 1; | |
518 | ||
519 | LOG(4, | |
520 | "namespace bad block: begin %llu end %llu length %llu (in 512B sectors)", | |
521 | B2SEC(beg - ns_beg), B2SEC(end - ns_beg), B2SEC(end - beg) + 1); | |
522 | ||
523 | return 0; | |
524 | } | |
525 | ||
526 | /* | |
527 | * pmem2_badblock_next -- get the next bad block | |
528 | */ | |
529 | int | |
530 | pmem2_badblock_next(struct pmem2_badblock_context *bbctx, | |
531 | struct pmem2_badblock *bb) | |
532 | { | |
533 | LOG(3, "bbctx %p bb %p", bbctx, bb); | |
534 | PMEM2_ERR_CLR(); | |
535 | ||
536 | ASSERTne(bbctx, NULL); | |
537 | ASSERTne(bb, NULL); | |
538 | ||
539 | struct pmem2_badblock bbn; | |
540 | unsigned long long bb_beg; | |
541 | unsigned long long bb_end; | |
542 | unsigned long long bb_len; | |
543 | unsigned long long bb_off; | |
544 | unsigned long long ext_beg = 0; /* placate compiler warnings */ | |
545 | unsigned long long ext_end = -1ULL; | |
546 | unsigned e; | |
547 | int ret; | |
548 | ||
549 | if (bbctx->rgn.region == NULL && bbctx->ndns == NULL) { | |
550 | ERR("Cannot find any matching device, no bad blocks found"); | |
551 | return PMEM2_E_NO_BAD_BLOCK_FOUND; | |
552 | } | |
553 | ||
554 | struct extents *exts = bbctx->exts; | |
555 | ||
556 | /* DAX devices have no extents */ | |
557 | if (!exts) { | |
558 | ret = bbctx->pmem2_badblock_next_func(bbctx, &bbn); | |
559 | *bb = bbn; | |
560 | return ret; | |
561 | } | |
562 | ||
563 | /* | |
564 | * There is at least one extent. | |
565 | * Loop until: | |
566 | * 1) a bad block overlaps with an extent or | |
567 | * 2) there are no more bad blocks. | |
568 | */ | |
569 | int bb_overlaps_with_extent = 0; | |
570 | do { | |
571 | if (bbctx->last_bb.length) { | |
572 | /* | |
573 | * We have saved the last bad block to check it | |
574 | * with the next extent saved | |
575 | * in bbctx->first_extent. | |
576 | */ | |
577 | ASSERTne(bbctx->first_extent, 0); | |
578 | bbn = bbctx->last_bb; | |
579 | bbctx->last_bb.offset = 0; | |
580 | bbctx->last_bb.length = 0; | |
581 | } else { | |
582 | ASSERTeq(bbctx->first_extent, 0); | |
583 | /* look for the next bad block */ | |
584 | ret = bbctx->pmem2_badblock_next_func(bbctx, &bbn); | |
585 | if (ret) | |
586 | return ret; | |
587 | } | |
588 | ||
589 | bb_beg = bbn.offset; | |
590 | bb_end = bb_beg + bbn.length - 1; | |
591 | ||
592 | for (e = bbctx->first_extent; | |
593 | e < exts->extents_count; | |
594 | e++) { | |
595 | ||
596 | ext_beg = exts->extents[e].offset_physical; | |
597 | ext_end = ext_beg + exts->extents[e].length - 1; | |
598 | ||
599 | /* check if the bad block overlaps with the extent */ | |
600 | if (bb_beg <= ext_end && ext_beg <= bb_end) { | |
601 | /* bad block overlaps with the extent */ | |
602 | bb_overlaps_with_extent = 1; | |
603 | ||
604 | if (bb_end > ext_end && | |
605 | e + 1 < exts->extents_count) { | |
606 | /* | |
607 | * The bad block is longer than | |
608 | * the extent and there are | |
609 | * more extents. | |
610 | * Save the current bad block | |
611 | * to check it with the next extent. | |
612 | */ | |
613 | bbctx->first_extent = e + 1; | |
614 | bbctx->last_bb = bbn; | |
615 | } else { | |
616 | /* | |
617 | * All extents were checked | |
618 | * with the current bad block. | |
619 | */ | |
620 | bbctx->first_extent = 0; | |
621 | bbctx->last_bb.length = 0; | |
622 | bbctx->last_bb.offset = 0; | |
623 | } | |
624 | break; | |
625 | } | |
626 | } | |
627 | ||
628 | /* check all extents with the next bad block */ | |
629 | if (bb_overlaps_with_extent == 0) { | |
630 | bbctx->first_extent = 0; | |
631 | bbctx->last_bb.length = 0; | |
632 | bbctx->last_bb.offset = 0; | |
633 | } | |
634 | ||
635 | } while (bb_overlaps_with_extent == 0); | |
636 | ||
637 | /* bad block overlaps with an extent */ | |
638 | ||
639 | bb_beg = (bb_beg > ext_beg) ? bb_beg : ext_beg; | |
640 | bb_end = (bb_end < ext_end) ? bb_end : ext_end; | |
641 | bb_len = bb_end - bb_beg + 1; | |
642 | bb_off = bb_beg + exts->extents[e].offset_logical | |
643 | - exts->extents[e].offset_physical; | |
644 | ||
645 | LOG(10, "bad block found: physical offset: %llu, length: %llu", | |
646 | bb_beg, bb_len); | |
647 | ||
648 | /* make sure the offset is block-aligned */ | |
649 | unsigned long long not_block_aligned = bb_off & (exts->blksize - 1); | |
650 | if (not_block_aligned) { | |
651 | bb_off -= not_block_aligned; | |
652 | bb_len += not_block_aligned; | |
653 | } | |
654 | ||
655 | /* make sure the length is block-aligned */ | |
656 | bb_len = ALIGN_UP(bb_len, exts->blksize); | |
657 | ||
658 | LOG(4, "bad block found: logical offset: %llu, length: %llu", | |
659 | bb_off, bb_len); | |
660 | ||
661 | /* | |
662 | * Return the bad block with offset and length | |
663 | * expressed in bytes and offset relative | |
664 | * to the beginning of the file. | |
665 | */ | |
666 | bb->offset = bb_off; | |
667 | bb->length = bb_len; | |
668 | ||
669 | return 0; | |
670 | } | |
671 | ||
672 | /* | |
673 | * pmem2_badblock_clear_fsdax -- (internal) clear one bad block | |
674 | * in a FSDAX device | |
675 | */ | |
676 | static int | |
677 | pmem2_badblock_clear_fsdax(int fd, const struct pmem2_badblock *bb) | |
678 | { | |
679 | LOG(3, "fd %i badblock %p", fd, bb); | |
680 | PMEM2_ERR_CLR(); | |
681 | ||
682 | ASSERTne(bb, NULL); | |
683 | ||
684 | LOG(10, | |
685 | "clearing a bad block: fd %i logical offset %zu length %zu (in 512B sectors)", | |
686 | fd, B2SEC(bb->offset), B2SEC(bb->length)); | |
687 | ||
688 | /* fallocate() takes offset as the off_t type */ | |
689 | if (bb->offset > (size_t)INT64_MAX) { | |
690 | ERR("bad block's offset is greater than INT64_MAX"); | |
691 | return PMEM2_E_OFFSET_OUT_OF_RANGE; | |
692 | } | |
693 | ||
694 | /* fallocate() takes length as the off_t type */ | |
695 | if (bb->length > (size_t)INT64_MAX) { | |
696 | ERR("bad block's length is greater than INT64_MAX"); | |
697 | return PMEM2_E_LENGTH_OUT_OF_RANGE; | |
698 | } | |
699 | ||
700 | off_t offset = (off_t)bb->offset; | |
701 | off_t length = (off_t)bb->length; | |
702 | ||
703 | /* deallocate bad blocks */ | |
704 | if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, | |
705 | offset, length)) { | |
706 | ERR("!fallocate"); | |
707 | return PMEM2_E_ERRNO; | |
708 | } | |
709 | ||
710 | /* allocate new blocks */ | |
711 | if (fallocate(fd, FALLOC_FL_KEEP_SIZE, offset, length)) { | |
712 | ERR("!fallocate"); | |
713 | return PMEM2_E_ERRNO; | |
714 | } | |
715 | ||
716 | return 0; | |
717 | } | |
718 | ||
719 | /* | |
720 | * pmem2_badblock_clear_devdax -- (internal) clear one bad block | |
721 | * in a DAX device | |
722 | */ | |
723 | static int | |
724 | pmem2_badblock_clear_devdax(const struct pmem2_badblock_context *bbctx, | |
725 | const struct pmem2_badblock *bb) | |
726 | { | |
727 | LOG(3, "bbctx %p bb %p", bbctx, bb); | |
728 | ||
729 | ASSERTne(bb, NULL); | |
730 | ASSERTne(bbctx, NULL); | |
731 | ASSERTne(bbctx->rgn.bus, NULL); | |
732 | ASSERTne(bbctx->rgn.ns_res, 0); | |
733 | ||
734 | LOG(4, | |
735 | "clearing a bad block: offset %zu length %zu (in 512B sectors)", | |
736 | B2SEC(bb->offset), B2SEC(bb->length)); | |
737 | ||
738 | int ret = badblocks_devdax_clear_one_badblock(bbctx->rgn.bus, | |
739 | bb->offset + bbctx->rgn.ns_res, | |
740 | bb->length); | |
741 | if (ret) { | |
742 | LOG(1, | |
743 | "failed to clear a bad block: offset %zu length %zu (in 512B sectors)", | |
744 | B2SEC(bb->offset), | |
745 | B2SEC(bb->length)); | |
746 | return ret; | |
747 | } | |
748 | ||
749 | return 0; | |
750 | } | |
751 | ||
752 | /* | |
753 | * pmem2_badblock_clear -- clear one bad block | |
754 | */ | |
755 | int | |
756 | pmem2_badblock_clear(struct pmem2_badblock_context *bbctx, | |
757 | const struct pmem2_badblock *bb) | |
758 | { | |
759 | LOG(3, "bbctx %p badblock %p", bbctx, bb); | |
760 | PMEM2_ERR_CLR(); | |
761 | ||
762 | ASSERTne(bbctx, NULL); | |
763 | ASSERTne(bb, NULL); | |
764 | ||
765 | if (bbctx->file_type == PMEM2_FTYPE_DEVDAX) | |
766 | return pmem2_badblock_clear_devdax(bbctx, bb); | |
767 | ||
768 | ASSERTeq(bbctx->file_type, PMEM2_FTYPE_REG); | |
769 | ||
770 | return pmem2_badblock_clear_fsdax(bbctx->fd, bb); | |
771 | } |