1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (C) STRATO AG 2011. All rights reserved.
7 * This module can be used to catch cases when the btrfs kernel
8 * code executes write requests to the disk that bring the file
9 * system into an inconsistent state. In such a state, a power-loss
10 * or kernel panic event would cause the data on disk to be
11 * lost or at least damaged.
13 * Code is added that examines all block write requests during
14 * runtime (including writes of the super block). Three rules
15 * are verified and an error is printed on violation of the
17 * 1. It is not allowed to write a disk block which is
18 * currently referenced by the super block (either directly
20 * 2. When a super block is written, it is verified that all
21 * referenced (directly or indirectly) blocks fulfill the
22 * following requirements:
23 * 2a. All referenced blocks have either been present when
24 * the file system was mounted, (i.e., they have been
25 * referenced by the super block) or they have been
26 * written since then and the write completion callback
27 * was called and no write error was indicated and a
28 * FLUSH request to the device where these blocks are
29 * located was received and completed.
30 * 2b. All referenced blocks need to have a generation
31 * number which is equal to the parent's number.
33 * One issue that was found using this module was that the log
34 * tree on disk became temporarily corrupted because disk blocks
35 * that had been in use for the log tree had been freed and
36 * reused too early, while being referenced by the written super
39 * The search term in the kernel log that can be used to filter
40 * on the existence of detected integrity issues is
43 * The integrity check is enabled via mount options. These
44 * mount options are only supported if the integrity check
45 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
47 * Example #1, apply integrity checks to all metadata:
48 * mount /dev/sdb1 /mnt -o check_int
50 * Example #2, apply integrity checks to all metadata and
52 * mount /dev/sdb1 /mnt -o check_int_data
54 * Example #3, apply integrity checks to all metadata and dump
55 * the tree that the super block references to kernel messages
56 * each time after a super block was written:
57 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
59 * If the integrity check tool is included and activated in
60 * the mount options, plenty of kernel memory is used, and
61 * plenty of additional CPU cycles are spent. Enabling this
62 * functionality is not intended for normal use. In most
63 * cases, unless you are a btrfs developer who needs to verify
64 * the integrity of (super)-block write requests, do not
65 * enable the config option BTRFS_FS_CHECK_INTEGRITY to
66 * include and compile the integrity check tool.
68 * Expect millions of lines of information in the kernel log with an
69 * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the
70 * kernel config to at least 26 (which is 64MB). Usually the value is
71 * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be
72 * changed like this before LOG_BUF_SHIFT can be set to a high value:
73 * config LOG_BUF_SHIFT
74 * int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
78 #include <linux/sched.h>
79 #include <linux/slab.h>
80 #include <linux/mutex.h>
81 #include <linux/genhd.h>
82 #include <linux/blkdev.h>
84 #include <linux/string.h>
85 #include <crypto/hash.h>
88 #include "transaction.h"
89 #include "extent_io.h"
91 #include "print-tree.h"
93 #include "check-integrity.h"
94 #include "rcu-string.h"
95 #include "compression.h"
97 #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
98 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
99 #define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
100 #define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
101 #define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
102 #define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
103 #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
104 #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters,
105 * excluding " [...]" */
106 #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
109 * The definition of the bitmask fields for the print_mask.
110 * They are specified with the mount option check_int_print_mask.
112 #define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE 0x00000001
113 #define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION 0x00000002
114 #define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE 0x00000004
115 #define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE 0x00000008
116 #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH 0x00000010
117 #define BTRFSIC_PRINT_MASK_END_IO_BIO_BH 0x00000020
118 #define BTRFSIC_PRINT_MASK_VERBOSE 0x00000040
119 #define BTRFSIC_PRINT_MASK_VERY_VERBOSE 0x00000080
120 #define BTRFSIC_PRINT_MASK_INITIAL_TREE 0x00000100
121 #define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES 0x00000200
122 #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400
123 #define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800
124 #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000
125 #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE 0x00002000
127 struct btrfsic_dev_state
;
128 struct btrfsic_state
;
/*
 * In-memory record for one tracked disk block. Records are chained into
 * a hash bucket through collision_resolving_node and into the per-state
 * list of all blocks through all_blocks_node.
 */
130 struct btrfsic_block
{
131 u32 magic_num
; /* only used for debug purposes */
132 unsigned int is_metadata
:1; /* if it is meta-data, not data-data */
133 unsigned int is_superblock
:1; /* if it is one of the superblocks */
134 unsigned int is_iodone
:1; /* if is done by lower subsystem */
135 unsigned int iodone_w_error
:1; /* error was indicated to endio */
136 unsigned int never_written
:1; /* block was added because it was
137 * referenced, not because it was
139 unsigned int mirror_num
; /* large enough to hold
140 * BTRFS_SUPER_MIRROR_MAX */
141 struct btrfsic_dev_state
*dev_state
; /* its bdev pointer is mixed into the hash bucket index */
142 u64 dev_bytenr
; /* key, physical byte num on disk */
143 u64 logical_bytenr
; /* logical byte num on disk */
145 struct btrfs_disk_key disk_key
; /* extra info to print in case of
146 * issues, will not always be correct */
147 struct list_head collision_resolving_node
; /* list node */
148 struct list_head all_blocks_node
; /* list node */
150 /* the following two lists contain block_link items */
151 struct list_head ref_to_list
; /* list */
152 struct list_head ref_from_list
; /* list */
153 struct btrfsic_block
*next_in_same_bio
;
/* NOTE(review): the two orig_bio_* fields presumably save the bio's
 * original completion data while it is intercepted - confirm at users */
154 void *orig_bio_private
;
155 bio_end_io_t
*orig_bio_end_io
;
156 int submit_bio_bh_rw
;
157 u64 flush_gen
; /* only valid if !never_written */
161 * Elements of this type are allocated dynamically and required because
162 * each block object can refer to, and can be referenced from, multiple blocks.
163 * The key to lookup them in the hashtable is the dev_bytenr of
164 * the block ref to plus the one from the block referred from.
165 * The fact that they are searchable via a hashtable and that a
166 * ref_cnt is maintained is not required for the btrfs integrity
167 * check algorithm itself, it is only used to make the output more
168 * beautiful in case that an error is detected (an error is defined
169 * as a write operation to a block while that block is still referenced).
/*
 * One reference between two tracked blocks (block_ref_from and
 * block_ref_to). A link is chained into a hash bucket through
 * collision_resolving_node.
 */
171 struct btrfsic_block_link
{
172 u32 magic_num
; /* only used for debug purposes */
174 struct list_head node_ref_to
; /* list node */
175 struct list_head node_ref_from
; /* list node */
176 struct list_head collision_resolving_node
; /* list node */
177 struct btrfsic_block
*block_ref_to
; /* must be non-NULL once the link is hashed */
178 struct btrfsic_block
*block_ref_from
; /* must be non-NULL once the link is hashed */
179 u64 parent_generation
;
/*
 * Per-device bookkeeping. Entries are hashed by bdev->bd_dev and chained
 * through collision_resolving_node.
 */
182 struct btrfsic_dev_state
{
183 u32 magic_num
; /* only used for debug purposes */
184 struct block_device
*bdev
;
185 struct btrfsic_state
*state
;
186 struct list_head collision_resolving_node
; /* list node */
/* embedded block; initialised with is_iodone = 1 and dev_state pointing
 * back at this structure */
187 struct btrfsic_block dummy_block_for_bio_bh_flush
;
189 char name
[BDEVNAME_SIZE
];
/* Hash table of btrfsic_block records: an array of list-head buckets. */
192 struct btrfsic_block_hashtable
{
193 struct list_head table
[BTRFSIC_BLOCK_HASHTABLE_SIZE
];
/* Hash table of btrfsic_block_link records: an array of list-head buckets. */
196 struct btrfsic_block_link_hashtable
{
197 struct list_head table
[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE
];
/* Hash table of btrfsic_dev_state records: an array of list-head buckets. */
200 struct btrfsic_dev_state_hashtable
{
201 struct list_head table
[BTRFSIC_DEV2STATE_HASHTABLE_SIZE
];
/*
 * Describes where one block lives: its virtual and physical byte numbers
 * and the device state it was mapped to. Further members (e.g. the datav
 * and len fields used elsewhere in this file) are not visible here.
 */
204 struct btrfsic_block_data_ctx
{
205 u64 start
; /* virtual bytenr */
206 u64 dev_bytenr
; /* physical bytenr on device */
208 struct btrfsic_dev_state
*dev
;
214 /* This structure is used to implement recursion without occupying
215 * any stack space, refer to btrfsic_process_metablock() */
216 struct btrfsic_stack_frame
{
224 struct btrfsic_block
*block
; /* block handled by this frame */
225 struct btrfsic_block_data_ctx
*block_ctx
; /* data context belonging to block */
226 struct btrfsic_block
*next_block
;
227 struct btrfsic_block_data_ctx next_block_ctx
;
228 struct btrfs_header
*hdr
; /* header whose level and generation are read while processing */
229 struct btrfsic_stack_frame
*prev
; /* NOTE(review): presumably the frame to return to - confirm */
232 /* Some state per mounted filesystem */
233 struct btrfsic_state
{
235 int include_extent_data
;
237 struct list_head all_blocks_list
; /* every btrfsic_block is linked here via all_blocks_node */
238 struct btrfsic_block_hashtable block_hashtable
;
239 struct btrfsic_block_link_hashtable block_link_hashtable
;
240 struct btrfs_fs_info
*fs_info
;
241 u64 max_superblock_generation
; /* highest superblock generation seen so far; 0 means none yet */
242 struct btrfsic_block
*latest_superblock
; /* superblock copy that supplied max_superblock_generation */
247 static void btrfsic_block_init(struct btrfsic_block
*b
);
248 static struct btrfsic_block
*btrfsic_block_alloc(void);
249 static void btrfsic_block_free(struct btrfsic_block
*b
);
250 static void btrfsic_block_link_init(struct btrfsic_block_link
*n
);
251 static struct btrfsic_block_link
*btrfsic_block_link_alloc(void);
252 static void btrfsic_block_link_free(struct btrfsic_block_link
*n
);
253 static void btrfsic_dev_state_init(struct btrfsic_dev_state
*ds
);
254 static struct btrfsic_dev_state
*btrfsic_dev_state_alloc(void);
255 static void btrfsic_dev_state_free(struct btrfsic_dev_state
*ds
);
256 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable
*h
);
257 static void btrfsic_block_hashtable_add(struct btrfsic_block
*b
,
258 struct btrfsic_block_hashtable
*h
);
259 static void btrfsic_block_hashtable_remove(struct btrfsic_block
*b
);
260 static struct btrfsic_block
*btrfsic_block_hashtable_lookup(
261 struct block_device
*bdev
,
263 struct btrfsic_block_hashtable
*h
);
264 static void btrfsic_block_link_hashtable_init(
265 struct btrfsic_block_link_hashtable
*h
);
266 static void btrfsic_block_link_hashtable_add(
267 struct btrfsic_block_link
*l
,
268 struct btrfsic_block_link_hashtable
*h
);
269 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link
*l
);
270 static struct btrfsic_block_link
*btrfsic_block_link_hashtable_lookup(
271 struct block_device
*bdev_ref_to
,
272 u64 dev_bytenr_ref_to
,
273 struct block_device
*bdev_ref_from
,
274 u64 dev_bytenr_ref_from
,
275 struct btrfsic_block_link_hashtable
*h
);
276 static void btrfsic_dev_state_hashtable_init(
277 struct btrfsic_dev_state_hashtable
*h
);
278 static void btrfsic_dev_state_hashtable_add(
279 struct btrfsic_dev_state
*ds
,
280 struct btrfsic_dev_state_hashtable
*h
);
281 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state
*ds
);
282 static struct btrfsic_dev_state
*btrfsic_dev_state_hashtable_lookup(dev_t dev
,
283 struct btrfsic_dev_state_hashtable
*h
);
284 static struct btrfsic_stack_frame
*btrfsic_stack_frame_alloc(void);
285 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame
*sf
);
286 static int btrfsic_process_superblock(struct btrfsic_state
*state
,
287 struct btrfs_fs_devices
*fs_devices
);
288 static int btrfsic_process_metablock(struct btrfsic_state
*state
,
289 struct btrfsic_block
*block
,
290 struct btrfsic_block_data_ctx
*block_ctx
,
291 int limit_nesting
, int force_iodone_flag
);
292 static void btrfsic_read_from_block_data(
293 struct btrfsic_block_data_ctx
*block_ctx
,
294 void *dst
, u32 offset
, size_t len
);
295 static int btrfsic_create_link_to_next_block(
296 struct btrfsic_state
*state
,
297 struct btrfsic_block
*block
,
298 struct btrfsic_block_data_ctx
299 *block_ctx
, u64 next_bytenr
,
301 struct btrfsic_block_data_ctx
*next_block_ctx
,
302 struct btrfsic_block
**next_blockp
,
303 int force_iodone_flag
,
304 int *num_copiesp
, int *mirror_nump
,
305 struct btrfs_disk_key
*disk_key
,
306 u64 parent_generation
);
307 static int btrfsic_handle_extent_data(struct btrfsic_state
*state
,
308 struct btrfsic_block
*block
,
309 struct btrfsic_block_data_ctx
*block_ctx
,
310 u32 item_offset
, int force_iodone_flag
);
311 static int btrfsic_map_block(struct btrfsic_state
*state
, u64 bytenr
, u32 len
,
312 struct btrfsic_block_data_ctx
*block_ctx_out
,
314 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx
*block_ctx
);
315 static int btrfsic_read_block(struct btrfsic_state
*state
,
316 struct btrfsic_block_data_ctx
*block_ctx
);
317 static void btrfsic_dump_database(struct btrfsic_state
*state
);
318 static int btrfsic_test_for_metadata(struct btrfsic_state
*state
,
319 char **datav
, unsigned int num_pages
);
320 static void btrfsic_process_written_block(struct btrfsic_dev_state
*dev_state
,
321 u64 dev_bytenr
, char **mapped_datav
,
322 unsigned int num_pages
,
323 struct bio
*bio
, int *bio_is_patched
,
324 int submit_bio_bh_rw
);
325 static int btrfsic_process_written_superblock(
326 struct btrfsic_state
*state
,
327 struct btrfsic_block
*const block
,
328 struct btrfs_super_block
*const super_hdr
);
329 static void btrfsic_bio_end_io(struct bio
*bp
);
330 static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state
*state
,
331 const struct btrfsic_block
*block
,
332 int recursion_level
);
333 static int btrfsic_check_all_ref_blocks(struct btrfsic_state
*state
,
334 struct btrfsic_block
*const block
,
335 int recursion_level
);
336 static void btrfsic_print_add_link(const struct btrfsic_state
*state
,
337 const struct btrfsic_block_link
*l
);
338 static void btrfsic_print_rem_link(const struct btrfsic_state
*state
,
339 const struct btrfsic_block_link
*l
);
340 static char btrfsic_get_block_type(const struct btrfsic_state
*state
,
341 const struct btrfsic_block
*block
);
342 static void btrfsic_dump_tree(const struct btrfsic_state
*state
);
343 static void btrfsic_dump_tree_sub(const struct btrfsic_state
*state
,
344 const struct btrfsic_block
*block
,
346 static struct btrfsic_block_link
*btrfsic_block_link_lookup_or_add(
347 struct btrfsic_state
*state
,
348 struct btrfsic_block_data_ctx
*next_block_ctx
,
349 struct btrfsic_block
*next_block
,
350 struct btrfsic_block
*from_block
,
351 u64 parent_generation
);
352 static struct btrfsic_block
*btrfsic_block_lookup_or_add(
353 struct btrfsic_state
*state
,
354 struct btrfsic_block_data_ctx
*block_ctx
,
355 const char *additional_string
,
361 static int btrfsic_process_superblock_dev_mirror(
362 struct btrfsic_state
*state
,
363 struct btrfsic_dev_state
*dev_state
,
364 struct btrfs_device
*device
,
365 int superblock_mirror_num
,
366 struct btrfsic_dev_state
**selected_dev_state
,
367 struct btrfs_super_block
*selected_super
);
368 static struct btrfsic_dev_state
*btrfsic_dev_state_lookup(dev_t dev
);
369 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state
*state
,
371 struct btrfsic_dev_state
*dev_state
,
/*
 * File-scope state: a mutex, an "is initialized" flag and the hashtable
 * of device states.
 * NOTE(review): presumably btrfsic_mutex serialises access to the other
 * two - confirm against the users of these variables.
 */
374 static struct mutex btrfsic_mutex
;
375 static int btrfsic_is_initialized
;
376 static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable
;
/*
 * Put a block record into its initial state: stamp the debug magic
 * number, mark the generation as unknown, clear the disk key, flags and
 * saved bio fields, and initialise all four list heads as empty.
 */
379 static void btrfsic_block_init(struct btrfsic_block
*b
)
381 b
->magic_num
= BTRFSIC_BLOCK_MAGIC_NUMBER
;
384 b
->logical_bytenr
= 0;
385 b
->generation
= BTRFSIC_GENERATION_UNKNOWN
;
386 b
->disk_key
.objectid
= 0;
387 b
->disk_key
.type
= 0;
388 b
->disk_key
.offset
= 0;
390 b
->is_superblock
= 0;
392 b
->iodone_w_error
= 0;
393 b
->never_written
= 0;
395 b
->next_in_same_bio
= NULL
;
396 b
->orig_bio_private
= NULL
;
397 b
->orig_bio_end_io
= NULL
;
398 INIT_LIST_HEAD(&b
->collision_resolving_node
);
399 INIT_LIST_HEAD(&b
->all_blocks_node
);
400 INIT_LIST_HEAD(&b
->ref_to_list
);
401 INIT_LIST_HEAD(&b
->ref_from_list
);
402 b
->submit_bio_bh_rw
= 0;
/*
 * Allocate a zeroed block record with GFP_NOFS and run
 * btrfsic_block_init() on it. Callers in this file treat a NULL result
 * as allocation failure.
 */
406 static struct btrfsic_block
*btrfsic_block_alloc(void)
408 struct btrfsic_block
*b
;
410 b
= kzalloc(sizeof(*b
), GFP_NOFS
);
412 btrfsic_block_init(b
);
/*
 * Release a block record. The assertion accepts either a NULL pointer or
 * a record that still carries the block magic number.
 */
417 static void btrfsic_block_free(struct btrfsic_block
*b
)
419 BUG_ON(!(NULL
== b
|| BTRFSIC_BLOCK_MAGIC_NUMBER
== b
->magic_num
));
/*
 * Put a block link into its initial state: stamp the debug magic number,
 * initialise its three list nodes as empty and clear both block pointers.
 */
423 static void btrfsic_block_link_init(struct btrfsic_block_link
*l
)
425 l
->magic_num
= BTRFSIC_BLOCK_LINK_MAGIC_NUMBER
;
427 INIT_LIST_HEAD(&l
->node_ref_to
);
428 INIT_LIST_HEAD(&l
->node_ref_from
);
429 INIT_LIST_HEAD(&l
->collision_resolving_node
);
430 l
->block_ref_to
= NULL
;
431 l
->block_ref_from
= NULL
;
/*
 * Allocate a zeroed block link with GFP_NOFS and run
 * btrfsic_block_link_init() on it.
 */
434 static struct btrfsic_block_link
*btrfsic_block_link_alloc(void)
436 struct btrfsic_block_link
*l
;
438 l
= kzalloc(sizeof(*l
), GFP_NOFS
);
440 btrfsic_block_link_init(l
);
/*
 * Release a block link. The assertion accepts either a NULL pointer or a
 * link that still carries the block-link magic number.
 */
445 static void btrfsic_block_link_free(struct btrfsic_block_link
*l
)
447 BUG_ON(!(NULL
== l
|| BTRFSIC_BLOCK_LINK_MAGIC_NUMBER
== l
->magic_num
));
/*
 * Put a device state into its initial state: stamp the debug magic
 * number, empty the hash-chain node, reset last_flush_gen, and set up the
 * embedded dummy flush block so that it counts as I/O-done and points
 * back at this device state.
 */
451 static void btrfsic_dev_state_init(struct btrfsic_dev_state
*ds
)
453 ds
->magic_num
= BTRFSIC_DEV2STATE_MAGIC_NUMBER
;
457 INIT_LIST_HEAD(&ds
->collision_resolving_node
);
458 ds
->last_flush_gen
= 0;
459 btrfsic_block_init(&ds
->dummy_block_for_bio_bh_flush
);
460 ds
->dummy_block_for_bio_bh_flush
.is_iodone
= 1;
461 ds
->dummy_block_for_bio_bh_flush
.dev_state
= ds
;
/*
 * Allocate a zeroed device state with GFP_NOFS and run
 * btrfsic_dev_state_init() on it.
 */
464 static struct btrfsic_dev_state
*btrfsic_dev_state_alloc(void)
466 struct btrfsic_dev_state
*ds
;
468 ds
= kzalloc(sizeof(*ds
), GFP_NOFS
);
470 btrfsic_dev_state_init(ds
);
/*
 * Release a device state. The assertion accepts either a NULL pointer or
 * a device state that still carries the dev2state magic number.
 */
475 static void btrfsic_dev_state_free(struct btrfsic_dev_state
*ds
)
477 BUG_ON(!(NULL
== ds
||
478 BTRFSIC_DEV2STATE_MAGIC_NUMBER
== ds
->magic_num
));
/* Initialise every bucket of the block hashtable as an empty list. */
482 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable
*h
)
486 for (i
= 0; i
< BTRFSIC_BLOCK_HASHTABLE_SIZE
; i
++)
487 INIT_LIST_HEAD(h
->table
+ i
);
490 static void btrfsic_block_hashtable_add(struct btrfsic_block
*b
,
491 struct btrfsic_block_hashtable
*h
)
493 const unsigned int hashval
=
494 (((unsigned int)(b
->dev_bytenr
>> 16)) ^
495 ((unsigned int)((uintptr_t)b
->dev_state
->bdev
))) &
496 (BTRFSIC_BLOCK_HASHTABLE_SIZE
- 1);
498 list_add(&b
->collision_resolving_node
, h
->table
+ hashval
);
501 static void btrfsic_block_hashtable_remove(struct btrfsic_block
*b
)
503 list_del(&b
->collision_resolving_node
);
/*
 * Search hashtable @h for the block identified by (@bdev, dev_bytenr).
 * The bucket index is computed exactly as in
 * btrfsic_block_hashtable_add(); within the bucket an entry matches when
 * both its device and its physical byte number are equal to the key.
 * The statements executed on a hit and on a miss are not visible here.
 */
506 static struct btrfsic_block
*btrfsic_block_hashtable_lookup(
507 struct block_device
*bdev
,
509 struct btrfsic_block_hashtable
*h
)
511 const unsigned int hashval
=
512 (((unsigned int)(dev_bytenr
>> 16)) ^
513 ((unsigned int)((uintptr_t)bdev
))) &
514 (BTRFSIC_BLOCK_HASHTABLE_SIZE
- 1);
515 struct btrfsic_block
*b
;
517 list_for_each_entry(b
, h
->table
+ hashval
, collision_resolving_node
) {
518 if (b
->dev_state
->bdev
== bdev
&& b
->dev_bytenr
== dev_bytenr
)
/* Initialise every bucket of the block-link hashtable as an empty list. */
525 static void btrfsic_block_link_hashtable_init(
526 struct btrfsic_block_link_hashtable
*h
)
530 for (i
= 0; i
< BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE
; i
++)
531 INIT_LIST_HEAD(h
->table
+ i
);
534 static void btrfsic_block_link_hashtable_add(
535 struct btrfsic_block_link
*l
,
536 struct btrfsic_block_link_hashtable
*h
)
538 const unsigned int hashval
=
539 (((unsigned int)(l
->block_ref_to
->dev_bytenr
>> 16)) ^
540 ((unsigned int)(l
->block_ref_from
->dev_bytenr
>> 16)) ^
541 ((unsigned int)((uintptr_t)l
->block_ref_to
->dev_state
->bdev
)) ^
542 ((unsigned int)((uintptr_t)l
->block_ref_from
->dev_state
->bdev
)))
543 & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE
- 1);
545 BUG_ON(NULL
== l
->block_ref_to
);
546 BUG_ON(NULL
== l
->block_ref_from
);
547 list_add(&l
->collision_resolving_node
, h
->table
+ hashval
);
550 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link
*l
)
552 list_del(&l
->collision_resolving_node
);
/*
 * Search hashtable @h for the link whose "to" endpoint is
 * (@bdev_ref_to, @dev_bytenr_ref_to) and whose "from" endpoint is
 * (@bdev_ref_from, @dev_bytenr_ref_from). The bucket index is computed
 * from the same four values as in btrfsic_block_link_hashtable_add().
 * Every link in the bucket is asserted to have both endpoints set before
 * they are compared. The statements executed on a hit and on a miss are
 * not visible here.
 */
555 static struct btrfsic_block_link
*btrfsic_block_link_hashtable_lookup(
556 struct block_device
*bdev_ref_to
,
557 u64 dev_bytenr_ref_to
,
558 struct block_device
*bdev_ref_from
,
559 u64 dev_bytenr_ref_from
,
560 struct btrfsic_block_link_hashtable
*h
)
562 const unsigned int hashval
=
563 (((unsigned int)(dev_bytenr_ref_to
>> 16)) ^
564 ((unsigned int)(dev_bytenr_ref_from
>> 16)) ^
565 ((unsigned int)((uintptr_t)bdev_ref_to
)) ^
566 ((unsigned int)((uintptr_t)bdev_ref_from
))) &
567 (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE
- 1);
568 struct btrfsic_block_link
*l
;
570 list_for_each_entry(l
, h
->table
+ hashval
, collision_resolving_node
) {
571 BUG_ON(NULL
== l
->block_ref_to
);
572 BUG_ON(NULL
== l
->block_ref_from
);
573 if (l
->block_ref_to
->dev_state
->bdev
== bdev_ref_to
&&
574 l
->block_ref_to
->dev_bytenr
== dev_bytenr_ref_to
&&
575 l
->block_ref_from
->dev_state
->bdev
== bdev_ref_from
&&
576 l
->block_ref_from
->dev_bytenr
== dev_bytenr_ref_from
)
/* Initialise every bucket of the device-state hashtable as an empty list. */
583 static void btrfsic_dev_state_hashtable_init(
584 struct btrfsic_dev_state_hashtable
*h
)
588 for (i
= 0; i
< BTRFSIC_DEV2STATE_HASHTABLE_SIZE
; i
++)
589 INIT_LIST_HEAD(h
->table
+ i
);
592 static void btrfsic_dev_state_hashtable_add(
593 struct btrfsic_dev_state
*ds
,
594 struct btrfsic_dev_state_hashtable
*h
)
596 const unsigned int hashval
=
597 (((unsigned int)((uintptr_t)ds
->bdev
->bd_dev
)) &
598 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE
- 1));
600 list_add(&ds
->collision_resolving_node
, h
->table
+ hashval
);
603 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state
*ds
)
605 list_del(&ds
->collision_resolving_node
);
/*
 * Search hashtable @h for the device state whose block device has device
 * number @dev. The bucket index is @dev masked to the table size; within
 * the bucket an entry matches when bdev->bd_dev equals @dev.
 * The statements executed on a hit and on a miss are not visible here.
 */
608 static struct btrfsic_dev_state
*btrfsic_dev_state_hashtable_lookup(dev_t dev
,
609 struct btrfsic_dev_state_hashtable
*h
)
611 const unsigned int hashval
=
612 dev
& (BTRFSIC_DEV2STATE_HASHTABLE_SIZE
- 1);
613 struct btrfsic_dev_state
*ds
;
615 list_for_each_entry(ds
, h
->table
+ hashval
, collision_resolving_node
) {
616 if (ds
->bdev
->bd_dev
== dev
)
/*
 * Build the initial picture of the filesystem from its superblocks.
 *
 * First, every device on fs_devices->devices that has both a bdev and a
 * name is looked up in the device-state table, and each of its
 * BTRFS_SUPER_MIRROR_MAX superblock copies is handed to
 * btrfsic_process_superblock_dev_mirror(). Then a three-iteration loop
 * reads the root tree, chunk tree and log tree locations from the
 * selected superblock; a log root of 0 is special-cased. For every
 * mirror of such a tree root the block is mapped, looked up in the block
 * and block-link hashtables, read, and passed to
 * btrfsic_process_metablock().
 *
 * selected_super is a temporary kzalloc() buffer that is freed on every
 * exit path visible here.
 *
 * NOTE(review): several control-flow lines of this function are not
 * visible in this view, so the exact return values are not documented.
 */
623 static int btrfsic_process_superblock(struct btrfsic_state
*state
,
624 struct btrfs_fs_devices
*fs_devices
)
626 struct btrfs_super_block
*selected_super
;
627 struct list_head
*dev_head
= &fs_devices
->devices
;
628 struct btrfs_device
*device
;
629 struct btrfsic_dev_state
*selected_dev_state
= NULL
;
633 selected_super
= kzalloc(sizeof(*selected_super
), GFP_NOFS
);
637 list_for_each_entry(device
, dev_head
, dev_list
) {
639 struct btrfsic_dev_state
*dev_state
;
641 if (!device
->bdev
|| !device
->name
)
644 dev_state
= btrfsic_dev_state_lookup(device
->bdev
->bd_dev
);
645 BUG_ON(NULL
== dev_state
);
646 for (i
= 0; i
< BTRFS_SUPER_MIRROR_MAX
; i
++) {
647 ret
= btrfsic_process_superblock_dev_mirror(
648 state
, dev_state
, device
, i
,
649 &selected_dev_state
, selected_super
);
/* only a failure on the first copy (i == 0) takes this branch */
650 if (0 != ret
&& 0 == i
) {
651 kfree(selected_super
);
657 if (NULL
== state
->latest_superblock
) {
658 pr_info("btrfsic: no superblock found!\n");
659 kfree(selected_super
);
663 state
->csum_size
= btrfs_super_csum_size(selected_super
);
665 for (pass
= 0; pass
< 3; pass
++) {
672 next_bytenr
= btrfs_super_root(selected_super
);
673 if (state
->print_mask
&
674 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION
)
675 pr_info("root@%llu\n", next_bytenr
);
678 next_bytenr
= btrfs_super_chunk_root(selected_super
);
679 if (state
->print_mask
&
680 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION
)
681 pr_info("chunk@%llu\n", next_bytenr
);
684 next_bytenr
= btrfs_super_log_root(selected_super
);
685 if (0 == next_bytenr
)
687 if (state
->print_mask
&
688 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION
)
689 pr_info("log@%llu\n", next_bytenr
);
693 num_copies
= btrfs_num_copies(state
->fs_info
, next_bytenr
,
694 state
->metablock_size
);
695 if (state
->print_mask
& BTRFSIC_PRINT_MASK_NUM_COPIES
)
696 pr_info("num_copies(log_bytenr=%llu) = %d\n",
697 next_bytenr
, num_copies
);
/* mirror numbers are 1-based */
699 for (mirror_num
= 1; mirror_num
<= num_copies
; mirror_num
++) {
700 struct btrfsic_block
*next_block
;
701 struct btrfsic_block_data_ctx tmp_next_block_ctx
;
702 struct btrfsic_block_link
*l
;
704 ret
= btrfsic_map_block(state
, next_bytenr
,
705 state
->metablock_size
,
709 pr_info("btrfsic: btrfsic_map_block(root @%llu, mirror %d) failed!\n",
710 next_bytenr
, mirror_num
);
711 kfree(selected_super
);
/* the tree root must already be registered in the block hashtable */
715 next_block
= btrfsic_block_hashtable_lookup(
716 tmp_next_block_ctx
.dev
->bdev
,
717 tmp_next_block_ctx
.dev_bytenr
,
718 &state
->block_hashtable
);
719 BUG_ON(NULL
== next_block
);
721 l
= btrfsic_block_link_hashtable_lookup(
722 tmp_next_block_ctx
.dev
->bdev
,
723 tmp_next_block_ctx
.dev_bytenr
,
724 state
->latest_superblock
->dev_state
->
726 state
->latest_superblock
->dev_bytenr
,
727 &state
->block_link_hashtable
);
730 ret
= btrfsic_read_block(state
, &tmp_next_block_ctx
);
/* a result smaller than one page is treated as a failed read */
731 if (ret
< (int)PAGE_SIZE
) {
732 pr_info("btrfsic: read @logical %llu failed!\n",
733 tmp_next_block_ctx
.start
);
734 btrfsic_release_block_ctx(&tmp_next_block_ctx
);
735 kfree(selected_super
);
739 ret
= btrfsic_process_metablock(state
,
742 BTRFS_MAX_LEVEL
+ 3, 1);
743 btrfsic_release_block_ctx(&tmp_next_block_ctx
);
747 kfree(selected_super
);
/*
 * Examine one superblock copy (@superblock_mirror_num) of @device.
 *
 * The copy's device offset comes from btrfs_sb_offset(); the offset plus
 * BTRFS_SUPER_INFO_SIZE is compared against the device's
 * commit_total_bytes. The page is read through the block device's page
 * cache and checked for matching bytenr, magic, device uuid, node size
 * and sector size. If the block is not yet in state->block_hashtable, a
 * new block record is allocated, marked as an I/O-done metadata
 * superblock, and added to the all-blocks list and the hashtable.
 *
 * The copy with the highest generation (or the first one seen, while
 * state->max_superblock_generation is still 0) is copied into
 * @selected_super and recorded in *@selected_dev_state and
 * state->latest_superblock.
 *
 * Finally, in a three-iteration loop covering the root tree, chunk tree
 * and log tree roots, each mirror of the tree root is mapped, registered
 * as a block and linked to this superblock with an unknown generation.
 *
 * NOTE(review): several control-flow lines of this function are not
 * visible in this view, so the exact return values are not documented.
 */
751 static int btrfsic_process_superblock_dev_mirror(
752 struct btrfsic_state
*state
,
753 struct btrfsic_dev_state
*dev_state
,
754 struct btrfs_device
*device
,
755 int superblock_mirror_num
,
756 struct btrfsic_dev_state
**selected_dev_state
,
757 struct btrfs_super_block
*selected_super
)
759 struct btrfs_fs_info
*fs_info
= state
->fs_info
;
760 struct btrfs_super_block
*super_tmp
;
762 struct btrfsic_block
*superblock_tmp
;
764 struct block_device
*const superblock_bdev
= device
->bdev
;
766 struct address_space
*mapping
= superblock_bdev
->bd_inode
->i_mapping
;
769 /* super block bytenr is always the unmapped device bytenr */
770 dev_bytenr
= btrfs_sb_offset(superblock_mirror_num
);
771 if (dev_bytenr
+ BTRFS_SUPER_INFO_SIZE
> device
->commit_total_bytes
)
774 page
= read_cache_page_gfp(mapping
, dev_bytenr
>> PAGE_SHIFT
, GFP_NOFS
);
778 super_tmp
= page_address(page
);
/* this copy must describe this device and match the mounted geometry */
780 if (btrfs_super_bytenr(super_tmp
) != dev_bytenr
||
781 btrfs_super_magic(super_tmp
) != BTRFS_MAGIC
||
782 memcmp(device
->uuid
, super_tmp
->dev_item
.uuid
, BTRFS_UUID_SIZE
) ||
783 btrfs_super_nodesize(super_tmp
) != state
->metablock_size
||
784 btrfs_super_sectorsize(super_tmp
) != state
->datablock_size
) {
790 btrfsic_block_hashtable_lookup(superblock_bdev
,
792 &state
->block_hashtable
);
793 if (NULL
== superblock_tmp
) {
794 superblock_tmp
= btrfsic_block_alloc();
795 if (NULL
== superblock_tmp
) {
799 /* for superblock, only the dev_bytenr makes sense */
800 superblock_tmp
->dev_bytenr
= dev_bytenr
;
801 superblock_tmp
->dev_state
= dev_state
;
802 superblock_tmp
->logical_bytenr
= dev_bytenr
;
803 superblock_tmp
->generation
= btrfs_super_generation(super_tmp
);
804 superblock_tmp
->is_metadata
= 1;
805 superblock_tmp
->is_superblock
= 1;
806 superblock_tmp
->is_iodone
= 1;
807 superblock_tmp
->never_written
= 0;
/* stored mirror numbers are 1-based, the parameter is 0-based */
808 superblock_tmp
->mirror_num
= 1 + superblock_mirror_num
;
809 if (state
->print_mask
& BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE
)
810 btrfs_info_in_rcu(fs_info
,
811 "new initial S-block (bdev %p, %s) @%llu (%s/%llu/%d)",
813 rcu_str_deref(device
->name
), dev_bytenr
,
814 dev_state
->name
, dev_bytenr
,
815 superblock_mirror_num
);
816 list_add(&superblock_tmp
->all_blocks_node
,
817 &state
->all_blocks_list
);
818 btrfsic_block_hashtable_add(superblock_tmp
,
819 &state
->block_hashtable
);
822 /* select the one with the highest generation field */
823 if (btrfs_super_generation(super_tmp
) >
824 state
->max_superblock_generation
||
825 0 == state
->max_superblock_generation
) {
826 memcpy(selected_super
, super_tmp
, sizeof(*selected_super
));
827 *selected_dev_state
= dev_state
;
828 state
->max_superblock_generation
=
829 btrfs_super_generation(super_tmp
);
830 state
->latest_superblock
= superblock_tmp
;
833 for (pass
= 0; pass
< 3; pass
++) {
837 const char *additional_string
= NULL
;
838 struct btrfs_disk_key tmp_disk_key
;
840 tmp_disk_key
.type
= BTRFS_ROOT_ITEM_KEY
;
841 tmp_disk_key
.offset
= 0;
844 btrfs_set_disk_key_objectid(&tmp_disk_key
,
845 BTRFS_ROOT_TREE_OBJECTID
);
846 additional_string
= "initial root ";
847 next_bytenr
= btrfs_super_root(super_tmp
);
850 btrfs_set_disk_key_objectid(&tmp_disk_key
,
851 BTRFS_CHUNK_TREE_OBJECTID
);
852 additional_string
= "initial chunk ";
853 next_bytenr
= btrfs_super_chunk_root(super_tmp
);
856 btrfs_set_disk_key_objectid(&tmp_disk_key
,
857 BTRFS_TREE_LOG_OBJECTID
);
858 additional_string
= "initial log ";
859 next_bytenr
= btrfs_super_log_root(super_tmp
);
860 if (0 == next_bytenr
)
865 num_copies
= btrfs_num_copies(fs_info
, next_bytenr
,
866 state
->metablock_size
);
867 if (state
->print_mask
& BTRFSIC_PRINT_MASK_NUM_COPIES
)
868 pr_info("num_copies(log_bytenr=%llu) = %d\n",
869 next_bytenr
, num_copies
);
870 for (mirror_num
= 1; mirror_num
<= num_copies
; mirror_num
++) {
871 struct btrfsic_block
*next_block
;
872 struct btrfsic_block_data_ctx tmp_next_block_ctx
;
873 struct btrfsic_block_link
*l
;
875 if (btrfsic_map_block(state
, next_bytenr
,
876 state
->metablock_size
,
879 pr_info("btrfsic: btrfsic_map_block(bytenr @%llu, mirror %d) failed!\n",
880 next_bytenr
, mirror_num
);
885 next_block
= btrfsic_block_lookup_or_add(
886 state
, &tmp_next_block_ctx
,
887 additional_string
, 1, 1, 0,
889 if (NULL
== next_block
) {
890 btrfsic_release_block_ctx(&tmp_next_block_ctx
);
895 next_block
->disk_key
= tmp_disk_key
;
896 next_block
->generation
= BTRFSIC_GENERATION_UNKNOWN
;
897 l
= btrfsic_block_link_lookup_or_add(
898 state
, &tmp_next_block_ctx
,
899 next_block
, superblock_tmp
,
900 BTRFSIC_GENERATION_UNKNOWN
);
901 btrfsic_release_block_ctx(&tmp_next_block_ctx
);
908 if (state
->print_mask
& BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES
)
909 btrfsic_dump_tree_sub(state
, superblock_tmp
, 0);
/*
 * Allocate a zeroed stack frame with GFP_NOFS and stamp it with the
 * stack-frame magic number that btrfsic_stack_frame_free() checks.
 */
916 static struct btrfsic_stack_frame
*btrfsic_stack_frame_alloc(void)
918 struct btrfsic_stack_frame
*sf
;
920 sf
= kzalloc(sizeof(*sf
), GFP_NOFS
);
922 sf
->magic
= BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER
;
/*
 * Release a stack frame. The assertion accepts either a NULL pointer or
 * a frame that still carries the stack-frame magic number.
 */
926 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame
*sf
)
928 BUG_ON(!(NULL
== sf
||
929 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER
== sf
->magic
));
933 static noinline_for_stack
int btrfsic_process_metablock(
934 struct btrfsic_state
*state
,
935 struct btrfsic_block
*const first_block
,
936 struct btrfsic_block_data_ctx
*const first_block_ctx
,
937 int first_limit_nesting
, int force_iodone_flag
)
939 struct btrfsic_stack_frame initial_stack_frame
= { 0 };
940 struct btrfsic_stack_frame
*sf
;
941 struct btrfsic_stack_frame
*next_stack
;
942 struct btrfs_header
*const first_hdr
=
943 (struct btrfs_header
*)first_block_ctx
->datav
[0];
946 sf
= &initial_stack_frame
;
949 sf
->limit_nesting
= first_limit_nesting
;
950 sf
->block
= first_block
;
951 sf
->block_ctx
= first_block_ctx
;
952 sf
->next_block
= NULL
;
956 continue_with_new_stack_frame
:
957 sf
->block
->generation
= le64_to_cpu(sf
->hdr
->generation
);
958 if (0 == sf
->hdr
->level
) {
959 struct btrfs_leaf
*const leafhdr
=
960 (struct btrfs_leaf
*)sf
->hdr
;
963 sf
->nr
= btrfs_stack_header_nritems(&leafhdr
->header
);
965 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
966 pr_info("leaf %llu items %d generation %llu owner %llu\n",
967 sf
->block_ctx
->start
, sf
->nr
,
968 btrfs_stack_header_generation(
970 btrfs_stack_header_owner(
974 continue_with_current_leaf_stack_frame
:
975 if (0 == sf
->num_copies
|| sf
->mirror_num
> sf
->num_copies
) {
980 if (sf
->i
< sf
->nr
) {
981 struct btrfs_item disk_item
;
982 u32 disk_item_offset
=
983 (uintptr_t)(leafhdr
->items
+ sf
->i
) -
985 struct btrfs_disk_key
*disk_key
;
990 if (disk_item_offset
+ sizeof(struct btrfs_item
) >
991 sf
->block_ctx
->len
) {
992 leaf_item_out_of_bounce_error
:
993 pr_info("btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
994 sf
->block_ctx
->start
,
995 sf
->block_ctx
->dev
->name
);
996 goto one_stack_frame_backwards
;
998 btrfsic_read_from_block_data(sf
->block_ctx
,
1001 sizeof(struct btrfs_item
));
1002 item_offset
= btrfs_stack_item_offset(&disk_item
);
1003 item_size
= btrfs_stack_item_size(&disk_item
);
1004 disk_key
= &disk_item
.key
;
1005 type
= btrfs_disk_key_type(disk_key
);
1007 if (BTRFS_ROOT_ITEM_KEY
== type
) {
1008 struct btrfs_root_item root_item
;
1009 u32 root_item_offset
;
1012 root_item_offset
= item_offset
+
1013 offsetof(struct btrfs_leaf
, items
);
1014 if (root_item_offset
+ item_size
>
1016 goto leaf_item_out_of_bounce_error
;
1017 btrfsic_read_from_block_data(
1018 sf
->block_ctx
, &root_item
,
1021 next_bytenr
= btrfs_root_bytenr(&root_item
);
1024 btrfsic_create_link_to_next_block(
1030 &sf
->next_block_ctx
,
1036 btrfs_root_generation(
1039 goto one_stack_frame_backwards
;
1041 if (NULL
!= sf
->next_block
) {
1042 struct btrfs_header
*const next_hdr
=
1043 (struct btrfs_header
*)
1044 sf
->next_block_ctx
.datav
[0];
1047 btrfsic_stack_frame_alloc();
1048 if (NULL
== next_stack
) {
1050 btrfsic_release_block_ctx(
1053 goto one_stack_frame_backwards
;
1057 next_stack
->block
= sf
->next_block
;
1058 next_stack
->block_ctx
=
1059 &sf
->next_block_ctx
;
1060 next_stack
->next_block
= NULL
;
1061 next_stack
->hdr
= next_hdr
;
1062 next_stack
->limit_nesting
=
1063 sf
->limit_nesting
- 1;
1064 next_stack
->prev
= sf
;
1066 goto continue_with_new_stack_frame
;
1068 } else if (BTRFS_EXTENT_DATA_KEY
== type
&&
1069 state
->include_extent_data
) {
1070 sf
->error
= btrfsic_handle_extent_data(
1077 goto one_stack_frame_backwards
;
1080 goto continue_with_current_leaf_stack_frame
;
1083 struct btrfs_node
*const nodehdr
= (struct btrfs_node
*)sf
->hdr
;
1086 sf
->nr
= btrfs_stack_header_nritems(&nodehdr
->header
);
1088 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1089 pr_info("node %llu level %d items %d generation %llu owner %llu\n",
1090 sf
->block_ctx
->start
,
1091 nodehdr
->header
.level
, sf
->nr
,
1092 btrfs_stack_header_generation(
1094 btrfs_stack_header_owner(
1098 continue_with_current_node_stack_frame
:
1099 if (0 == sf
->num_copies
|| sf
->mirror_num
> sf
->num_copies
) {
1104 if (sf
->i
< sf
->nr
) {
1105 struct btrfs_key_ptr key_ptr
;
1109 key_ptr_offset
= (uintptr_t)(nodehdr
->ptrs
+ sf
->i
) -
1111 if (key_ptr_offset
+ sizeof(struct btrfs_key_ptr
) >
1112 sf
->block_ctx
->len
) {
1113 pr_info("btrfsic: node item out of bounce at logical %llu, dev %s\n",
1114 sf
->block_ctx
->start
,
1115 sf
->block_ctx
->dev
->name
);
1116 goto one_stack_frame_backwards
;
1118 btrfsic_read_from_block_data(
1119 sf
->block_ctx
, &key_ptr
, key_ptr_offset
,
1120 sizeof(struct btrfs_key_ptr
));
1121 next_bytenr
= btrfs_stack_key_blockptr(&key_ptr
);
1123 sf
->error
= btrfsic_create_link_to_next_block(
1129 &sf
->next_block_ctx
,
1135 btrfs_stack_key_generation(&key_ptr
));
1137 goto one_stack_frame_backwards
;
1139 if (NULL
!= sf
->next_block
) {
1140 struct btrfs_header
*const next_hdr
=
1141 (struct btrfs_header
*)
1142 sf
->next_block_ctx
.datav
[0];
1144 next_stack
= btrfsic_stack_frame_alloc();
1145 if (NULL
== next_stack
) {
1147 goto one_stack_frame_backwards
;
1151 next_stack
->block
= sf
->next_block
;
1152 next_stack
->block_ctx
= &sf
->next_block_ctx
;
1153 next_stack
->next_block
= NULL
;
1154 next_stack
->hdr
= next_hdr
;
1155 next_stack
->limit_nesting
=
1156 sf
->limit_nesting
- 1;
1157 next_stack
->prev
= sf
;
1159 goto continue_with_new_stack_frame
;
1162 goto continue_with_current_node_stack_frame
;
1166 one_stack_frame_backwards
:
1167 if (NULL
!= sf
->prev
) {
1168 struct btrfsic_stack_frame
*const prev
= sf
->prev
;
1170 /* the one for the initial block is freed in the caller */
1171 btrfsic_release_block_ctx(sf
->block_ctx
);
1174 prev
->error
= sf
->error
;
1175 btrfsic_stack_frame_free(sf
);
1177 goto one_stack_frame_backwards
;
1180 btrfsic_stack_frame_free(sf
);
1182 goto continue_with_new_stack_frame
;
1184 BUG_ON(&initial_stack_frame
!= sf
);
1190 static void btrfsic_read_from_block_data(
1191 struct btrfsic_block_data_ctx
*block_ctx
,
1192 void *dstv
, u32 offset
, size_t len
)
1197 char *dst
= (char *)dstv
;
1198 size_t start_offset
= offset_in_page(block_ctx
->start
);
1199 unsigned long i
= (start_offset
+ offset
) >> PAGE_SHIFT
;
1201 WARN_ON(offset
+ len
> block_ctx
->len
);
1202 pgoff
= offset_in_page(start_offset
+ offset
);
1205 cur
= min(len
, ((size_t)PAGE_SIZE
- pgoff
));
1206 BUG_ON(i
>= DIV_ROUND_UP(block_ctx
->len
, PAGE_SIZE
));
1207 kaddr
= block_ctx
->datav
[i
];
1208 memcpy(dst
, kaddr
+ pgoff
, cur
);
1217 static int btrfsic_create_link_to_next_block(
1218 struct btrfsic_state
*state
,
1219 struct btrfsic_block
*block
,
1220 struct btrfsic_block_data_ctx
*block_ctx
,
1223 struct btrfsic_block_data_ctx
*next_block_ctx
,
1224 struct btrfsic_block
**next_blockp
,
1225 int force_iodone_flag
,
1226 int *num_copiesp
, int *mirror_nump
,
1227 struct btrfs_disk_key
*disk_key
,
1228 u64 parent_generation
)
1230 struct btrfs_fs_info
*fs_info
= state
->fs_info
;
1231 struct btrfsic_block
*next_block
= NULL
;
1233 struct btrfsic_block_link
*l
;
1234 int did_alloc_block_link
;
1235 int block_was_created
;
1237 *next_blockp
= NULL
;
1238 if (0 == *num_copiesp
) {
1239 *num_copiesp
= btrfs_num_copies(fs_info
, next_bytenr
,
1240 state
->metablock_size
);
1241 if (state
->print_mask
& BTRFSIC_PRINT_MASK_NUM_COPIES
)
1242 pr_info("num_copies(log_bytenr=%llu) = %d\n",
1243 next_bytenr
, *num_copiesp
);
1247 if (*mirror_nump
> *num_copiesp
)
1250 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1251 pr_info("btrfsic_create_link_to_next_block(mirror_num=%d)\n",
1253 ret
= btrfsic_map_block(state
, next_bytenr
,
1254 state
->metablock_size
,
1255 next_block_ctx
, *mirror_nump
);
1257 pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1258 next_bytenr
, *mirror_nump
);
1259 btrfsic_release_block_ctx(next_block_ctx
);
1260 *next_blockp
= NULL
;
1264 next_block
= btrfsic_block_lookup_or_add(state
,
1265 next_block_ctx
, "referenced ",
1266 1, force_iodone_flag
,
1269 &block_was_created
);
1270 if (NULL
== next_block
) {
1271 btrfsic_release_block_ctx(next_block_ctx
);
1272 *next_blockp
= NULL
;
1275 if (block_was_created
) {
1277 next_block
->generation
= BTRFSIC_GENERATION_UNKNOWN
;
1279 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
) {
1280 if (next_block
->logical_bytenr
!= next_bytenr
&&
1281 !(!next_block
->is_metadata
&&
1282 0 == next_block
->logical_bytenr
))
1283 pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
1284 next_bytenr
, next_block_ctx
->dev
->name
,
1285 next_block_ctx
->dev_bytenr
, *mirror_nump
,
1286 btrfsic_get_block_type(state
,
1288 next_block
->logical_bytenr
);
1290 pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, %c.\n",
1291 next_bytenr
, next_block_ctx
->dev
->name
,
1292 next_block_ctx
->dev_bytenr
, *mirror_nump
,
1293 btrfsic_get_block_type(state
,
1296 next_block
->logical_bytenr
= next_bytenr
;
1298 next_block
->mirror_num
= *mirror_nump
;
1299 l
= btrfsic_block_link_hashtable_lookup(
1300 next_block_ctx
->dev
->bdev
,
1301 next_block_ctx
->dev_bytenr
,
1302 block_ctx
->dev
->bdev
,
1303 block_ctx
->dev_bytenr
,
1304 &state
->block_link_hashtable
);
1307 next_block
->disk_key
= *disk_key
;
1309 l
= btrfsic_block_link_alloc();
1311 btrfsic_release_block_ctx(next_block_ctx
);
1312 *next_blockp
= NULL
;
1316 did_alloc_block_link
= 1;
1317 l
->block_ref_to
= next_block
;
1318 l
->block_ref_from
= block
;
1320 l
->parent_generation
= parent_generation
;
1322 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1323 btrfsic_print_add_link(state
, l
);
1325 list_add(&l
->node_ref_to
, &block
->ref_to_list
);
1326 list_add(&l
->node_ref_from
, &next_block
->ref_from_list
);
1328 btrfsic_block_link_hashtable_add(l
,
1329 &state
->block_link_hashtable
);
1331 did_alloc_block_link
= 0;
1332 if (0 == limit_nesting
) {
1334 l
->parent_generation
= parent_generation
;
1335 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1336 btrfsic_print_add_link(state
, l
);
1340 if (limit_nesting
> 0 && did_alloc_block_link
) {
1341 ret
= btrfsic_read_block(state
, next_block_ctx
);
1342 if (ret
< (int)next_block_ctx
->len
) {
1343 pr_info("btrfsic: read block @logical %llu failed!\n",
1345 btrfsic_release_block_ctx(next_block_ctx
);
1346 *next_blockp
= NULL
;
1350 *next_blockp
= next_block
;
1352 *next_blockp
= NULL
;
1359 static int btrfsic_handle_extent_data(
1360 struct btrfsic_state
*state
,
1361 struct btrfsic_block
*block
,
1362 struct btrfsic_block_data_ctx
*block_ctx
,
1363 u32 item_offset
, int force_iodone_flag
)
1365 struct btrfs_fs_info
*fs_info
= state
->fs_info
;
1366 struct btrfs_file_extent_item file_extent_item
;
1367 u64 file_extent_item_offset
;
1371 struct btrfsic_block_link
*l
;
1374 file_extent_item_offset
= offsetof(struct btrfs_leaf
, items
) +
1376 if (file_extent_item_offset
+
1377 offsetof(struct btrfs_file_extent_item
, disk_num_bytes
) >
1379 pr_info("btrfsic: file item out of bounce at logical %llu, dev %s\n",
1380 block_ctx
->start
, block_ctx
->dev
->name
);
1384 btrfsic_read_from_block_data(block_ctx
, &file_extent_item
,
1385 file_extent_item_offset
,
1386 offsetof(struct btrfs_file_extent_item
, disk_num_bytes
));
1387 if (BTRFS_FILE_EXTENT_REG
!= file_extent_item
.type
||
1388 btrfs_stack_file_extent_disk_bytenr(&file_extent_item
) == 0) {
1389 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERY_VERBOSE
)
1390 pr_info("extent_data: type %u, disk_bytenr = %llu\n",
1391 file_extent_item
.type
,
1392 btrfs_stack_file_extent_disk_bytenr(
1393 &file_extent_item
));
1397 if (file_extent_item_offset
+ sizeof(struct btrfs_file_extent_item
) >
1399 pr_info("btrfsic: file item out of bounce at logical %llu, dev %s\n",
1400 block_ctx
->start
, block_ctx
->dev
->name
);
1403 btrfsic_read_from_block_data(block_ctx
, &file_extent_item
,
1404 file_extent_item_offset
,
1405 sizeof(struct btrfs_file_extent_item
));
1406 next_bytenr
= btrfs_stack_file_extent_disk_bytenr(&file_extent_item
);
1407 if (btrfs_stack_file_extent_compression(&file_extent_item
) ==
1408 BTRFS_COMPRESS_NONE
) {
1409 next_bytenr
+= btrfs_stack_file_extent_offset(&file_extent_item
);
1410 num_bytes
= btrfs_stack_file_extent_num_bytes(&file_extent_item
);
1412 num_bytes
= btrfs_stack_file_extent_disk_num_bytes(&file_extent_item
);
1414 generation
= btrfs_stack_file_extent_generation(&file_extent_item
);
1416 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERY_VERBOSE
)
1417 pr_info("extent_data: type %u, disk_bytenr = %llu, offset = %llu, num_bytes = %llu\n",
1418 file_extent_item
.type
,
1419 btrfs_stack_file_extent_disk_bytenr(&file_extent_item
),
1420 btrfs_stack_file_extent_offset(&file_extent_item
),
1422 while (num_bytes
> 0) {
1427 if (num_bytes
> state
->datablock_size
)
1428 chunk_len
= state
->datablock_size
;
1430 chunk_len
= num_bytes
;
1432 num_copies
= btrfs_num_copies(fs_info
, next_bytenr
,
1433 state
->datablock_size
);
1434 if (state
->print_mask
& BTRFSIC_PRINT_MASK_NUM_COPIES
)
1435 pr_info("num_copies(log_bytenr=%llu) = %d\n",
1436 next_bytenr
, num_copies
);
1437 for (mirror_num
= 1; mirror_num
<= num_copies
; mirror_num
++) {
1438 struct btrfsic_block_data_ctx next_block_ctx
;
1439 struct btrfsic_block
*next_block
;
1440 int block_was_created
;
1442 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1443 pr_info("btrfsic_handle_extent_data(mirror_num=%d)\n",
1445 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERY_VERBOSE
)
1446 pr_info("\tdisk_bytenr = %llu, num_bytes %u\n",
1447 next_bytenr
, chunk_len
);
1448 ret
= btrfsic_map_block(state
, next_bytenr
,
1449 chunk_len
, &next_block_ctx
,
1452 pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1453 next_bytenr
, mirror_num
);
1457 next_block
= btrfsic_block_lookup_or_add(
1465 &block_was_created
);
1466 if (NULL
== next_block
) {
1467 btrfsic_release_block_ctx(&next_block_ctx
);
1470 if (!block_was_created
) {
1471 if ((state
->print_mask
&
1472 BTRFSIC_PRINT_MASK_VERBOSE
) &&
1473 next_block
->logical_bytenr
!= next_bytenr
&&
1474 !(!next_block
->is_metadata
&&
1475 0 == next_block
->logical_bytenr
)) {
1476 pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, D, bytenr mismatch (!= stored %llu).\n",
1478 next_block_ctx
.dev
->name
,
1479 next_block_ctx
.dev_bytenr
,
1481 next_block
->logical_bytenr
);
1483 next_block
->logical_bytenr
= next_bytenr
;
1484 next_block
->mirror_num
= mirror_num
;
1487 l
= btrfsic_block_link_lookup_or_add(state
,
1491 btrfsic_release_block_ctx(&next_block_ctx
);
1496 next_bytenr
+= chunk_len
;
1497 num_bytes
-= chunk_len
;
1503 static int btrfsic_map_block(struct btrfsic_state
*state
, u64 bytenr
, u32 len
,
1504 struct btrfsic_block_data_ctx
*block_ctx_out
,
1507 struct btrfs_fs_info
*fs_info
= state
->fs_info
;
1510 struct btrfs_bio
*multi
= NULL
;
1511 struct btrfs_device
*device
;
1514 ret
= btrfs_map_block(fs_info
, BTRFS_MAP_READ
,
1515 bytenr
, &length
, &multi
, mirror_num
);
1518 block_ctx_out
->start
= 0;
1519 block_ctx_out
->dev_bytenr
= 0;
1520 block_ctx_out
->len
= 0;
1521 block_ctx_out
->dev
= NULL
;
1522 block_ctx_out
->datav
= NULL
;
1523 block_ctx_out
->pagev
= NULL
;
1524 block_ctx_out
->mem_to_free
= NULL
;
1529 device
= multi
->stripes
[0].dev
;
1530 if (test_bit(BTRFS_DEV_STATE_MISSING
, &device
->dev_state
) ||
1531 !device
->bdev
|| !device
->name
)
1532 block_ctx_out
->dev
= NULL
;
1534 block_ctx_out
->dev
= btrfsic_dev_state_lookup(
1535 device
->bdev
->bd_dev
);
1536 block_ctx_out
->dev_bytenr
= multi
->stripes
[0].physical
;
1537 block_ctx_out
->start
= bytenr
;
1538 block_ctx_out
->len
= len
;
1539 block_ctx_out
->datav
= NULL
;
1540 block_ctx_out
->pagev
= NULL
;
1541 block_ctx_out
->mem_to_free
= NULL
;
1544 if (NULL
== block_ctx_out
->dev
) {
1546 pr_info("btrfsic: error, cannot lookup dev (#1)!\n");
1552 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx
*block_ctx
)
1554 if (block_ctx
->mem_to_free
) {
1555 unsigned int num_pages
;
1557 BUG_ON(!block_ctx
->datav
);
1558 BUG_ON(!block_ctx
->pagev
);
1559 num_pages
= (block_ctx
->len
+ (u64
)PAGE_SIZE
- 1) >>
1561 while (num_pages
> 0) {
1563 if (block_ctx
->datav
[num_pages
]) {
1564 kunmap(block_ctx
->pagev
[num_pages
]);
1565 block_ctx
->datav
[num_pages
] = NULL
;
1567 if (block_ctx
->pagev
[num_pages
]) {
1568 __free_page(block_ctx
->pagev
[num_pages
]);
1569 block_ctx
->pagev
[num_pages
] = NULL
;
1573 kfree(block_ctx
->mem_to_free
);
1574 block_ctx
->mem_to_free
= NULL
;
1575 block_ctx
->pagev
= NULL
;
1576 block_ctx
->datav
= NULL
;
1580 static int btrfsic_read_block(struct btrfsic_state
*state
,
1581 struct btrfsic_block_data_ctx
*block_ctx
)
1583 unsigned int num_pages
;
1589 BUG_ON(block_ctx
->datav
);
1590 BUG_ON(block_ctx
->pagev
);
1591 BUG_ON(block_ctx
->mem_to_free
);
1592 if (!PAGE_ALIGNED(block_ctx
->dev_bytenr
)) {
1593 pr_info("btrfsic: read_block() with unaligned bytenr %llu\n",
1594 block_ctx
->dev_bytenr
);
1598 num_pages
= (block_ctx
->len
+ (u64
)PAGE_SIZE
- 1) >>
1600 size
= sizeof(*block_ctx
->datav
) + sizeof(*block_ctx
->pagev
);
1601 block_ctx
->mem_to_free
= kcalloc(num_pages
, size
, GFP_NOFS
);
1602 if (!block_ctx
->mem_to_free
)
1604 block_ctx
->datav
= block_ctx
->mem_to_free
;
1605 block_ctx
->pagev
= (struct page
**)(block_ctx
->datav
+ num_pages
);
1606 for (i
= 0; i
< num_pages
; i
++) {
1607 block_ctx
->pagev
[i
] = alloc_page(GFP_NOFS
);
1608 if (!block_ctx
->pagev
[i
])
1612 dev_bytenr
= block_ctx
->dev_bytenr
;
1613 for (i
= 0; i
< num_pages
;) {
1617 bio
= btrfs_io_bio_alloc(num_pages
- i
);
1618 bio_set_dev(bio
, block_ctx
->dev
->bdev
);
1619 bio
->bi_iter
.bi_sector
= dev_bytenr
>> 9;
1620 bio
->bi_opf
= REQ_OP_READ
;
1622 for (j
= i
; j
< num_pages
; j
++) {
1623 ret
= bio_add_page(bio
, block_ctx
->pagev
[j
],
1625 if (PAGE_SIZE
!= ret
)
1629 pr_info("btrfsic: error, failed to add a single page!\n");
1632 if (submit_bio_wait(bio
)) {
1633 pr_info("btrfsic: read error at logical %llu dev %s!\n",
1634 block_ctx
->start
, block_ctx
->dev
->name
);
1639 dev_bytenr
+= (j
- i
) * PAGE_SIZE
;
1642 for (i
= 0; i
< num_pages
; i
++)
1643 block_ctx
->datav
[i
] = kmap(block_ctx
->pagev
[i
]);
1645 return block_ctx
->len
;
1648 static void btrfsic_dump_database(struct btrfsic_state
*state
)
1650 const struct btrfsic_block
*b_all
;
1652 BUG_ON(NULL
== state
);
1654 pr_info("all_blocks_list:\n");
1655 list_for_each_entry(b_all
, &state
->all_blocks_list
, all_blocks_node
) {
1656 const struct btrfsic_block_link
*l
;
1658 pr_info("%c-block @%llu (%s/%llu/%d)\n",
1659 btrfsic_get_block_type(state
, b_all
),
1660 b_all
->logical_bytenr
, b_all
->dev_state
->name
,
1661 b_all
->dev_bytenr
, b_all
->mirror_num
);
1663 list_for_each_entry(l
, &b_all
->ref_to_list
, node_ref_to
) {
1664 pr_info(" %c @%llu (%s/%llu/%d) refers %u* to %c @%llu (%s/%llu/%d)\n",
1665 btrfsic_get_block_type(state
, b_all
),
1666 b_all
->logical_bytenr
, b_all
->dev_state
->name
,
1667 b_all
->dev_bytenr
, b_all
->mirror_num
,
1669 btrfsic_get_block_type(state
, l
->block_ref_to
),
1670 l
->block_ref_to
->logical_bytenr
,
1671 l
->block_ref_to
->dev_state
->name
,
1672 l
->block_ref_to
->dev_bytenr
,
1673 l
->block_ref_to
->mirror_num
);
1676 list_for_each_entry(l
, &b_all
->ref_from_list
, node_ref_from
) {
1677 pr_info(" %c @%llu (%s/%llu/%d) is ref %u* from %c @%llu (%s/%llu/%d)\n",
1678 btrfsic_get_block_type(state
, b_all
),
1679 b_all
->logical_bytenr
, b_all
->dev_state
->name
,
1680 b_all
->dev_bytenr
, b_all
->mirror_num
,
1682 btrfsic_get_block_type(state
, l
->block_ref_from
),
1683 l
->block_ref_from
->logical_bytenr
,
1684 l
->block_ref_from
->dev_state
->name
,
1685 l
->block_ref_from
->dev_bytenr
,
1686 l
->block_ref_from
->mirror_num
);
1694 * Test whether the disk block contains a tree block (leaf or node)
1695 * (note that this test fails for the super block)
1697 static noinline_for_stack
int btrfsic_test_for_metadata(
1698 struct btrfsic_state
*state
,
1699 char **datav
, unsigned int num_pages
)
1701 struct btrfs_fs_info
*fs_info
= state
->fs_info
;
1702 SHASH_DESC_ON_STACK(shash
, fs_info
->csum_shash
);
1703 struct btrfs_header
*h
;
1704 u8 csum
[BTRFS_CSUM_SIZE
];
1707 if (num_pages
* PAGE_SIZE
< state
->metablock_size
)
1708 return 1; /* not metadata */
1709 num_pages
= state
->metablock_size
>> PAGE_SHIFT
;
1710 h
= (struct btrfs_header
*)datav
[0];
1712 if (memcmp(h
->fsid
, fs_info
->fs_devices
->fsid
, BTRFS_FSID_SIZE
))
1715 shash
->tfm
= fs_info
->csum_shash
;
1716 crypto_shash_init(shash
);
1718 for (i
= 0; i
< num_pages
; i
++) {
1719 u8
*data
= i
? datav
[i
] : (datav
[i
] + BTRFS_CSUM_SIZE
);
1720 size_t sublen
= i
? PAGE_SIZE
:
1721 (PAGE_SIZE
- BTRFS_CSUM_SIZE
);
1723 crypto_shash_update(shash
, data
, sublen
);
1725 crypto_shash_final(shash
, csum
);
1726 if (memcmp(csum
, h
->csum
, state
->csum_size
))
1729 return 0; /* is metadata */
1732 static void btrfsic_process_written_block(struct btrfsic_dev_state
*dev_state
,
1733 u64 dev_bytenr
, char **mapped_datav
,
1734 unsigned int num_pages
,
1735 struct bio
*bio
, int *bio_is_patched
,
1736 int submit_bio_bh_rw
)
1739 struct btrfsic_block
*block
;
1740 struct btrfsic_block_data_ctx block_ctx
;
1742 struct btrfsic_state
*state
= dev_state
->state
;
1743 struct block_device
*bdev
= dev_state
->bdev
;
1744 unsigned int processed_len
;
1746 if (NULL
!= bio_is_patched
)
1747 *bio_is_patched
= 0;
1754 is_metadata
= (0 == btrfsic_test_for_metadata(state
, mapped_datav
,
1757 block
= btrfsic_block_hashtable_lookup(bdev
, dev_bytenr
,
1758 &state
->block_hashtable
);
1759 if (NULL
!= block
) {
1761 struct btrfsic_block_link
*l
, *tmp
;
1763 if (block
->is_superblock
) {
1764 bytenr
= btrfs_super_bytenr((struct btrfs_super_block
*)
1766 if (num_pages
* PAGE_SIZE
<
1767 BTRFS_SUPER_INFO_SIZE
) {
1768 pr_info("btrfsic: cannot work with too short bios!\n");
1772 BUG_ON(!PAGE_ALIGNED(BTRFS_SUPER_INFO_SIZE
));
1773 processed_len
= BTRFS_SUPER_INFO_SIZE
;
1774 if (state
->print_mask
&
1775 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE
) {
1776 pr_info("[before new superblock is written]:\n");
1777 btrfsic_dump_tree_sub(state
, block
, 0);
1781 if (!block
->is_superblock
) {
1782 if (num_pages
* PAGE_SIZE
<
1783 state
->metablock_size
) {
1784 pr_info("btrfsic: cannot work with too short bios!\n");
1787 processed_len
= state
->metablock_size
;
1788 bytenr
= btrfs_stack_header_bytenr(
1789 (struct btrfs_header
*)
1791 btrfsic_cmp_log_and_dev_bytenr(state
, bytenr
,
1795 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
) {
1796 if (block
->logical_bytenr
!= bytenr
&&
1797 !(!block
->is_metadata
&&
1798 block
->logical_bytenr
== 0))
1799 pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
1800 bytenr
, dev_state
->name
,
1803 btrfsic_get_block_type(state
,
1805 block
->logical_bytenr
);
1807 pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c.\n",
1808 bytenr
, dev_state
->name
,
1809 dev_bytenr
, block
->mirror_num
,
1810 btrfsic_get_block_type(state
,
1813 block
->logical_bytenr
= bytenr
;
1815 if (num_pages
* PAGE_SIZE
<
1816 state
->datablock_size
) {
1817 pr_info("btrfsic: cannot work with too short bios!\n");
1820 processed_len
= state
->datablock_size
;
1821 bytenr
= block
->logical_bytenr
;
1822 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1823 pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c.\n",
1824 bytenr
, dev_state
->name
, dev_bytenr
,
1826 btrfsic_get_block_type(state
, block
));
1829 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1830 pr_info("ref_to_list: %cE, ref_from_list: %cE\n",
1831 list_empty(&block
->ref_to_list
) ? ' ' : '!',
1832 list_empty(&block
->ref_from_list
) ? ' ' : '!');
1833 if (btrfsic_is_block_ref_by_superblock(state
, block
, 0)) {
1834 pr_info("btrfs: attempt to overwrite %c-block @%llu (%s/%llu/%d), old(gen=%llu, objectid=%llu, type=%d, offset=%llu), new(gen=%llu), which is referenced by most recent superblock (superblockgen=%llu)!\n",
1835 btrfsic_get_block_type(state
, block
), bytenr
,
1836 dev_state
->name
, dev_bytenr
, block
->mirror_num
,
1838 btrfs_disk_key_objectid(&block
->disk_key
),
1839 block
->disk_key
.type
,
1840 btrfs_disk_key_offset(&block
->disk_key
),
1841 btrfs_stack_header_generation(
1842 (struct btrfs_header
*) mapped_datav
[0]),
1843 state
->max_superblock_generation
);
1844 btrfsic_dump_tree(state
);
1847 if (!block
->is_iodone
&& !block
->never_written
) {
1848 pr_info("btrfs: attempt to overwrite %c-block @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu, which is not yet iodone!\n",
1849 btrfsic_get_block_type(state
, block
), bytenr
,
1850 dev_state
->name
, dev_bytenr
, block
->mirror_num
,
1852 btrfs_stack_header_generation(
1853 (struct btrfs_header
*)
1855 /* it would not be safe to go on */
1856 btrfsic_dump_tree(state
);
1861 * Clear all references of this block. Do not free
1862 * the block itself even if is not referenced anymore
1863 * because it still carries valuable information
1864 * like whether it was ever written and IO completed.
1866 list_for_each_entry_safe(l
, tmp
, &block
->ref_to_list
,
1868 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1869 btrfsic_print_rem_link(state
, l
);
1871 if (0 == l
->ref_cnt
) {
1872 list_del(&l
->node_ref_to
);
1873 list_del(&l
->node_ref_from
);
1874 btrfsic_block_link_hashtable_remove(l
);
1875 btrfsic_block_link_free(l
);
1879 block_ctx
.dev
= dev_state
;
1880 block_ctx
.dev_bytenr
= dev_bytenr
;
1881 block_ctx
.start
= bytenr
;
1882 block_ctx
.len
= processed_len
;
1883 block_ctx
.pagev
= NULL
;
1884 block_ctx
.mem_to_free
= NULL
;
1885 block_ctx
.datav
= mapped_datav
;
1887 if (is_metadata
|| state
->include_extent_data
) {
1888 block
->never_written
= 0;
1889 block
->iodone_w_error
= 0;
1891 block
->is_iodone
= 0;
1892 BUG_ON(NULL
== bio_is_patched
);
1893 if (!*bio_is_patched
) {
1894 block
->orig_bio_private
=
1896 block
->orig_bio_end_io
=
1898 block
->next_in_same_bio
= NULL
;
1899 bio
->bi_private
= block
;
1900 bio
->bi_end_io
= btrfsic_bio_end_io
;
1901 *bio_is_patched
= 1;
1903 struct btrfsic_block
*chained_block
=
1904 (struct btrfsic_block
*)
1907 BUG_ON(NULL
== chained_block
);
1908 block
->orig_bio_private
=
1909 chained_block
->orig_bio_private
;
1910 block
->orig_bio_end_io
=
1911 chained_block
->orig_bio_end_io
;
1912 block
->next_in_same_bio
= chained_block
;
1913 bio
->bi_private
= block
;
1916 block
->is_iodone
= 1;
1917 block
->orig_bio_private
= NULL
;
1918 block
->orig_bio_end_io
= NULL
;
1919 block
->next_in_same_bio
= NULL
;
1923 block
->flush_gen
= dev_state
->last_flush_gen
+ 1;
1924 block
->submit_bio_bh_rw
= submit_bio_bh_rw
;
1926 block
->logical_bytenr
= bytenr
;
1927 block
->is_metadata
= 1;
1928 if (block
->is_superblock
) {
1930 BTRFS_SUPER_INFO_SIZE
);
1931 ret
= btrfsic_process_written_superblock(
1934 (struct btrfs_super_block
*)
1936 if (state
->print_mask
&
1937 BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE
) {
1938 pr_info("[after new superblock is written]:\n");
1939 btrfsic_dump_tree_sub(state
, block
, 0);
1942 block
->mirror_num
= 0; /* unknown */
1943 ret
= btrfsic_process_metablock(
1950 pr_info("btrfsic: btrfsic_process_metablock(root @%llu) failed!\n",
1953 block
->is_metadata
= 0;
1954 block
->mirror_num
= 0; /* unknown */
1955 block
->generation
= BTRFSIC_GENERATION_UNKNOWN
;
1956 if (!state
->include_extent_data
1957 && list_empty(&block
->ref_from_list
)) {
1959 * disk block is overwritten with extent
1960 * data (not meta data) and we are configured
1961 * to not include extent data: take the
1962 * chance and free the block's memory
1964 btrfsic_block_hashtable_remove(block
);
1965 list_del(&block
->all_blocks_node
);
1966 btrfsic_block_free(block
);
1969 btrfsic_release_block_ctx(&block_ctx
);
1971 /* block has not been found in hash table */
1975 processed_len
= state
->datablock_size
;
1976 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1977 pr_info("Written block (%s/%llu/?) !found in hash table, D.\n",
1978 dev_state
->name
, dev_bytenr
);
1979 if (!state
->include_extent_data
) {
1980 /* ignore that written D block */
1984 /* this is getting ugly for the
1985 * include_extent_data case... */
1986 bytenr
= 0; /* unknown */
1988 processed_len
= state
->metablock_size
;
1989 bytenr
= btrfs_stack_header_bytenr(
1990 (struct btrfs_header
*)
1992 btrfsic_cmp_log_and_dev_bytenr(state
, bytenr
, dev_state
,
1994 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1995 pr_info("Written block @%llu (%s/%llu/?) !found in hash table, M.\n",
1996 bytenr
, dev_state
->name
, dev_bytenr
);
1999 block_ctx
.dev
= dev_state
;
2000 block_ctx
.dev_bytenr
= dev_bytenr
;
2001 block_ctx
.start
= bytenr
;
2002 block_ctx
.len
= processed_len
;
2003 block_ctx
.pagev
= NULL
;
2004 block_ctx
.mem_to_free
= NULL
;
2005 block_ctx
.datav
= mapped_datav
;
2007 block
= btrfsic_block_alloc();
2008 if (NULL
== block
) {
2009 btrfsic_release_block_ctx(&block_ctx
);
2012 block
->dev_state
= dev_state
;
2013 block
->dev_bytenr
= dev_bytenr
;
2014 block
->logical_bytenr
= bytenr
;
2015 block
->is_metadata
= is_metadata
;
2016 block
->never_written
= 0;
2017 block
->iodone_w_error
= 0;
2018 block
->mirror_num
= 0; /* unknown */
2019 block
->flush_gen
= dev_state
->last_flush_gen
+ 1;
2020 block
->submit_bio_bh_rw
= submit_bio_bh_rw
;
2022 block
->is_iodone
= 0;
2023 BUG_ON(NULL
== bio_is_patched
);
2024 if (!*bio_is_patched
) {
2025 block
->orig_bio_private
= bio
->bi_private
;
2026 block
->orig_bio_end_io
= bio
->bi_end_io
;
2027 block
->next_in_same_bio
= NULL
;
2028 bio
->bi_private
= block
;
2029 bio
->bi_end_io
= btrfsic_bio_end_io
;
2030 *bio_is_patched
= 1;
2032 struct btrfsic_block
*chained_block
=
2033 (struct btrfsic_block
*)
2036 BUG_ON(NULL
== chained_block
);
2037 block
->orig_bio_private
=
2038 chained_block
->orig_bio_private
;
2039 block
->orig_bio_end_io
=
2040 chained_block
->orig_bio_end_io
;
2041 block
->next_in_same_bio
= chained_block
;
2042 bio
->bi_private
= block
;
2045 block
->is_iodone
= 1;
2046 block
->orig_bio_private
= NULL
;
2047 block
->orig_bio_end_io
= NULL
;
2048 block
->next_in_same_bio
= NULL
;
2050 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2051 pr_info("New written %c-block @%llu (%s/%llu/%d)\n",
2052 is_metadata
? 'M' : 'D',
2053 block
->logical_bytenr
, block
->dev_state
->name
,
2054 block
->dev_bytenr
, block
->mirror_num
);
2055 list_add(&block
->all_blocks_node
, &state
->all_blocks_list
);
2056 btrfsic_block_hashtable_add(block
, &state
->block_hashtable
);
2059 ret
= btrfsic_process_metablock(state
, block
,
2062 pr_info("btrfsic: process_metablock(root @%llu) failed!\n",
2065 btrfsic_release_block_ctx(&block_ctx
);
2069 BUG_ON(!processed_len
);
2070 dev_bytenr
+= processed_len
;
2071 mapped_datav
+= processed_len
>> PAGE_SHIFT
;
2072 num_pages
-= processed_len
>> PAGE_SHIFT
;
2076 static void btrfsic_bio_end_io(struct bio
*bp
)
2078 struct btrfsic_block
*block
= (struct btrfsic_block
*)bp
->bi_private
;
2081 /* mutex is not held! This is not save if IO is not yet completed
2087 BUG_ON(NULL
== block
);
2088 bp
->bi_private
= block
->orig_bio_private
;
2089 bp
->bi_end_io
= block
->orig_bio_end_io
;
2092 struct btrfsic_block
*next_block
;
2093 struct btrfsic_dev_state
*const dev_state
= block
->dev_state
;
2095 if ((dev_state
->state
->print_mask
&
2096 BTRFSIC_PRINT_MASK_END_IO_BIO_BH
))
2097 pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
2099 btrfsic_get_block_type(dev_state
->state
, block
),
2100 block
->logical_bytenr
, dev_state
->name
,
2101 block
->dev_bytenr
, block
->mirror_num
);
2102 next_block
= block
->next_in_same_bio
;
2103 block
->iodone_w_error
= iodone_w_error
;
2104 if (block
->submit_bio_bh_rw
& REQ_PREFLUSH
) {
2105 dev_state
->last_flush_gen
++;
2106 if ((dev_state
->state
->print_mask
&
2107 BTRFSIC_PRINT_MASK_END_IO_BIO_BH
))
2108 pr_info("bio_end_io() new %s flush_gen=%llu\n",
2110 dev_state
->last_flush_gen
);
2112 if (block
->submit_bio_bh_rw
& REQ_FUA
)
2113 block
->flush_gen
= 0; /* FUA completed means block is
2115 block
->is_iodone
= 1; /* for FLUSH, this releases the block */
2117 } while (NULL
!= block
);
2122 static int btrfsic_process_written_superblock(
2123 struct btrfsic_state
*state
,
2124 struct btrfsic_block
*const superblock
,
2125 struct btrfs_super_block
*const super_hdr
)
2127 struct btrfs_fs_info
*fs_info
= state
->fs_info
;
2130 superblock
->generation
= btrfs_super_generation(super_hdr
);
2131 if (!(superblock
->generation
> state
->max_superblock_generation
||
2132 0 == state
->max_superblock_generation
)) {
2133 if (state
->print_mask
& BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE
)
2134 pr_info("btrfsic: superblock @%llu (%s/%llu/%d) with old gen %llu <= %llu\n",
2135 superblock
->logical_bytenr
,
2136 superblock
->dev_state
->name
,
2137 superblock
->dev_bytenr
, superblock
->mirror_num
,
2138 btrfs_super_generation(super_hdr
),
2139 state
->max_superblock_generation
);
2141 if (state
->print_mask
& BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE
)
2142 pr_info("btrfsic: got new superblock @%llu (%s/%llu/%d) with new gen %llu > %llu\n",
2143 superblock
->logical_bytenr
,
2144 superblock
->dev_state
->name
,
2145 superblock
->dev_bytenr
, superblock
->mirror_num
,
2146 btrfs_super_generation(super_hdr
),
2147 state
->max_superblock_generation
);
2149 state
->max_superblock_generation
=
2150 btrfs_super_generation(super_hdr
);
2151 state
->latest_superblock
= superblock
;
2154 for (pass
= 0; pass
< 3; pass
++) {
2157 struct btrfsic_block
*next_block
;
2158 struct btrfsic_block_data_ctx tmp_next_block_ctx
;
2159 struct btrfsic_block_link
*l
;
2162 const char *additional_string
= NULL
;
2163 struct btrfs_disk_key tmp_disk_key
= {0};
2165 btrfs_set_disk_key_objectid(&tmp_disk_key
,
2166 BTRFS_ROOT_ITEM_KEY
);
2167 btrfs_set_disk_key_objectid(&tmp_disk_key
, 0);
2171 btrfs_set_disk_key_objectid(&tmp_disk_key
,
2172 BTRFS_ROOT_TREE_OBJECTID
);
2173 additional_string
= "root ";
2174 next_bytenr
= btrfs_super_root(super_hdr
);
2175 if (state
->print_mask
&
2176 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION
)
2177 pr_info("root@%llu\n", next_bytenr
);
2180 btrfs_set_disk_key_objectid(&tmp_disk_key
,
2181 BTRFS_CHUNK_TREE_OBJECTID
);
2182 additional_string
= "chunk ";
2183 next_bytenr
= btrfs_super_chunk_root(super_hdr
);
2184 if (state
->print_mask
&
2185 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION
)
2186 pr_info("chunk@%llu\n", next_bytenr
);
2189 btrfs_set_disk_key_objectid(&tmp_disk_key
,
2190 BTRFS_TREE_LOG_OBJECTID
);
2191 additional_string
= "log ";
2192 next_bytenr
= btrfs_super_log_root(super_hdr
);
2193 if (0 == next_bytenr
)
2195 if (state
->print_mask
&
2196 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION
)
2197 pr_info("log@%llu\n", next_bytenr
);
2201 num_copies
= btrfs_num_copies(fs_info
, next_bytenr
,
2202 BTRFS_SUPER_INFO_SIZE
);
2203 if (state
->print_mask
& BTRFSIC_PRINT_MASK_NUM_COPIES
)
2204 pr_info("num_copies(log_bytenr=%llu) = %d\n",
2205 next_bytenr
, num_copies
);
2206 for (mirror_num
= 1; mirror_num
<= num_copies
; mirror_num
++) {
2209 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2210 pr_info("btrfsic_process_written_superblock(mirror_num=%d)\n", mirror_num
);
2211 ret
= btrfsic_map_block(state
, next_bytenr
,
2212 BTRFS_SUPER_INFO_SIZE
,
2213 &tmp_next_block_ctx
,
2216 pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
2217 next_bytenr
, mirror_num
);
2221 next_block
= btrfsic_block_lookup_or_add(
2223 &tmp_next_block_ctx
,
2228 if (NULL
== next_block
) {
2229 btrfsic_release_block_ctx(&tmp_next_block_ctx
);
2233 next_block
->disk_key
= tmp_disk_key
;
2235 next_block
->generation
=
2236 BTRFSIC_GENERATION_UNKNOWN
;
2237 l
= btrfsic_block_link_lookup_or_add(
2239 &tmp_next_block_ctx
,
2242 BTRFSIC_GENERATION_UNKNOWN
);
2243 btrfsic_release_block_ctx(&tmp_next_block_ctx
);
2249 if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state
, superblock
, 0)))
2250 btrfsic_dump_tree(state
);
2255 static int btrfsic_check_all_ref_blocks(struct btrfsic_state
*state
,
2256 struct btrfsic_block
*const block
,
2257 int recursion_level
)
2259 const struct btrfsic_block_link
*l
;
2262 if (recursion_level
>= 3 + BTRFS_MAX_LEVEL
) {
2264 * Note that this situation can happen and does not
2265 * indicate an error in regular cases. It happens
2266 * when disk blocks are freed and later reused.
2267 * The check-integrity module is not aware of any
2268 * block free operations, it just recognizes block
2269 * write operations. Therefore it keeps the linkage
2270 * information for a block until a block is
2271 * rewritten. This can temporarily cause incorrect
2272 * and even circular linkage information. This
2273 * causes no harm unless such blocks are referenced
2274 * by the most recent super block.
2276 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2277 pr_info("btrfsic: abort cyclic linkage (case 1).\n");
2283 * This algorithm is recursive because the amount of used stack
2284 * space is very small and the max recursion depth is limited.
2286 list_for_each_entry(l
, &block
->ref_to_list
, node_ref_to
) {
2287 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2288 pr_info("rl=%d, %c @%llu (%s/%llu/%d) %u* refers to %c @%llu (%s/%llu/%d)\n",
2290 btrfsic_get_block_type(state
, block
),
2291 block
->logical_bytenr
, block
->dev_state
->name
,
2292 block
->dev_bytenr
, block
->mirror_num
,
2294 btrfsic_get_block_type(state
, l
->block_ref_to
),
2295 l
->block_ref_to
->logical_bytenr
,
2296 l
->block_ref_to
->dev_state
->name
,
2297 l
->block_ref_to
->dev_bytenr
,
2298 l
->block_ref_to
->mirror_num
);
2299 if (l
->block_ref_to
->never_written
) {
2300 pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is never written!\n",
2301 btrfsic_get_block_type(state
, l
->block_ref_to
),
2302 l
->block_ref_to
->logical_bytenr
,
2303 l
->block_ref_to
->dev_state
->name
,
2304 l
->block_ref_to
->dev_bytenr
,
2305 l
->block_ref_to
->mirror_num
);
2307 } else if (!l
->block_ref_to
->is_iodone
) {
2308 pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is not yet iodone!\n",
2309 btrfsic_get_block_type(state
, l
->block_ref_to
),
2310 l
->block_ref_to
->logical_bytenr
,
2311 l
->block_ref_to
->dev_state
->name
,
2312 l
->block_ref_to
->dev_bytenr
,
2313 l
->block_ref_to
->mirror_num
);
2315 } else if (l
->block_ref_to
->iodone_w_error
) {
2316 pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which has write error!\n",
2317 btrfsic_get_block_type(state
, l
->block_ref_to
),
2318 l
->block_ref_to
->logical_bytenr
,
2319 l
->block_ref_to
->dev_state
->name
,
2320 l
->block_ref_to
->dev_bytenr
,
2321 l
->block_ref_to
->mirror_num
);
2323 } else if (l
->parent_generation
!=
2324 l
->block_ref_to
->generation
&&
2325 BTRFSIC_GENERATION_UNKNOWN
!=
2326 l
->parent_generation
&&
2327 BTRFSIC_GENERATION_UNKNOWN
!=
2328 l
->block_ref_to
->generation
) {
2329 pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) with generation %llu != parent generation %llu!\n",
2330 btrfsic_get_block_type(state
, l
->block_ref_to
),
2331 l
->block_ref_to
->logical_bytenr
,
2332 l
->block_ref_to
->dev_state
->name
,
2333 l
->block_ref_to
->dev_bytenr
,
2334 l
->block_ref_to
->mirror_num
,
2335 l
->block_ref_to
->generation
,
2336 l
->parent_generation
);
2338 } else if (l
->block_ref_to
->flush_gen
>
2339 l
->block_ref_to
->dev_state
->last_flush_gen
) {
2340 pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is not flushed out of disk's write cache (block flush_gen=%llu, dev->flush_gen=%llu)!\n",
2341 btrfsic_get_block_type(state
, l
->block_ref_to
),
2342 l
->block_ref_to
->logical_bytenr
,
2343 l
->block_ref_to
->dev_state
->name
,
2344 l
->block_ref_to
->dev_bytenr
,
2345 l
->block_ref_to
->mirror_num
, block
->flush_gen
,
2346 l
->block_ref_to
->dev_state
->last_flush_gen
);
2348 } else if (-1 == btrfsic_check_all_ref_blocks(state
,
2359 static int btrfsic_is_block_ref_by_superblock(
2360 const struct btrfsic_state
*state
,
2361 const struct btrfsic_block
*block
,
2362 int recursion_level
)
2364 const struct btrfsic_block_link
*l
;
2366 if (recursion_level
>= 3 + BTRFS_MAX_LEVEL
) {
2367 /* refer to comment at "abort cyclic linkage (case 1)" */
2368 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2369 pr_info("btrfsic: abort cyclic linkage (case 2).\n");
2375 * This algorithm is recursive because the amount of used stack space
2376 * is very small and the max recursion depth is limited.
2378 list_for_each_entry(l
, &block
->ref_from_list
, node_ref_from
) {
2379 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2380 pr_info("rl=%d, %c @%llu (%s/%llu/%d) is ref %u* from %c @%llu (%s/%llu/%d)\n",
2382 btrfsic_get_block_type(state
, block
),
2383 block
->logical_bytenr
, block
->dev_state
->name
,
2384 block
->dev_bytenr
, block
->mirror_num
,
2386 btrfsic_get_block_type(state
, l
->block_ref_from
),
2387 l
->block_ref_from
->logical_bytenr
,
2388 l
->block_ref_from
->dev_state
->name
,
2389 l
->block_ref_from
->dev_bytenr
,
2390 l
->block_ref_from
->mirror_num
);
2391 if (l
->block_ref_from
->is_superblock
&&
2392 state
->latest_superblock
->dev_bytenr
==
2393 l
->block_ref_from
->dev_bytenr
&&
2394 state
->latest_superblock
->dev_state
->bdev
==
2395 l
->block_ref_from
->dev_state
->bdev
)
2397 else if (btrfsic_is_block_ref_by_superblock(state
,
2407 static void btrfsic_print_add_link(const struct btrfsic_state
*state
,
2408 const struct btrfsic_block_link
*l
)
2410 pr_info("Add %u* link from %c @%llu (%s/%llu/%d) to %c @%llu (%s/%llu/%d).\n",
2412 btrfsic_get_block_type(state
, l
->block_ref_from
),
2413 l
->block_ref_from
->logical_bytenr
,
2414 l
->block_ref_from
->dev_state
->name
,
2415 l
->block_ref_from
->dev_bytenr
, l
->block_ref_from
->mirror_num
,
2416 btrfsic_get_block_type(state
, l
->block_ref_to
),
2417 l
->block_ref_to
->logical_bytenr
,
2418 l
->block_ref_to
->dev_state
->name
, l
->block_ref_to
->dev_bytenr
,
2419 l
->block_ref_to
->mirror_num
);
2422 static void btrfsic_print_rem_link(const struct btrfsic_state
*state
,
2423 const struct btrfsic_block_link
*l
)
2425 pr_info("Rem %u* link from %c @%llu (%s/%llu/%d) to %c @%llu (%s/%llu/%d).\n",
2427 btrfsic_get_block_type(state
, l
->block_ref_from
),
2428 l
->block_ref_from
->logical_bytenr
,
2429 l
->block_ref_from
->dev_state
->name
,
2430 l
->block_ref_from
->dev_bytenr
, l
->block_ref_from
->mirror_num
,
2431 btrfsic_get_block_type(state
, l
->block_ref_to
),
2432 l
->block_ref_to
->logical_bytenr
,
2433 l
->block_ref_to
->dev_state
->name
, l
->block_ref_to
->dev_bytenr
,
2434 l
->block_ref_to
->mirror_num
);
2437 static char btrfsic_get_block_type(const struct btrfsic_state
*state
,
2438 const struct btrfsic_block
*block
)
2440 if (block
->is_superblock
&&
2441 state
->latest_superblock
->dev_bytenr
== block
->dev_bytenr
&&
2442 state
->latest_superblock
->dev_state
->bdev
== block
->dev_state
->bdev
)
2444 else if (block
->is_superblock
)
2446 else if (block
->is_metadata
)
2452 static void btrfsic_dump_tree(const struct btrfsic_state
*state
)
2454 btrfsic_dump_tree_sub(state
, state
->latest_superblock
, 0);
2457 static void btrfsic_dump_tree_sub(const struct btrfsic_state
*state
,
2458 const struct btrfsic_block
*block
,
2461 const struct btrfsic_block_link
*l
;
2463 static char buf
[80];
2464 int cursor_position
;
2467 * Should better fill an on-stack buffer with a complete line and
2468 * dump it at once when it is time to print a newline character.
2472 * This algorithm is recursive because the amount of used stack space
2473 * is very small and the max recursion depth is limited.
2475 indent_add
= sprintf(buf
, "%c-%llu(%s/%llu/%u)",
2476 btrfsic_get_block_type(state
, block
),
2477 block
->logical_bytenr
, block
->dev_state
->name
,
2478 block
->dev_bytenr
, block
->mirror_num
);
2479 if (indent_level
+ indent_add
> BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL
) {
2484 indent_level
+= indent_add
;
2485 if (list_empty(&block
->ref_to_list
)) {
2489 if (block
->mirror_num
> 1 &&
2490 !(state
->print_mask
& BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS
)) {
2495 cursor_position
= indent_level
;
2496 list_for_each_entry(l
, &block
->ref_to_list
, node_ref_to
) {
2497 while (cursor_position
< indent_level
) {
2502 indent_add
= sprintf(buf
, " %d*--> ", l
->ref_cnt
);
2504 indent_add
= sprintf(buf
, " --> ");
2505 if (indent_level
+ indent_add
>
2506 BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL
) {
2508 cursor_position
= 0;
2514 btrfsic_dump_tree_sub(state
, l
->block_ref_to
,
2515 indent_level
+ indent_add
);
2516 cursor_position
= 0;
2520 static struct btrfsic_block_link
*btrfsic_block_link_lookup_or_add(
2521 struct btrfsic_state
*state
,
2522 struct btrfsic_block_data_ctx
*next_block_ctx
,
2523 struct btrfsic_block
*next_block
,
2524 struct btrfsic_block
*from_block
,
2525 u64 parent_generation
)
2527 struct btrfsic_block_link
*l
;
2529 l
= btrfsic_block_link_hashtable_lookup(next_block_ctx
->dev
->bdev
,
2530 next_block_ctx
->dev_bytenr
,
2531 from_block
->dev_state
->bdev
,
2532 from_block
->dev_bytenr
,
2533 &state
->block_link_hashtable
);
2535 l
= btrfsic_block_link_alloc();
2539 l
->block_ref_to
= next_block
;
2540 l
->block_ref_from
= from_block
;
2542 l
->parent_generation
= parent_generation
;
2544 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2545 btrfsic_print_add_link(state
, l
);
2547 list_add(&l
->node_ref_to
, &from_block
->ref_to_list
);
2548 list_add(&l
->node_ref_from
, &next_block
->ref_from_list
);
2550 btrfsic_block_link_hashtable_add(l
,
2551 &state
->block_link_hashtable
);
2554 l
->parent_generation
= parent_generation
;
2555 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2556 btrfsic_print_add_link(state
, l
);
2562 static struct btrfsic_block
*btrfsic_block_lookup_or_add(
2563 struct btrfsic_state
*state
,
2564 struct btrfsic_block_data_ctx
*block_ctx
,
2565 const char *additional_string
,
2572 struct btrfsic_block
*block
;
2574 block
= btrfsic_block_hashtable_lookup(block_ctx
->dev
->bdev
,
2575 block_ctx
->dev_bytenr
,
2576 &state
->block_hashtable
);
2577 if (NULL
== block
) {
2578 struct btrfsic_dev_state
*dev_state
;
2580 block
= btrfsic_block_alloc();
2584 dev_state
= btrfsic_dev_state_lookup(block_ctx
->dev
->bdev
->bd_dev
);
2585 if (NULL
== dev_state
) {
2586 pr_info("btrfsic: error, lookup dev_state failed!\n");
2587 btrfsic_block_free(block
);
2590 block
->dev_state
= dev_state
;
2591 block
->dev_bytenr
= block_ctx
->dev_bytenr
;
2592 block
->logical_bytenr
= block_ctx
->start
;
2593 block
->is_metadata
= is_metadata
;
2594 block
->is_iodone
= is_iodone
;
2595 block
->never_written
= never_written
;
2596 block
->mirror_num
= mirror_num
;
2597 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2598 pr_info("New %s%c-block @%llu (%s/%llu/%d)\n",
2600 btrfsic_get_block_type(state
, block
),
2601 block
->logical_bytenr
, dev_state
->name
,
2602 block
->dev_bytenr
, mirror_num
);
2603 list_add(&block
->all_blocks_node
, &state
->all_blocks_list
);
2604 btrfsic_block_hashtable_add(block
, &state
->block_hashtable
);
2605 if (NULL
!= was_created
)
2608 if (NULL
!= was_created
)
2615 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state
*state
,
2617 struct btrfsic_dev_state
*dev_state
,
2620 struct btrfs_fs_info
*fs_info
= state
->fs_info
;
2621 struct btrfsic_block_data_ctx block_ctx
;
2627 num_copies
= btrfs_num_copies(fs_info
, bytenr
, state
->metablock_size
);
2629 for (mirror_num
= 1; mirror_num
<= num_copies
; mirror_num
++) {
2630 ret
= btrfsic_map_block(state
, bytenr
, state
->metablock_size
,
2631 &block_ctx
, mirror_num
);
2633 pr_info("btrfsic: btrfsic_map_block(logical @%llu, mirror %d) failed!\n",
2634 bytenr
, mirror_num
);
2638 if (dev_state
->bdev
== block_ctx
.dev
->bdev
&&
2639 dev_bytenr
== block_ctx
.dev_bytenr
) {
2641 btrfsic_release_block_ctx(&block_ctx
);
2644 btrfsic_release_block_ctx(&block_ctx
);
2647 if (WARN_ON(!match
)) {
2648 pr_info("btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio, buffer->log_bytenr=%llu, submit_bio(bdev=%s, phys_bytenr=%llu)!\n",
2649 bytenr
, dev_state
->name
, dev_bytenr
);
2650 for (mirror_num
= 1; mirror_num
<= num_copies
; mirror_num
++) {
2651 ret
= btrfsic_map_block(state
, bytenr
,
2652 state
->metablock_size
,
2653 &block_ctx
, mirror_num
);
2657 pr_info("Read logical bytenr @%llu maps to (%s/%llu/%d)\n",
2658 bytenr
, block_ctx
.dev
->name
,
2659 block_ctx
.dev_bytenr
, mirror_num
);
2664 static struct btrfsic_dev_state
*btrfsic_dev_state_lookup(dev_t dev
)
2666 return btrfsic_dev_state_hashtable_lookup(dev
,
2667 &btrfsic_dev_state_hashtable
);
2670 static void __btrfsic_submit_bio(struct bio
*bio
)
2672 struct btrfsic_dev_state
*dev_state
;
2674 if (!btrfsic_is_initialized
)
2677 mutex_lock(&btrfsic_mutex
);
2678 /* since btrfsic_submit_bio() is also called before
2679 * btrfsic_mount(), this might return NULL */
2680 dev_state
= btrfsic_dev_state_lookup(bio_dev(bio
) + bio
->bi_partno
);
2681 if (NULL
!= dev_state
&&
2682 (bio_op(bio
) == REQ_OP_WRITE
) && bio_has_data(bio
)) {
2686 struct bio_vec bvec
;
2687 struct bvec_iter iter
;
2689 char **mapped_datav
;
2690 unsigned int segs
= bio_segments(bio
);
2692 dev_bytenr
= 512 * bio
->bi_iter
.bi_sector
;
2694 if (dev_state
->state
->print_mask
&
2695 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH
)
2696 pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_disk=%p)\n",
2697 bio_op(bio
), bio
->bi_opf
, segs
,
2698 (unsigned long long)bio
->bi_iter
.bi_sector
,
2699 dev_bytenr
, bio
->bi_disk
);
2701 mapped_datav
= kmalloc_array(segs
,
2702 sizeof(*mapped_datav
), GFP_NOFS
);
2705 cur_bytenr
= dev_bytenr
;
2707 bio_for_each_segment(bvec
, bio
, iter
) {
2708 BUG_ON(bvec
.bv_len
!= PAGE_SIZE
);
2709 mapped_datav
[i
] = kmap(bvec
.bv_page
);
2712 if (dev_state
->state
->print_mask
&
2713 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE
)
2714 pr_info("#%u: bytenr=%llu, len=%u, offset=%u\n",
2715 i
, cur_bytenr
, bvec
.bv_len
, bvec
.bv_offset
);
2716 cur_bytenr
+= bvec
.bv_len
;
2718 btrfsic_process_written_block(dev_state
, dev_bytenr
,
2720 bio
, &bio_is_patched
,
2722 bio_for_each_segment(bvec
, bio
, iter
)
2723 kunmap(bvec
.bv_page
);
2724 kfree(mapped_datav
);
2725 } else if (NULL
!= dev_state
&& (bio
->bi_opf
& REQ_PREFLUSH
)) {
2726 if (dev_state
->state
->print_mask
&
2727 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH
)
2728 pr_info("submit_bio(rw=%d,0x%x FLUSH, disk=%p)\n",
2729 bio_op(bio
), bio
->bi_opf
, bio
->bi_disk
);
2730 if (!dev_state
->dummy_block_for_bio_bh_flush
.is_iodone
) {
2731 if ((dev_state
->state
->print_mask
&
2732 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH
|
2733 BTRFSIC_PRINT_MASK_VERBOSE
)))
2734 pr_info("btrfsic_submit_bio(%s) with FLUSH but dummy block already in use (ignored)!\n",
2737 struct btrfsic_block
*const block
=
2738 &dev_state
->dummy_block_for_bio_bh_flush
;
2740 block
->is_iodone
= 0;
2741 block
->never_written
= 0;
2742 block
->iodone_w_error
= 0;
2743 block
->flush_gen
= dev_state
->last_flush_gen
+ 1;
2744 block
->submit_bio_bh_rw
= bio
->bi_opf
;
2745 block
->orig_bio_private
= bio
->bi_private
;
2746 block
->orig_bio_end_io
= bio
->bi_end_io
;
2747 block
->next_in_same_bio
= NULL
;
2748 bio
->bi_private
= block
;
2749 bio
->bi_end_io
= btrfsic_bio_end_io
;
2753 mutex_unlock(&btrfsic_mutex
);
/*
 * Submit @bio after giving the integrity checker the chance to inspect
 * (and possibly hook into) it.
 */
void btrfsic_submit_bio(struct bio *bio)
{
	/* checker first, then the real submission */
	__btrfsic_submit_bio(bio);

	submit_bio(bio);
}
/*
 * Like btrfsic_submit_bio(), but the bio is submitted with
 * submit_bio_wait() and that function's return value is passed on.
 */
int btrfsic_submit_bio_wait(struct bio *bio)
{
	int ret;

	__btrfsic_submit_bio(bio);
	ret = submit_bio_wait(bio);

	return ret;
}
2768 int btrfsic_mount(struct btrfs_fs_info
*fs_info
,
2769 struct btrfs_fs_devices
*fs_devices
,
2770 int including_extent_data
, u32 print_mask
)
2773 struct btrfsic_state
*state
;
2774 struct list_head
*dev_head
= &fs_devices
->devices
;
2775 struct btrfs_device
*device
;
2777 if (!PAGE_ALIGNED(fs_info
->nodesize
)) {
2778 pr_info("btrfsic: cannot handle nodesize %d not being a multiple of PAGE_SIZE %ld!\n",
2779 fs_info
->nodesize
, PAGE_SIZE
);
2782 if (!PAGE_ALIGNED(fs_info
->sectorsize
)) {
2783 pr_info("btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_SIZE %ld!\n",
2784 fs_info
->sectorsize
, PAGE_SIZE
);
2787 state
= kvzalloc(sizeof(*state
), GFP_KERNEL
);
2791 if (!btrfsic_is_initialized
) {
2792 mutex_init(&btrfsic_mutex
);
2793 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable
);
2794 btrfsic_is_initialized
= 1;
2796 mutex_lock(&btrfsic_mutex
);
2797 state
->fs_info
= fs_info
;
2798 state
->print_mask
= print_mask
;
2799 state
->include_extent_data
= including_extent_data
;
2800 state
->csum_size
= 0;
2801 state
->metablock_size
= fs_info
->nodesize
;
2802 state
->datablock_size
= fs_info
->sectorsize
;
2803 INIT_LIST_HEAD(&state
->all_blocks_list
);
2804 btrfsic_block_hashtable_init(&state
->block_hashtable
);
2805 btrfsic_block_link_hashtable_init(&state
->block_link_hashtable
);
2806 state
->max_superblock_generation
= 0;
2807 state
->latest_superblock
= NULL
;
2809 list_for_each_entry(device
, dev_head
, dev_list
) {
2810 struct btrfsic_dev_state
*ds
;
2813 if (!device
->bdev
|| !device
->name
)
2816 ds
= btrfsic_dev_state_alloc();
2818 mutex_unlock(&btrfsic_mutex
);
2821 ds
->bdev
= device
->bdev
;
2823 bdevname(ds
->bdev
, ds
->name
);
2824 ds
->name
[BDEVNAME_SIZE
- 1] = '\0';
2825 p
= kbasename(ds
->name
);
2826 strlcpy(ds
->name
, p
, sizeof(ds
->name
));
2827 btrfsic_dev_state_hashtable_add(ds
,
2828 &btrfsic_dev_state_hashtable
);
2831 ret
= btrfsic_process_superblock(state
, fs_devices
);
2833 mutex_unlock(&btrfsic_mutex
);
2834 btrfsic_unmount(fs_devices
);
2838 if (state
->print_mask
& BTRFSIC_PRINT_MASK_INITIAL_DATABASE
)
2839 btrfsic_dump_database(state
);
2840 if (state
->print_mask
& BTRFSIC_PRINT_MASK_INITIAL_TREE
)
2841 btrfsic_dump_tree(state
);
2843 mutex_unlock(&btrfsic_mutex
);
2847 void btrfsic_unmount(struct btrfs_fs_devices
*fs_devices
)
2849 struct btrfsic_block
*b_all
, *tmp_all
;
2850 struct btrfsic_state
*state
;
2851 struct list_head
*dev_head
= &fs_devices
->devices
;
2852 struct btrfs_device
*device
;
2854 if (!btrfsic_is_initialized
)
2857 mutex_lock(&btrfsic_mutex
);
2860 list_for_each_entry(device
, dev_head
, dev_list
) {
2861 struct btrfsic_dev_state
*ds
;
2863 if (!device
->bdev
|| !device
->name
)
2866 ds
= btrfsic_dev_state_hashtable_lookup(
2867 device
->bdev
->bd_dev
,
2868 &btrfsic_dev_state_hashtable
);
2871 btrfsic_dev_state_hashtable_remove(ds
);
2872 btrfsic_dev_state_free(ds
);
2876 if (NULL
== state
) {
2877 pr_info("btrfsic: error, cannot find state information on umount!\n");
2878 mutex_unlock(&btrfsic_mutex
);
2883 * Don't care about keeping the lists' state up to date,
2884 * just free all memory that was allocated dynamically.
2885 * Free the blocks and the block_links.
2887 list_for_each_entry_safe(b_all
, tmp_all
, &state
->all_blocks_list
,
2889 struct btrfsic_block_link
*l
, *tmp
;
2891 list_for_each_entry_safe(l
, tmp
, &b_all
->ref_to_list
,
2893 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2894 btrfsic_print_rem_link(state
, l
);
2897 if (0 == l
->ref_cnt
)
2898 btrfsic_block_link_free(l
);
2901 if (b_all
->is_iodone
|| b_all
->never_written
)
2902 btrfsic_block_free(b_all
);
2904 pr_info("btrfs: attempt to free %c-block @%llu (%s/%llu/%d) on umount which is not yet iodone!\n",
2905 btrfsic_get_block_type(state
, b_all
),
2906 b_all
->logical_bytenr
, b_all
->dev_state
->name
,
2907 b_all
->dev_bytenr
, b_all
->mirror_num
);
2910 mutex_unlock(&btrfsic_mutex
);