]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - fs/btrfs/check-integrity.c
block: add a bi_error field to struct bio
[mirror_ubuntu-bionic-kernel.git] / fs / btrfs / check-integrity.c
1 /*
2 * Copyright (C) STRATO AG 2011. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19 /*
20 * This module can be used to catch cases when the btrfs kernel
21 * code executes write requests to the disk that bring the file
22 * system in an inconsistent state. In such a state, a power-loss
23 * or kernel panic event would cause that the data on disk is
24 * lost or at least damaged.
25 *
26 * Code is added that examines all block write requests during
27 * runtime (including writes of the super block). Three rules
28 * are verified and an error is printed on violation of the
29 * rules:
30 * 1. It is not allowed to write a disk block which is
31 * currently referenced by the super block (either directly
32 * or indirectly).
33 * 2. When a super block is written, it is verified that all
34 * referenced (directly or indirectly) blocks fulfill the
35 * following requirements:
36 * 2a. All referenced blocks have either been present when
37 * the file system was mounted, (i.e., they have been
38 * referenced by the super block) or they have been
39 * written since then and the write completion callback
40 * was called and no write error was indicated and a
41 * FLUSH request to the device where these blocks are
42 * located was received and completed.
43 * 2b. All referenced blocks need to have a generation
44 * number which is equal to the parent's number.
45 *
46 * One issue that was found using this module was that the log
47 * tree on disk became temporarily corrupted because disk blocks
48 * that had been in use for the log tree had been freed and
49 * reused too early, while being referenced by the written super
50 * block.
51 *
52 * The search term in the kernel log that can be used to filter
53 * on the existence of detected integrity issues is
54 * "btrfs: attempt".
55 *
56 * The integrity check is enabled via mount options. These
57 * mount options are only supported if the integrity check
58 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
59 *
60 * Example #1, apply integrity checks to all metadata:
61 * mount /dev/sdb1 /mnt -o check_int
62 *
63 * Example #2, apply integrity checks to all metadata and
64 * to data extents:
65 * mount /dev/sdb1 /mnt -o check_int_data
66 *
67 * Example #3, apply integrity checks to all metadata and dump
68 * the tree that the super block references to kernel messages
69 * each time after a super block was written:
70 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
71 *
72 * If the integrity check tool is included and activated in
73 * the mount options, plenty of kernel memory is used, and
74 * plenty of additional CPU cycles are spent. Enabling this
75 * functionality is not intended for normal use. In most
76 * cases, unless you are a btrfs developer who needs to verify
77 * the integrity of (super)-block write requests, do not
78 * enable the config option BTRFS_FS_CHECK_INTEGRITY to
79 * include and compile the integrity check tool.
80 *
81 * Expect millions of lines of information in the kernel log with an
82 * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the
83 * kernel config to at least 26 (which is 64MB). Usually the value is
84 * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be
85 * changed like this before LOG_BUF_SHIFT can be set to a high value:
86 * config LOG_BUF_SHIFT
87 * int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
88 * range 12 30
89 */
90
91 #include <linux/sched.h>
92 #include <linux/slab.h>
93 #include <linux/buffer_head.h>
94 #include <linux/mutex.h>
95 #include <linux/genhd.h>
96 #include <linux/blkdev.h>
97 #include <linux/vmalloc.h>
98 #include "ctree.h"
99 #include "disk-io.h"
100 #include "hash.h"
101 #include "transaction.h"
102 #include "extent_io.h"
103 #include "volumes.h"
104 #include "print-tree.h"
105 #include "locking.h"
106 #include "check-integrity.h"
107 #include "rcu-string.h"
108
109 #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
110 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
111 #define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
112 #define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
113 #define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
114 #define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
115 #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
116 #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters,
117 * excluding " [...]" */
118 #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
119
120 /*
121 * The definition of the bitmask fields for the print_mask.
122 * They are specified with the mount option check_integrity_print_mask.
123 */
124 #define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE 0x00000001
125 #define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION 0x00000002
126 #define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE 0x00000004
127 #define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE 0x00000008
128 #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH 0x00000010
129 #define BTRFSIC_PRINT_MASK_END_IO_BIO_BH 0x00000020
130 #define BTRFSIC_PRINT_MASK_VERBOSE 0x00000040
131 #define BTRFSIC_PRINT_MASK_VERY_VERBOSE 0x00000080
132 #define BTRFSIC_PRINT_MASK_INITIAL_TREE 0x00000100
133 #define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES 0x00000200
134 #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400
135 #define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800
136 #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000
137 #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE 0x00002000
138
139 struct btrfsic_dev_state;
140 struct btrfsic_state;
141
/*
 * One entry per disk block known to the integrity checker. Blocks are
 * kept in a hashtable keyed by (bdev, dev_bytenr) and on a global list;
 * the ref_to/ref_from lists model the on-disk reference graph via
 * btrfsic_block_link items.
 */
struct btrfsic_block {
	u32 magic_num;		/* only used for debug purposes */
	unsigned int is_metadata:1;	/* if it is meta-data, not data-data */
	unsigned int is_superblock:1;	/* if it is one of the superblocks */
	unsigned int is_iodone:1;	/* if is done by lower subsystem */
	unsigned int iodone_w_error:1;	/* error was indicated to endio */
	unsigned int never_written:1;	/* block was added because it was
					 * referenced, not because it was
					 * written */
	unsigned int mirror_num;	/* large enough to hold
					 * BTRFS_SUPER_MIRROR_MAX */
	struct btrfsic_dev_state *dev_state;
	u64 dev_bytenr;		/* key, physical byte num on disk */
	u64 logical_bytenr;	/* logical byte num on disk */
	u64 generation;
	struct btrfs_disk_key disk_key;	/* extra info to print in case of
					 * issues, will not always be correct */
	struct list_head collision_resolving_node;	/* hashtable bucket node */
	struct list_head all_blocks_node;	/* node in state->all_blocks_list */

	/* the following two lists contain block_link items */
	struct list_head ref_to_list;	/* list of links to referenced blocks */
	struct list_head ref_from_list;	/* list of links from referencing blocks */
	struct btrfsic_block *next_in_same_bio;
	void *orig_bio_bh_private;	/* saved bio/bh private pointer */
	union {
		bio_end_io_t *bio;
		bh_end_io_t *bh;
	} orig_bio_bh_end_io;	/* saved original completion callback */
	int submit_bio_bh_rw;
	u64 flush_gen; /* only valid if !never_written */
};
174
175 /*
176 * Elements of this type are allocated dynamically and required because
 * each block object can refer to and can be referred to by multiple
 * blocks. The key to look them up in the hashtable is the dev_bytenr of
 * the block referred to plus the one of the block referring to it.
180 * The fact that they are searchable via a hashtable and that a
181 * ref_cnt is maintained is not required for the btrfs integrity
182 * check algorithm itself, it is only used to make the output more
183 * beautiful in case that an error is detected (an error is defined
184 * as a write operation to a block while that block is still referenced).
185 */
/*
 * A directed, reference-counted edge between two btrfsic_block objects,
 * hashed by the dev_bytenr/bdev of both endpoints.
 */
struct btrfsic_block_link {
	u32 magic_num;		/* only used for debug purposes */
	u32 ref_cnt;		/* number of identical edges folded into this one */
	struct list_head node_ref_to;	/* node in block_ref_from->ref_to_list */
	struct list_head node_ref_from;	/* node in block_ref_to->ref_from_list */
	struct list_head collision_resolving_node;	/* hashtable bucket node */
	struct btrfsic_block *block_ref_to;
	struct btrfsic_block *block_ref_from;
	u64 parent_generation;	/* generation the referencing block claimed */
};
196
/*
 * Per-block-device state of the integrity checker, looked up via the
 * global btrfsic_dev_state_hashtable by bdev pointer.
 */
struct btrfsic_dev_state {
	u32 magic_num;		/* only used for debug purposes */
	struct block_device *bdev;
	struct btrfsic_state *state;
	struct list_head collision_resolving_node;	/* hashtable bucket node */
	/* placeholder block used to represent flush requests on this device */
	struct btrfsic_block dummy_block_for_bio_bh_flush;
	u64 last_flush_gen;	/* generation of the last completed flush */
	char name[BDEVNAME_SIZE];
};
206
/* Open-hashing table of btrfsic_block, keyed by (bdev, dev_bytenr). */
struct btrfsic_block_hashtable {
	struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
};
210
/* Open-hashing table of btrfsic_block_link, keyed by both endpoints. */
struct btrfsic_block_link_hashtable {
	struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
};
214
/* Open-hashing table of btrfsic_dev_state, keyed by bdev pointer. */
struct btrfsic_dev_state_hashtable {
	struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
};
218
/*
 * Context describing a mapped block's location and its in-memory data,
 * filled by btrfsic_map_block() and released via
 * btrfsic_release_block_ctx().
 */
struct btrfsic_block_data_ctx {
	u64 start;		/* virtual bytenr */
	u64 dev_bytenr;		/* physical bytenr on device */
	u32 len;
	struct btrfsic_dev_state *dev;
	char **datav;		/* per-page mapped data pointers */
	struct page **pagev;	/* pages backing datav */
	void *mem_to_free;	/* opaque cookie freed on release */
};
228
/* This structure is used to implement recursion without occupying
 * any stack space, refer to btrfsic_process_metablock() */
struct btrfsic_stack_frame {
	u32 magic;		/* BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER */
	u32 nr;			/* number of items in the current node */
	int error;		/* first error seen while processing this frame */
	int i;			/* current item/slot index */
	int limit_nesting;	/* remaining allowed nesting depth */
	int num_copies;
	int mirror_num;
	struct btrfsic_block *block;
	struct btrfsic_block_data_ctx *block_ctx;
	struct btrfsic_block *next_block;
	struct btrfsic_block_data_ctx next_block_ctx;
	struct btrfs_header *hdr;
	struct btrfsic_stack_frame *prev;	/* caller's frame */
};
246
/* Some state per mounted filesystem */
struct btrfsic_state {
	u32 print_mask;		/* BTRFSIC_PRINT_MASK_* verbosity bits */
	int include_extent_data;	/* also check data extents, not only metadata */
	int csum_size;		/* checksum size of the selected superblock */
	struct list_head all_blocks_list;	/* every known btrfsic_block */
	struct btrfsic_block_hashtable block_hashtable;
	struct btrfsic_block_link_hashtable block_link_hashtable;
	struct btrfs_root *root;
	u64 max_superblock_generation;	/* highest generation seen so far */
	struct btrfsic_block *latest_superblock;	/* block with that generation */
	u32 metablock_size;	/* nodesize of the filesystem */
	u32 datablock_size;	/* sectorsize of the filesystem */
};
261
262 static void btrfsic_block_init(struct btrfsic_block *b);
263 static struct btrfsic_block *btrfsic_block_alloc(void);
264 static void btrfsic_block_free(struct btrfsic_block *b);
265 static void btrfsic_block_link_init(struct btrfsic_block_link *n);
266 static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
267 static void btrfsic_block_link_free(struct btrfsic_block_link *n);
268 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
269 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
270 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
271 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
272 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
273 struct btrfsic_block_hashtable *h);
274 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
275 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
276 struct block_device *bdev,
277 u64 dev_bytenr,
278 struct btrfsic_block_hashtable *h);
279 static void btrfsic_block_link_hashtable_init(
280 struct btrfsic_block_link_hashtable *h);
281 static void btrfsic_block_link_hashtable_add(
282 struct btrfsic_block_link *l,
283 struct btrfsic_block_link_hashtable *h);
284 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
285 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
286 struct block_device *bdev_ref_to,
287 u64 dev_bytenr_ref_to,
288 struct block_device *bdev_ref_from,
289 u64 dev_bytenr_ref_from,
290 struct btrfsic_block_link_hashtable *h);
291 static void btrfsic_dev_state_hashtable_init(
292 struct btrfsic_dev_state_hashtable *h);
293 static void btrfsic_dev_state_hashtable_add(
294 struct btrfsic_dev_state *ds,
295 struct btrfsic_dev_state_hashtable *h);
296 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
297 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
298 struct block_device *bdev,
299 struct btrfsic_dev_state_hashtable *h);
300 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
301 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
302 static int btrfsic_process_superblock(struct btrfsic_state *state,
303 struct btrfs_fs_devices *fs_devices);
304 static int btrfsic_process_metablock(struct btrfsic_state *state,
305 struct btrfsic_block *block,
306 struct btrfsic_block_data_ctx *block_ctx,
307 int limit_nesting, int force_iodone_flag);
308 static void btrfsic_read_from_block_data(
309 struct btrfsic_block_data_ctx *block_ctx,
310 void *dst, u32 offset, size_t len);
311 static int btrfsic_create_link_to_next_block(
312 struct btrfsic_state *state,
313 struct btrfsic_block *block,
314 struct btrfsic_block_data_ctx
315 *block_ctx, u64 next_bytenr,
316 int limit_nesting,
317 struct btrfsic_block_data_ctx *next_block_ctx,
318 struct btrfsic_block **next_blockp,
319 int force_iodone_flag,
320 int *num_copiesp, int *mirror_nump,
321 struct btrfs_disk_key *disk_key,
322 u64 parent_generation);
323 static int btrfsic_handle_extent_data(struct btrfsic_state *state,
324 struct btrfsic_block *block,
325 struct btrfsic_block_data_ctx *block_ctx,
326 u32 item_offset, int force_iodone_flag);
327 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
328 struct btrfsic_block_data_ctx *block_ctx_out,
329 int mirror_num);
330 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
331 static int btrfsic_read_block(struct btrfsic_state *state,
332 struct btrfsic_block_data_ctx *block_ctx);
333 static void btrfsic_dump_database(struct btrfsic_state *state);
334 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
335 char **datav, unsigned int num_pages);
336 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
337 u64 dev_bytenr, char **mapped_datav,
338 unsigned int num_pages,
339 struct bio *bio, int *bio_is_patched,
340 struct buffer_head *bh,
341 int submit_bio_bh_rw);
342 static int btrfsic_process_written_superblock(
343 struct btrfsic_state *state,
344 struct btrfsic_block *const block,
345 struct btrfs_super_block *const super_hdr);
346 static void btrfsic_bio_end_io(struct bio *bp);
347 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
348 static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
349 const struct btrfsic_block *block,
350 int recursion_level);
351 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
352 struct btrfsic_block *const block,
353 int recursion_level);
354 static void btrfsic_print_add_link(const struct btrfsic_state *state,
355 const struct btrfsic_block_link *l);
356 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
357 const struct btrfsic_block_link *l);
358 static char btrfsic_get_block_type(const struct btrfsic_state *state,
359 const struct btrfsic_block *block);
360 static void btrfsic_dump_tree(const struct btrfsic_state *state);
361 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
362 const struct btrfsic_block *block,
363 int indent_level);
364 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
365 struct btrfsic_state *state,
366 struct btrfsic_block_data_ctx *next_block_ctx,
367 struct btrfsic_block *next_block,
368 struct btrfsic_block *from_block,
369 u64 parent_generation);
370 static struct btrfsic_block *btrfsic_block_lookup_or_add(
371 struct btrfsic_state *state,
372 struct btrfsic_block_data_ctx *block_ctx,
373 const char *additional_string,
374 int is_metadata,
375 int is_iodone,
376 int never_written,
377 int mirror_num,
378 int *was_created);
379 static int btrfsic_process_superblock_dev_mirror(
380 struct btrfsic_state *state,
381 struct btrfsic_dev_state *dev_state,
382 struct btrfs_device *device,
383 int superblock_mirror_num,
384 struct btrfsic_dev_state **selected_dev_state,
385 struct btrfs_super_block *selected_super);
386 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
387 struct block_device *bdev);
388 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
389 u64 bytenr,
390 struct btrfsic_dev_state *dev_state,
391 u64 dev_bytenr);
392
/* NOTE(review): global lock presumably serializing all checker state;
 * the init/lock sites are outside this excerpt — confirm in btrfsic_mount() */
static struct mutex btrfsic_mutex;
/* nonzero once the global state below has been set up (set elsewhere) */
static int btrfsic_is_initialized;
/* maps a block_device pointer to its btrfsic_dev_state */
static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
396
397
398 static void btrfsic_block_init(struct btrfsic_block *b)
399 {
400 b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
401 b->dev_state = NULL;
402 b->dev_bytenr = 0;
403 b->logical_bytenr = 0;
404 b->generation = BTRFSIC_GENERATION_UNKNOWN;
405 b->disk_key.objectid = 0;
406 b->disk_key.type = 0;
407 b->disk_key.offset = 0;
408 b->is_metadata = 0;
409 b->is_superblock = 0;
410 b->is_iodone = 0;
411 b->iodone_w_error = 0;
412 b->never_written = 0;
413 b->mirror_num = 0;
414 b->next_in_same_bio = NULL;
415 b->orig_bio_bh_private = NULL;
416 b->orig_bio_bh_end_io.bio = NULL;
417 INIT_LIST_HEAD(&b->collision_resolving_node);
418 INIT_LIST_HEAD(&b->all_blocks_node);
419 INIT_LIST_HEAD(&b->ref_to_list);
420 INIT_LIST_HEAD(&b->ref_from_list);
421 b->submit_bio_bh_rw = 0;
422 b->flush_gen = 0;
423 }
424
425 static struct btrfsic_block *btrfsic_block_alloc(void)
426 {
427 struct btrfsic_block *b;
428
429 b = kzalloc(sizeof(*b), GFP_NOFS);
430 if (NULL != b)
431 btrfsic_block_init(b);
432
433 return b;
434 }
435
436 static void btrfsic_block_free(struct btrfsic_block *b)
437 {
438 BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
439 kfree(b);
440 }
441
442 static void btrfsic_block_link_init(struct btrfsic_block_link *l)
443 {
444 l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
445 l->ref_cnt = 1;
446 INIT_LIST_HEAD(&l->node_ref_to);
447 INIT_LIST_HEAD(&l->node_ref_from);
448 INIT_LIST_HEAD(&l->collision_resolving_node);
449 l->block_ref_to = NULL;
450 l->block_ref_from = NULL;
451 }
452
453 static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
454 {
455 struct btrfsic_block_link *l;
456
457 l = kzalloc(sizeof(*l), GFP_NOFS);
458 if (NULL != l)
459 btrfsic_block_link_init(l);
460
461 return l;
462 }
463
464 static void btrfsic_block_link_free(struct btrfsic_block_link *l)
465 {
466 BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
467 kfree(l);
468 }
469
470 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
471 {
472 ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
473 ds->bdev = NULL;
474 ds->state = NULL;
475 ds->name[0] = '\0';
476 INIT_LIST_HEAD(&ds->collision_resolving_node);
477 ds->last_flush_gen = 0;
478 btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
479 ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
480 ds->dummy_block_for_bio_bh_flush.dev_state = ds;
481 }
482
483 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
484 {
485 struct btrfsic_dev_state *ds;
486
487 ds = kzalloc(sizeof(*ds), GFP_NOFS);
488 if (NULL != ds)
489 btrfsic_dev_state_init(ds);
490
491 return ds;
492 }
493
494 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
495 {
496 BUG_ON(!(NULL == ds ||
497 BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
498 kfree(ds);
499 }
500
501 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
502 {
503 int i;
504
505 for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
506 INIT_LIST_HEAD(h->table + i);
507 }
508
509 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
510 struct btrfsic_block_hashtable *h)
511 {
512 const unsigned int hashval =
513 (((unsigned int)(b->dev_bytenr >> 16)) ^
514 ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
515 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
516
517 list_add(&b->collision_resolving_node, h->table + hashval);
518 }
519
/* Unlink a block from its hash bucket; the block itself is not freed. */
static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
{
	list_del(&b->collision_resolving_node);
}
524
525 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
526 struct block_device *bdev,
527 u64 dev_bytenr,
528 struct btrfsic_block_hashtable *h)
529 {
530 const unsigned int hashval =
531 (((unsigned int)(dev_bytenr >> 16)) ^
532 ((unsigned int)((uintptr_t)bdev))) &
533 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
534 struct list_head *elem;
535
536 list_for_each(elem, h->table + hashval) {
537 struct btrfsic_block *const b =
538 list_entry(elem, struct btrfsic_block,
539 collision_resolving_node);
540
541 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
542 return b;
543 }
544
545 return NULL;
546 }
547
548 static void btrfsic_block_link_hashtable_init(
549 struct btrfsic_block_link_hashtable *h)
550 {
551 int i;
552
553 for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
554 INIT_LIST_HEAD(h->table + i);
555 }
556
557 static void btrfsic_block_link_hashtable_add(
558 struct btrfsic_block_link *l,
559 struct btrfsic_block_link_hashtable *h)
560 {
561 const unsigned int hashval =
562 (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
563 ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
564 ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
565 ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
566 & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
567
568 BUG_ON(NULL == l->block_ref_to);
569 BUG_ON(NULL == l->block_ref_from);
570 list_add(&l->collision_resolving_node, h->table + hashval);
571 }
572
/* Unlink a link from its hash bucket; the link itself is not freed. */
static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
{
	list_del(&l->collision_resolving_node);
}
577
578 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
579 struct block_device *bdev_ref_to,
580 u64 dev_bytenr_ref_to,
581 struct block_device *bdev_ref_from,
582 u64 dev_bytenr_ref_from,
583 struct btrfsic_block_link_hashtable *h)
584 {
585 const unsigned int hashval =
586 (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
587 ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
588 ((unsigned int)((uintptr_t)bdev_ref_to)) ^
589 ((unsigned int)((uintptr_t)bdev_ref_from))) &
590 (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
591 struct list_head *elem;
592
593 list_for_each(elem, h->table + hashval) {
594 struct btrfsic_block_link *const l =
595 list_entry(elem, struct btrfsic_block_link,
596 collision_resolving_node);
597
598 BUG_ON(NULL == l->block_ref_to);
599 BUG_ON(NULL == l->block_ref_from);
600 if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
601 l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
602 l->block_ref_from->dev_state->bdev == bdev_ref_from &&
603 l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
604 return l;
605 }
606
607 return NULL;
608 }
609
610 static void btrfsic_dev_state_hashtable_init(
611 struct btrfsic_dev_state_hashtable *h)
612 {
613 int i;
614
615 for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
616 INIT_LIST_HEAD(h->table + i);
617 }
618
619 static void btrfsic_dev_state_hashtable_add(
620 struct btrfsic_dev_state *ds,
621 struct btrfsic_dev_state_hashtable *h)
622 {
623 const unsigned int hashval =
624 (((unsigned int)((uintptr_t)ds->bdev)) &
625 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
626
627 list_add(&ds->collision_resolving_node, h->table + hashval);
628 }
629
/* Unlink per-device state from its hash bucket; it is not freed here. */
static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
{
	list_del(&ds->collision_resolving_node);
}
634
635 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
636 struct block_device *bdev,
637 struct btrfsic_dev_state_hashtable *h)
638 {
639 const unsigned int hashval =
640 (((unsigned int)((uintptr_t)bdev)) &
641 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
642 struct list_head *elem;
643
644 list_for_each(elem, h->table + hashval) {
645 struct btrfsic_dev_state *const ds =
646 list_entry(elem, struct btrfsic_dev_state,
647 collision_resolving_node);
648
649 if (ds->bdev == bdev)
650 return ds;
651 }
652
653 return NULL;
654 }
655
/*
 * Build the initial block database: read every superblock mirror on
 * every device, pick the superblock with the highest generation, then
 * walk the root, chunk and log trees it references.
 *
 * Returns 0 on success, -1 (or the mirror-processing error) on failure.
 */
static int btrfsic_process_superblock(struct btrfsic_state *state,
				      struct btrfs_fs_devices *fs_devices)
{
	int ret = 0;
	struct btrfs_super_block *selected_super;
	struct list_head *dev_head = &fs_devices->devices;
	struct btrfs_device *device;
	struct btrfsic_dev_state *selected_dev_state = NULL;
	int pass;

	BUG_ON(NULL == state);
	selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
	if (NULL == selected_super) {
		printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
		return -1;
	}

	/* examine all superblock mirrors of all devices */
	list_for_each_entry(device, dev_head, dev_list) {
		int i;
		struct btrfsic_dev_state *dev_state;

		if (!device->bdev || !device->name)
			continue;

		dev_state = btrfsic_dev_state_lookup(device->bdev);
		BUG_ON(NULL == dev_state);
		for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
			ret = btrfsic_process_superblock_dev_mirror(
					state, dev_state, device, i,
					&selected_dev_state, selected_super);
			/* only a failure on the first mirror is fatal */
			if (0 != ret && 0 == i) {
				kfree(selected_super);
				return ret;
			}
		}
	}

	if (NULL == state->latest_superblock) {
		printk(KERN_INFO "btrfsic: no superblock found!\n");
		kfree(selected_super);
		return -1;
	}

	state->csum_size = btrfs_super_csum_size(selected_super);

	/* pass 0: root tree, pass 1: chunk tree, pass 2: log tree */
	for (pass = 0; pass < 3; pass++) {
		int num_copies;
		int mirror_num;
		u64 next_bytenr;

		switch (pass) {
		case 0:
			next_bytenr = btrfs_super_root(selected_super);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "root@%llu\n", next_bytenr);
			break;
		case 1:
			next_bytenr = btrfs_super_chunk_root(selected_super);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "chunk@%llu\n", next_bytenr);
			break;
		case 2:
			next_bytenr = btrfs_super_log_root(selected_super);
			if (0 == next_bytenr)
				continue;	/* no log tree present */
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "log@%llu\n", next_bytenr);
			break;
		}

		num_copies =
		    btrfs_num_copies(state->root->fs_info,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       next_bytenr, num_copies);

		/* walk each mirror of this tree root block */
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			struct btrfsic_block *next_block;
			struct btrfsic_block_data_ctx tmp_next_block_ctx;
			struct btrfsic_block_link *l;

			ret = btrfsic_map_block(state, next_bytenr,
						state->metablock_size,
						&tmp_next_block_ctx,
						mirror_num);
			if (ret) {
				printk(KERN_INFO "btrfsic:"
				       " btrfsic_map_block(root @%llu,"
				       " mirror %d) failed!\n",
				       next_bytenr, mirror_num);
				kfree(selected_super);
				return -1;
			}

			/* the block and its link from the superblock were
			 * registered while the mirrors were processed above,
			 * so both lookups must succeed */
			next_block = btrfsic_block_hashtable_lookup(
					tmp_next_block_ctx.dev->bdev,
					tmp_next_block_ctx.dev_bytenr,
					&state->block_hashtable);
			BUG_ON(NULL == next_block);

			l = btrfsic_block_link_hashtable_lookup(
					tmp_next_block_ctx.dev->bdev,
					tmp_next_block_ctx.dev_bytenr,
					state->latest_superblock->dev_state->
					bdev,
					state->latest_superblock->dev_bytenr,
					&state->block_link_hashtable);
			BUG_ON(NULL == l);

			ret = btrfsic_read_block(state, &tmp_next_block_ctx);
			if (ret < (int)PAGE_CACHE_SIZE) {
				printk(KERN_INFO
				       "btrfsic: read @logical %llu failed!\n",
				       tmp_next_block_ctx.start);
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				kfree(selected_super);
				return -1;
			}

			/* recursively (via an explicit stack) process the
			 * whole tree below this root */
			ret = btrfsic_process_metablock(state,
							next_block,
							&tmp_next_block_ctx,
							BTRFS_MAX_LEVEL + 3, 1);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
		}
	}

	kfree(selected_super);
	return ret;
}
790
/*
 * Read and validate one superblock mirror of one device, register it in
 * the block database, track the highest-generation superblock seen so
 * far in *selected_dev_state/selected_super, and add link entries from
 * this superblock to the root, chunk and log tree roots it references.
 *
 * Returns 0 if the mirror was processed or skipped because it did not
 * validate; -1 on read/allocation/mapping failure.
 */
static int btrfsic_process_superblock_dev_mirror(
		struct btrfsic_state *state,
		struct btrfsic_dev_state *dev_state,
		struct btrfs_device *device,
		int superblock_mirror_num,
		struct btrfsic_dev_state **selected_dev_state,
		struct btrfs_super_block *selected_super)
{
	struct btrfs_super_block *super_tmp;
	u64 dev_bytenr;
	struct buffer_head *bh;
	struct btrfsic_block *superblock_tmp;
	int pass;
	struct block_device *const superblock_bdev = device->bdev;

	/* super block bytenr is always the unmapped device bytenr */
	dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
	if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes)
		return -1;
	bh = __bread(superblock_bdev, dev_bytenr / 4096,
		     BTRFS_SUPER_INFO_SIZE);
	if (NULL == bh)
		return -1;
	super_tmp = (struct btrfs_super_block *)
	    (bh->b_data + (dev_bytenr & 4095));

	/* an invalid mirror is silently skipped (return 0), not an error */
	if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
	    btrfs_super_magic(super_tmp) != BTRFS_MAGIC ||
	    memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
	    btrfs_super_nodesize(super_tmp) != state->metablock_size ||
	    btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
		brelse(bh);
		return 0;
	}

	superblock_tmp =
	    btrfsic_block_hashtable_lookup(superblock_bdev,
					   dev_bytenr,
					   &state->block_hashtable);
	if (NULL == superblock_tmp) {
		superblock_tmp = btrfsic_block_alloc();
		if (NULL == superblock_tmp) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			brelse(bh);
			return -1;
		}
		/* for superblock, only the dev_bytenr makes sense */
		superblock_tmp->dev_bytenr = dev_bytenr;
		superblock_tmp->dev_state = dev_state;
		superblock_tmp->logical_bytenr = dev_bytenr;
		superblock_tmp->generation = btrfs_super_generation(super_tmp);
		superblock_tmp->is_metadata = 1;
		superblock_tmp->is_superblock = 1;
		superblock_tmp->is_iodone = 1;
		superblock_tmp->never_written = 0;
		superblock_tmp->mirror_num = 1 + superblock_mirror_num;
		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
			printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
				     " @%llu (%s/%llu/%d)\n",
				     superblock_bdev,
				     rcu_str_deref(device->name), dev_bytenr,
				     dev_state->name, dev_bytenr,
				     superblock_mirror_num);
		list_add(&superblock_tmp->all_blocks_node,
			 &state->all_blocks_list);
		btrfsic_block_hashtable_add(superblock_tmp,
					    &state->block_hashtable);
	}

	/* select the one with the highest generation field */
	if (btrfs_super_generation(super_tmp) >
	    state->max_superblock_generation ||
	    0 == state->max_superblock_generation) {
		memcpy(selected_super, super_tmp, sizeof(*selected_super));
		*selected_dev_state = dev_state;
		state->max_superblock_generation =
		    btrfs_super_generation(super_tmp);
		state->latest_superblock = superblock_tmp;
	}

	/* pass 0: root tree, pass 1: chunk tree, pass 2: log tree */
	for (pass = 0; pass < 3; pass++) {
		u64 next_bytenr;
		int num_copies;
		int mirror_num;
		const char *additional_string = NULL;
		struct btrfs_disk_key tmp_disk_key;

		tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
		tmp_disk_key.offset = 0;
		switch (pass) {
		case 0:
			btrfs_set_disk_key_objectid(&tmp_disk_key,
						    BTRFS_ROOT_TREE_OBJECTID);
			additional_string = "initial root ";
			next_bytenr = btrfs_super_root(super_tmp);
			break;
		case 1:
			btrfs_set_disk_key_objectid(&tmp_disk_key,
						    BTRFS_CHUNK_TREE_OBJECTID);
			additional_string = "initial chunk ";
			next_bytenr = btrfs_super_chunk_root(super_tmp);
			break;
		case 2:
			btrfs_set_disk_key_objectid(&tmp_disk_key,
						    BTRFS_TREE_LOG_OBJECTID);
			additional_string = "initial log ";
			next_bytenr = btrfs_super_log_root(super_tmp);
			if (0 == next_bytenr)
				continue;	/* no log tree present */
			break;
		}

		num_copies =
		    btrfs_num_copies(state->root->fs_info,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       next_bytenr, num_copies);
		/* register every mirror of the referenced tree root and a
		 * link from this superblock to it */
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			struct btrfsic_block *next_block;
			struct btrfsic_block_data_ctx tmp_next_block_ctx;
			struct btrfsic_block_link *l;

			if (btrfsic_map_block(state, next_bytenr,
					      state->metablock_size,
					      &tmp_next_block_ctx,
					      mirror_num)) {
				printk(KERN_INFO "btrfsic: btrfsic_map_block("
				       "bytenr @%llu, mirror %d) failed!\n",
				       next_bytenr, mirror_num);
				brelse(bh);
				return -1;
			}

			next_block = btrfsic_block_lookup_or_add(
					state, &tmp_next_block_ctx,
					additional_string, 1, 1, 0,
					mirror_num, NULL);
			if (NULL == next_block) {
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				brelse(bh);
				return -1;
			}

			next_block->disk_key = tmp_disk_key;
			/* real generation is not known until the block is read */
			next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
			l = btrfsic_block_link_lookup_or_add(
					state, &tmp_next_block_ctx,
					next_block, superblock_tmp,
					BTRFSIC_GENERATION_UNKNOWN);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
			if (NULL == l) {
				brelse(bh);
				return -1;
			}
		}
	}
	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
		btrfsic_dump_tree_sub(state, superblock_tmp, 0);

	brelse(bh);
	return 0;
}
954
955 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
956 {
957 struct btrfsic_stack_frame *sf;
958
959 sf = kzalloc(sizeof(*sf), GFP_NOFS);
960 if (NULL == sf)
961 printk(KERN_INFO "btrfsic: alloc memory failed!\n");
962 else
963 sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
964 return sf;
965 }
966
967 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
968 {
969 BUG_ON(!(NULL == sf ||
970 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
971 kfree(sf);
972 }
973
974 static int btrfsic_process_metablock(
975 struct btrfsic_state *state,
976 struct btrfsic_block *const first_block,
977 struct btrfsic_block_data_ctx *const first_block_ctx,
978 int first_limit_nesting, int force_iodone_flag)
979 {
980 struct btrfsic_stack_frame initial_stack_frame = { 0 };
981 struct btrfsic_stack_frame *sf;
982 struct btrfsic_stack_frame *next_stack;
983 struct btrfs_header *const first_hdr =
984 (struct btrfs_header *)first_block_ctx->datav[0];
985
986 BUG_ON(!first_hdr);
987 sf = &initial_stack_frame;
988 sf->error = 0;
989 sf->i = -1;
990 sf->limit_nesting = first_limit_nesting;
991 sf->block = first_block;
992 sf->block_ctx = first_block_ctx;
993 sf->next_block = NULL;
994 sf->hdr = first_hdr;
995 sf->prev = NULL;
996
997 continue_with_new_stack_frame:
998 sf->block->generation = le64_to_cpu(sf->hdr->generation);
999 if (0 == sf->hdr->level) {
1000 struct btrfs_leaf *const leafhdr =
1001 (struct btrfs_leaf *)sf->hdr;
1002
1003 if (-1 == sf->i) {
1004 sf->nr = btrfs_stack_header_nritems(&leafhdr->header);
1005
1006 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1007 printk(KERN_INFO
1008 "leaf %llu items %d generation %llu"
1009 " owner %llu\n",
1010 sf->block_ctx->start, sf->nr,
1011 btrfs_stack_header_generation(
1012 &leafhdr->header),
1013 btrfs_stack_header_owner(
1014 &leafhdr->header));
1015 }
1016
1017 continue_with_current_leaf_stack_frame:
1018 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1019 sf->i++;
1020 sf->num_copies = 0;
1021 }
1022
1023 if (sf->i < sf->nr) {
1024 struct btrfs_item disk_item;
1025 u32 disk_item_offset =
1026 (uintptr_t)(leafhdr->items + sf->i) -
1027 (uintptr_t)leafhdr;
1028 struct btrfs_disk_key *disk_key;
1029 u8 type;
1030 u32 item_offset;
1031 u32 item_size;
1032
1033 if (disk_item_offset + sizeof(struct btrfs_item) >
1034 sf->block_ctx->len) {
1035 leaf_item_out_of_bounce_error:
1036 printk(KERN_INFO
1037 "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
1038 sf->block_ctx->start,
1039 sf->block_ctx->dev->name);
1040 goto one_stack_frame_backwards;
1041 }
1042 btrfsic_read_from_block_data(sf->block_ctx,
1043 &disk_item,
1044 disk_item_offset,
1045 sizeof(struct btrfs_item));
1046 item_offset = btrfs_stack_item_offset(&disk_item);
1047 item_size = btrfs_stack_item_size(&disk_item);
1048 disk_key = &disk_item.key;
1049 type = btrfs_disk_key_type(disk_key);
1050
1051 if (BTRFS_ROOT_ITEM_KEY == type) {
1052 struct btrfs_root_item root_item;
1053 u32 root_item_offset;
1054 u64 next_bytenr;
1055
1056 root_item_offset = item_offset +
1057 offsetof(struct btrfs_leaf, items);
1058 if (root_item_offset + item_size >
1059 sf->block_ctx->len)
1060 goto leaf_item_out_of_bounce_error;
1061 btrfsic_read_from_block_data(
1062 sf->block_ctx, &root_item,
1063 root_item_offset,
1064 item_size);
1065 next_bytenr = btrfs_root_bytenr(&root_item);
1066
1067 sf->error =
1068 btrfsic_create_link_to_next_block(
1069 state,
1070 sf->block,
1071 sf->block_ctx,
1072 next_bytenr,
1073 sf->limit_nesting,
1074 &sf->next_block_ctx,
1075 &sf->next_block,
1076 force_iodone_flag,
1077 &sf->num_copies,
1078 &sf->mirror_num,
1079 disk_key,
1080 btrfs_root_generation(
1081 &root_item));
1082 if (sf->error)
1083 goto one_stack_frame_backwards;
1084
1085 if (NULL != sf->next_block) {
1086 struct btrfs_header *const next_hdr =
1087 (struct btrfs_header *)
1088 sf->next_block_ctx.datav[0];
1089
1090 next_stack =
1091 btrfsic_stack_frame_alloc();
1092 if (NULL == next_stack) {
1093 sf->error = -1;
1094 btrfsic_release_block_ctx(
1095 &sf->
1096 next_block_ctx);
1097 goto one_stack_frame_backwards;
1098 }
1099
1100 next_stack->i = -1;
1101 next_stack->block = sf->next_block;
1102 next_stack->block_ctx =
1103 &sf->next_block_ctx;
1104 next_stack->next_block = NULL;
1105 next_stack->hdr = next_hdr;
1106 next_stack->limit_nesting =
1107 sf->limit_nesting - 1;
1108 next_stack->prev = sf;
1109 sf = next_stack;
1110 goto continue_with_new_stack_frame;
1111 }
1112 } else if (BTRFS_EXTENT_DATA_KEY == type &&
1113 state->include_extent_data) {
1114 sf->error = btrfsic_handle_extent_data(
1115 state,
1116 sf->block,
1117 sf->block_ctx,
1118 item_offset,
1119 force_iodone_flag);
1120 if (sf->error)
1121 goto one_stack_frame_backwards;
1122 }
1123
1124 goto continue_with_current_leaf_stack_frame;
1125 }
1126 } else {
1127 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
1128
1129 if (-1 == sf->i) {
1130 sf->nr = btrfs_stack_header_nritems(&nodehdr->header);
1131
1132 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1133 printk(KERN_INFO "node %llu level %d items %d"
1134 " generation %llu owner %llu\n",
1135 sf->block_ctx->start,
1136 nodehdr->header.level, sf->nr,
1137 btrfs_stack_header_generation(
1138 &nodehdr->header),
1139 btrfs_stack_header_owner(
1140 &nodehdr->header));
1141 }
1142
1143 continue_with_current_node_stack_frame:
1144 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1145 sf->i++;
1146 sf->num_copies = 0;
1147 }
1148
1149 if (sf->i < sf->nr) {
1150 struct btrfs_key_ptr key_ptr;
1151 u32 key_ptr_offset;
1152 u64 next_bytenr;
1153
1154 key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
1155 (uintptr_t)nodehdr;
1156 if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
1157 sf->block_ctx->len) {
1158 printk(KERN_INFO
1159 "btrfsic: node item out of bounce at logical %llu, dev %s\n",
1160 sf->block_ctx->start,
1161 sf->block_ctx->dev->name);
1162 goto one_stack_frame_backwards;
1163 }
1164 btrfsic_read_from_block_data(
1165 sf->block_ctx, &key_ptr, key_ptr_offset,
1166 sizeof(struct btrfs_key_ptr));
1167 next_bytenr = btrfs_stack_key_blockptr(&key_ptr);
1168
1169 sf->error = btrfsic_create_link_to_next_block(
1170 state,
1171 sf->block,
1172 sf->block_ctx,
1173 next_bytenr,
1174 sf->limit_nesting,
1175 &sf->next_block_ctx,
1176 &sf->next_block,
1177 force_iodone_flag,
1178 &sf->num_copies,
1179 &sf->mirror_num,
1180 &key_ptr.key,
1181 btrfs_stack_key_generation(&key_ptr));
1182 if (sf->error)
1183 goto one_stack_frame_backwards;
1184
1185 if (NULL != sf->next_block) {
1186 struct btrfs_header *const next_hdr =
1187 (struct btrfs_header *)
1188 sf->next_block_ctx.datav[0];
1189
1190 next_stack = btrfsic_stack_frame_alloc();
1191 if (NULL == next_stack) {
1192 sf->error = -1;
1193 goto one_stack_frame_backwards;
1194 }
1195
1196 next_stack->i = -1;
1197 next_stack->block = sf->next_block;
1198 next_stack->block_ctx = &sf->next_block_ctx;
1199 next_stack->next_block = NULL;
1200 next_stack->hdr = next_hdr;
1201 next_stack->limit_nesting =
1202 sf->limit_nesting - 1;
1203 next_stack->prev = sf;
1204 sf = next_stack;
1205 goto continue_with_new_stack_frame;
1206 }
1207
1208 goto continue_with_current_node_stack_frame;
1209 }
1210 }
1211
1212 one_stack_frame_backwards:
1213 if (NULL != sf->prev) {
1214 struct btrfsic_stack_frame *const prev = sf->prev;
1215
1216 /* the one for the initial block is freed in the caller */
1217 btrfsic_release_block_ctx(sf->block_ctx);
1218
1219 if (sf->error) {
1220 prev->error = sf->error;
1221 btrfsic_stack_frame_free(sf);
1222 sf = prev;
1223 goto one_stack_frame_backwards;
1224 }
1225
1226 btrfsic_stack_frame_free(sf);
1227 sf = prev;
1228 goto continue_with_new_stack_frame;
1229 } else {
1230 BUG_ON(&initial_stack_frame != sf);
1231 }
1232
1233 return sf->error;
1234 }
1235
1236 static void btrfsic_read_from_block_data(
1237 struct btrfsic_block_data_ctx *block_ctx,
1238 void *dstv, u32 offset, size_t len)
1239 {
1240 size_t cur;
1241 size_t offset_in_page;
1242 char *kaddr;
1243 char *dst = (char *)dstv;
1244 size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
1245 unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
1246
1247 WARN_ON(offset + len > block_ctx->len);
1248 offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1);
1249
1250 while (len > 0) {
1251 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
1252 BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_CACHE_SIZE));
1253 kaddr = block_ctx->datav[i];
1254 memcpy(dst, kaddr + offset_in_page, cur);
1255
1256 dst += cur;
1257 len -= cur;
1258 offset_in_page = 0;
1259 i++;
1260 }
1261 }
1262
/*
 * Create (or refresh) a link from @block to the metadata block at
 * logical address @next_bytenr, for one mirror per call.
 *
 * The caller drives a per-item iteration over all mirrors: *num_copiesp
 * and *mirror_nump are in/out state. On the first call for an item
 * *num_copiesp must be 0; it is then initialized and *mirror_nump set
 * to 1. Each successful call increments *mirror_nump; once it exceeds
 * *num_copiesp the function returns 0 immediately and the caller moves
 * to the next item.
 *
 * If the referenced block should be descended into (limit_nesting > 0
 * and the link was newly created), its content is read and *next_blockp
 * is set so the caller can recurse; @next_block_ctx then stays mapped
 * and must be released by the caller. Otherwise *next_blockp is NULL
 * and the context has been released here.
 *
 * Returns 0 on success, -1 on error (context released, *next_blockp
 * NULL).
 */
static int btrfsic_create_link_to_next_block(
		struct btrfsic_state *state,
		struct btrfsic_block *block,
		struct btrfsic_block_data_ctx *block_ctx,
		u64 next_bytenr,
		int limit_nesting,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block **next_blockp,
		int force_iodone_flag,
		int *num_copiesp, int *mirror_nump,
		struct btrfs_disk_key *disk_key,
		u64 parent_generation)
{
	struct btrfsic_block *next_block = NULL;
	int ret;
	struct btrfsic_block_link *l;
	int did_alloc_block_link;
	int block_was_created;

	*next_blockp = NULL;
	if (0 == *num_copiesp) {
		/* first mirror for this item: initialize the iteration */
		*num_copiesp =
		    btrfs_num_copies(state->root->fs_info,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       next_bytenr, *num_copiesp);
		*mirror_nump = 1;
	}

	/* all mirrors of this item already handled */
	if (*mirror_nump > *num_copiesp)
		return 0;

	if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
		printk(KERN_INFO
		       "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
		       *mirror_nump);
	ret = btrfsic_map_block(state, next_bytenr,
				state->metablock_size,
				next_block_ctx, *mirror_nump);
	if (ret) {
		printk(KERN_INFO
		       "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
		       next_bytenr, *mirror_nump);
		btrfsic_release_block_ctx(next_block_ctx);
		*next_blockp = NULL;
		return -1;
	}

	next_block = btrfsic_block_lookup_or_add(state,
						 next_block_ctx, "referenced ",
						 1, force_iodone_flag,
						 !force_iodone_flag,
						 *mirror_nump,
						 &block_was_created);
	if (NULL == next_block) {
		btrfsic_release_block_ctx(next_block_ctx);
		*next_blockp = NULL;
		return -1;
	}
	if (block_was_created) {
		l = NULL;
		next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
	} else {
		/* block was already known; warn on logical-address mismatch */
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) {
			if (next_block->logical_bytenr != next_bytenr &&
			    !(!next_block->is_metadata &&
			      0 == next_block->logical_bytenr))
				printk(KERN_INFO
				       "Referenced block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
				       next_bytenr, next_block_ctx->dev->name,
				       next_block_ctx->dev_bytenr, *mirror_nump,
				       btrfsic_get_block_type(state,
							      next_block),
				       next_block->logical_bytenr);
			else
				printk(KERN_INFO
				       "Referenced block @%llu (%s/%llu/%d) found in hash table, %c.\n",
				       next_bytenr, next_block_ctx->dev->name,
				       next_block_ctx->dev_bytenr, *mirror_nump,
				       btrfsic_get_block_type(state,
							      next_block));
		}
		next_block->logical_bytenr = next_bytenr;

		next_block->mirror_num = *mirror_nump;
		l = btrfsic_block_link_hashtable_lookup(
				next_block_ctx->dev->bdev,
				next_block_ctx->dev_bytenr,
				block_ctx->dev->bdev,
				block_ctx->dev_bytenr,
				&state->block_link_hashtable);
	}

	next_block->disk_key = *disk_key;
	if (NULL == l) {
		/* no link block -> next_block known yet: create one */
		l = btrfsic_block_link_alloc();
		if (NULL == l) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			btrfsic_release_block_ctx(next_block_ctx);
			*next_blockp = NULL;
			return -1;
		}

		did_alloc_block_link = 1;
		l->block_ref_to = next_block;
		l->block_ref_from = block;
		l->ref_cnt = 1;
		l->parent_generation = parent_generation;

		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			btrfsic_print_add_link(state, l);

		list_add(&l->node_ref_to, &block->ref_to_list);
		list_add(&l->node_ref_from, &next_block->ref_from_list);

		btrfsic_block_link_hashtable_add(l,
						 &state->block_link_hashtable);
	} else {
		did_alloc_block_link = 0;
		if (0 == limit_nesting) {
			/* at the nesting limit just bump the refcount */
			l->ref_cnt++;
			l->parent_generation = parent_generation;
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				btrfsic_print_add_link(state, l);
		}
	}

	if (limit_nesting > 0 && did_alloc_block_link) {
		/* newly linked and still allowed to descend: read content */
		ret = btrfsic_read_block(state, next_block_ctx);
		if (ret < (int)next_block_ctx->len) {
			printk(KERN_INFO
			       "btrfsic: read block @logical %llu failed!\n",
			       next_bytenr);
			btrfsic_release_block_ctx(next_block_ctx);
			*next_blockp = NULL;
			return -1;
		}

		*next_blockp = next_block;
	} else {
		*next_blockp = NULL;
	}
	(*mirror_nump)++;

	return 0;
}
1410
/*
 * Handle an EXTENT_DATA item found in a leaf: record links from @block
 * (the leaf's btrfsic block) to every data block of the referenced file
 * extent, on every mirror.
 *
 * The item is read in two steps: first only up to disk_num_bytes (enough
 * to see the type and disk_bytenr), and only if it is a regular,
 * non-hole extent is the full item read. Inline or hole extents are
 * ignored (return 0). The extent is then walked in datablock_size
 * chunks, each chunk mapped and linked for every mirror.
 *
 * Returns 0 on success, -1 on error.
 */
static int btrfsic_handle_extent_data(
		struct btrfsic_state *state,
		struct btrfsic_block *block,
		struct btrfsic_block_data_ctx *block_ctx,
		u32 item_offset, int force_iodone_flag)
{
	int ret;
	struct btrfs_file_extent_item file_extent_item;
	u64 file_extent_item_offset;
	u64 next_bytenr;
	u64 num_bytes;
	u64 generation;
	struct btrfsic_block_link *l;

	file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
				  item_offset;
	/* bounds check for the partial (prefix) read below */
	if (file_extent_item_offset +
	    offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
	    block_ctx->len) {
		printk(KERN_INFO
		       "btrfsic: file item out of bounce at logical %llu, dev %s\n",
		       block_ctx->start, block_ctx->dev->name);
		return -1;
	}

	/* first read only the prefix: enough for type and disk_bytenr */
	btrfsic_read_from_block_data(block_ctx, &file_extent_item,
				     file_extent_item_offset,
				     offsetof(struct btrfs_file_extent_item, disk_num_bytes));
	if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
	    btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) {
		/* inline extent or hole: nothing on disk to link to */
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
			printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
			       file_extent_item.type,
			       btrfs_stack_file_extent_disk_bytenr(
			       &file_extent_item));
		return 0;
	}

	/* bounds check for the full item read */
	if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
	    block_ctx->len) {
		printk(KERN_INFO
		       "btrfsic: file item out of bounce at logical %llu, dev %s\n",
		       block_ctx->start, block_ctx->dev->name);
		return -1;
	}
	btrfsic_read_from_block_data(block_ctx, &file_extent_item,
				     file_extent_item_offset,
				     sizeof(struct btrfs_file_extent_item));
	next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item);
	if (btrfs_stack_file_extent_compression(&file_extent_item) ==
	    BTRFS_COMPRESS_NONE) {
		/* uncompressed: only the referenced sub-range matters */
		next_bytenr += btrfs_stack_file_extent_offset(&file_extent_item);
		num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
	} else {
		/* compressed: the whole on-disk extent is referenced */
		num_bytes = btrfs_stack_file_extent_disk_num_bytes(&file_extent_item);
	}
	generation = btrfs_stack_file_extent_generation(&file_extent_item);

	if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
		printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
		       " offset = %llu, num_bytes = %llu\n",
		       file_extent_item.type,
		       btrfs_stack_file_extent_disk_bytenr(&file_extent_item),
		       btrfs_stack_file_extent_offset(&file_extent_item),
		       num_bytes);
	/* walk the extent in datablock_size chunks */
	while (num_bytes > 0) {
		u32 chunk_len;
		int num_copies;
		int mirror_num;

		if (num_bytes > state->datablock_size)
			chunk_len = state->datablock_size;
		else
			chunk_len = num_bytes;

		num_copies =
		    btrfs_num_copies(state->root->fs_info,
				     next_bytenr, state->datablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       next_bytenr, num_copies);
		/* link the chunk on every mirror */
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			struct btrfsic_block_data_ctx next_block_ctx;
			struct btrfsic_block *next_block;
			int block_was_created;

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO "btrfsic_handle_extent_data("
				       "mirror_num=%d)\n", mirror_num);
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
				printk(KERN_INFO
				       "\tdisk_bytenr = %llu, num_bytes %u\n",
				       next_bytenr, chunk_len);
			ret = btrfsic_map_block(state, next_bytenr,
						chunk_len, &next_block_ctx,
						mirror_num);
			if (ret) {
				printk(KERN_INFO
				       "btrfsic: btrfsic_map_block(@%llu,"
				       " mirror=%d) failed!\n",
				       next_bytenr, mirror_num);
				return -1;
			}

			next_block = btrfsic_block_lookup_or_add(
					state,
					&next_block_ctx,
					"referenced ",
					0,
					force_iodone_flag,
					!force_iodone_flag,
					mirror_num,
					&block_was_created);
			if (NULL == next_block) {
				printk(KERN_INFO
				       "btrfsic: error, kmalloc failed!\n");
				btrfsic_release_block_ctx(&next_block_ctx);
				return -1;
			}
			if (!block_was_created) {
				/* known block; warn on bytenr mismatch */
				if ((state->print_mask &
				     BTRFSIC_PRINT_MASK_VERBOSE) &&
				    next_block->logical_bytenr != next_bytenr &&
				    !(!next_block->is_metadata &&
				      0 == next_block->logical_bytenr)) {
					printk(KERN_INFO
					       "Referenced block"
					       " @%llu (%s/%llu/%d)"
					       " found in hash table, D,"
					       " bytenr mismatch"
					       " (!= stored %llu).\n",
					       next_bytenr,
					       next_block_ctx.dev->name,
					       next_block_ctx.dev_bytenr,
					       mirror_num,
					       next_block->logical_bytenr);
				}
				next_block->logical_bytenr = next_bytenr;
				next_block->mirror_num = mirror_num;
			}

			l = btrfsic_block_link_lookup_or_add(state,
							     &next_block_ctx,
							     next_block, block,
							     generation);
			btrfsic_release_block_ctx(&next_block_ctx);
			if (NULL == l)
				return -1;
		}

		next_bytenr += chunk_len;
		num_bytes -= chunk_len;
	}

	return 0;
}
1567
1568 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1569 struct btrfsic_block_data_ctx *block_ctx_out,
1570 int mirror_num)
1571 {
1572 int ret;
1573 u64 length;
1574 struct btrfs_bio *multi = NULL;
1575 struct btrfs_device *device;
1576
1577 length = len;
1578 ret = btrfs_map_block(state->root->fs_info, READ,
1579 bytenr, &length, &multi, mirror_num);
1580
1581 if (ret) {
1582 block_ctx_out->start = 0;
1583 block_ctx_out->dev_bytenr = 0;
1584 block_ctx_out->len = 0;
1585 block_ctx_out->dev = NULL;
1586 block_ctx_out->datav = NULL;
1587 block_ctx_out->pagev = NULL;
1588 block_ctx_out->mem_to_free = NULL;
1589
1590 return ret;
1591 }
1592
1593 device = multi->stripes[0].dev;
1594 block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
1595 block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1596 block_ctx_out->start = bytenr;
1597 block_ctx_out->len = len;
1598 block_ctx_out->datav = NULL;
1599 block_ctx_out->pagev = NULL;
1600 block_ctx_out->mem_to_free = NULL;
1601
1602 kfree(multi);
1603 if (NULL == block_ctx_out->dev) {
1604 ret = -ENXIO;
1605 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
1606 }
1607
1608 return ret;
1609 }
1610
1611 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
1612 {
1613 if (block_ctx->mem_to_free) {
1614 unsigned int num_pages;
1615
1616 BUG_ON(!block_ctx->datav);
1617 BUG_ON(!block_ctx->pagev);
1618 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1619 PAGE_CACHE_SHIFT;
1620 while (num_pages > 0) {
1621 num_pages--;
1622 if (block_ctx->datav[num_pages]) {
1623 kunmap(block_ctx->pagev[num_pages]);
1624 block_ctx->datav[num_pages] = NULL;
1625 }
1626 if (block_ctx->pagev[num_pages]) {
1627 __free_page(block_ctx->pagev[num_pages]);
1628 block_ctx->pagev[num_pages] = NULL;
1629 }
1630 }
1631
1632 kfree(block_ctx->mem_to_free);
1633 block_ctx->mem_to_free = NULL;
1634 block_ctx->pagev = NULL;
1635 block_ctx->datav = NULL;
1636 }
1637 }
1638
/*
 * Read the block described by @block_ctx (already located by
 * btrfsic_map_block()) from disk into freshly allocated pages, and kmap
 * them so block_ctx->datav[] can be read directly.
 *
 * A single allocation backs both the datav and pagev arrays
 * (mem_to_free). Reads are issued with as many pages per bio as
 * bio_add_page() accepts; if the device caps the bio size, the loop
 * resumes a new bio at the first page that did not fit.
 *
 * Returns block_ctx->len on success, -1 on any failure. On failure the
 * caller is expected to clean up via btrfsic_release_block_ctx(), which
 * copes with a partially populated context.
 */
static int btrfsic_read_block(struct btrfsic_state *state,
			      struct btrfsic_block_data_ctx *block_ctx)
{
	unsigned int num_pages;
	unsigned int i;
	u64 dev_bytenr;
	int ret;

	/* must not be called on an already-read context */
	BUG_ON(block_ctx->datav);
	BUG_ON(block_ctx->pagev);
	BUG_ON(block_ctx->mem_to_free);
	if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
		printk(KERN_INFO
		       "btrfsic: read_block() with unaligned bytenr %llu\n",
		       block_ctx->dev_bytenr);
		return -1;
	}

	num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
		    PAGE_CACHE_SHIFT;
	/* one allocation for both the datav and pagev arrays */
	block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
					  sizeof(*block_ctx->pagev)) *
					 num_pages, GFP_NOFS);
	if (!block_ctx->mem_to_free)
		return -1;
	block_ctx->datav = block_ctx->mem_to_free;
	block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
	for (i = 0; i < num_pages; i++) {
		block_ctx->pagev[i] = alloc_page(GFP_NOFS);
		if (!block_ctx->pagev[i])
			return -1;
	}

	dev_bytenr = block_ctx->dev_bytenr;
	/* i advances by however many pages each bio managed to carry */
	for (i = 0; i < num_pages;) {
		struct bio *bio;
		unsigned int j;

		bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
		if (!bio) {
			printk(KERN_INFO
			       "btrfsic: bio_alloc() for %u pages failed!\n",
			       num_pages - i);
			return -1;
		}
		bio->bi_bdev = block_ctx->dev->bdev;
		bio->bi_iter.bi_sector = dev_bytenr >> 9;

		/* add pages until the bio is full or we run out */
		for (j = i; j < num_pages; j++) {
			ret = bio_add_page(bio, block_ctx->pagev[j],
					   PAGE_CACHE_SIZE, 0);
			if (PAGE_CACHE_SIZE != ret)
				break;
		}
		if (j == i) {
			/* not even one page fit: give up */
			printk(KERN_INFO
			       "btrfsic: error, failed to add a single page!\n");
			return -1;
		}
		if (submit_bio_wait(READ, bio)) {
			printk(KERN_INFO
			       "btrfsic: read error at logical %llu dev %s!\n",
			       block_ctx->start, block_ctx->dev->name);
			bio_put(bio);
			return -1;
		}
		bio_put(bio);
		dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
		i = j;
	}
	/* map every page so callers can access datav[] directly */
	for (i = 0; i < num_pages; i++) {
		block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
		if (!block_ctx->datav[i]) {
			printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
			       block_ctx->dev->name);
			return -1;
		}
	}

	return block_ctx->len;
}
1720
1721 static void btrfsic_dump_database(struct btrfsic_state *state)
1722 {
1723 struct list_head *elem_all;
1724
1725 BUG_ON(NULL == state);
1726
1727 printk(KERN_INFO "all_blocks_list:\n");
1728 list_for_each(elem_all, &state->all_blocks_list) {
1729 const struct btrfsic_block *const b_all =
1730 list_entry(elem_all, struct btrfsic_block,
1731 all_blocks_node);
1732 struct list_head *elem_ref_to;
1733 struct list_head *elem_ref_from;
1734
1735 printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
1736 btrfsic_get_block_type(state, b_all),
1737 b_all->logical_bytenr, b_all->dev_state->name,
1738 b_all->dev_bytenr, b_all->mirror_num);
1739
1740 list_for_each(elem_ref_to, &b_all->ref_to_list) {
1741 const struct btrfsic_block_link *const l =
1742 list_entry(elem_ref_to,
1743 struct btrfsic_block_link,
1744 node_ref_to);
1745
1746 printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1747 " refers %u* to"
1748 " %c @%llu (%s/%llu/%d)\n",
1749 btrfsic_get_block_type(state, b_all),
1750 b_all->logical_bytenr, b_all->dev_state->name,
1751 b_all->dev_bytenr, b_all->mirror_num,
1752 l->ref_cnt,
1753 btrfsic_get_block_type(state, l->block_ref_to),
1754 l->block_ref_to->logical_bytenr,
1755 l->block_ref_to->dev_state->name,
1756 l->block_ref_to->dev_bytenr,
1757 l->block_ref_to->mirror_num);
1758 }
1759
1760 list_for_each(elem_ref_from, &b_all->ref_from_list) {
1761 const struct btrfsic_block_link *const l =
1762 list_entry(elem_ref_from,
1763 struct btrfsic_block_link,
1764 node_ref_from);
1765
1766 printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1767 " is ref %u* from"
1768 " %c @%llu (%s/%llu/%d)\n",
1769 btrfsic_get_block_type(state, b_all),
1770 b_all->logical_bytenr, b_all->dev_state->name,
1771 b_all->dev_bytenr, b_all->mirror_num,
1772 l->ref_cnt,
1773 btrfsic_get_block_type(state, l->block_ref_from),
1774 l->block_ref_from->logical_bytenr,
1775 l->block_ref_from->dev_state->name,
1776 l->block_ref_from->dev_bytenr,
1777 l->block_ref_from->mirror_num);
1778 }
1779
1780 printk(KERN_INFO "\n");
1781 }
1782 }
1783
1784 /*
1785 * Test whether the disk block contains a tree block (leaf or node)
1786 * (note that this test fails for the super block)
1787 */
1788 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
1789 char **datav, unsigned int num_pages)
1790 {
1791 struct btrfs_header *h;
1792 u8 csum[BTRFS_CSUM_SIZE];
1793 u32 crc = ~(u32)0;
1794 unsigned int i;
1795
1796 if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
1797 return 1; /* not metadata */
1798 num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
1799 h = (struct btrfs_header *)datav[0];
1800
1801 if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
1802 return 1;
1803
1804 for (i = 0; i < num_pages; i++) {
1805 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1806 size_t sublen = i ? PAGE_CACHE_SIZE :
1807 (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
1808
1809 crc = btrfs_crc32c(crc, data, sublen);
1810 }
1811 btrfs_csum_final(crc, csum);
1812 if (memcmp(csum, h->csum, state->csum_size))
1813 return 1;
1814
1815 return 0; /* is metadata */
1816 }
1817
1818 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1819 u64 dev_bytenr, char **mapped_datav,
1820 unsigned int num_pages,
1821 struct bio *bio, int *bio_is_patched,
1822 struct buffer_head *bh,
1823 int submit_bio_bh_rw)
1824 {
1825 int is_metadata;
1826 struct btrfsic_block *block;
1827 struct btrfsic_block_data_ctx block_ctx;
1828 int ret;
1829 struct btrfsic_state *state = dev_state->state;
1830 struct block_device *bdev = dev_state->bdev;
1831 unsigned int processed_len;
1832
1833 if (NULL != bio_is_patched)
1834 *bio_is_patched = 0;
1835
1836 again:
1837 if (num_pages == 0)
1838 return;
1839
1840 processed_len = 0;
1841 is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
1842 num_pages));
1843
1844 block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
1845 &state->block_hashtable);
1846 if (NULL != block) {
1847 u64 bytenr = 0;
1848 struct list_head *elem_ref_to;
1849 struct list_head *tmp_ref_to;
1850
1851 if (block->is_superblock) {
1852 bytenr = btrfs_super_bytenr((struct btrfs_super_block *)
1853 mapped_datav[0]);
1854 if (num_pages * PAGE_CACHE_SIZE <
1855 BTRFS_SUPER_INFO_SIZE) {
1856 printk(KERN_INFO
1857 "btrfsic: cannot work with too short bios!\n");
1858 return;
1859 }
1860 is_metadata = 1;
1861 BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
1862 processed_len = BTRFS_SUPER_INFO_SIZE;
1863 if (state->print_mask &
1864 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
1865 printk(KERN_INFO
1866 "[before new superblock is written]:\n");
1867 btrfsic_dump_tree_sub(state, block, 0);
1868 }
1869 }
1870 if (is_metadata) {
1871 if (!block->is_superblock) {
1872 if (num_pages * PAGE_CACHE_SIZE <
1873 state->metablock_size) {
1874 printk(KERN_INFO
1875 "btrfsic: cannot work with too short bios!\n");
1876 return;
1877 }
1878 processed_len = state->metablock_size;
1879 bytenr = btrfs_stack_header_bytenr(
1880 (struct btrfs_header *)
1881 mapped_datav[0]);
1882 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1883 dev_state,
1884 dev_bytenr);
1885 }
1886 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) {
1887 if (block->logical_bytenr != bytenr &&
1888 !(!block->is_metadata &&
1889 block->logical_bytenr == 0))
1890 printk(KERN_INFO
1891 "Written block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
1892 bytenr, dev_state->name,
1893 dev_bytenr,
1894 block->mirror_num,
1895 btrfsic_get_block_type(state,
1896 block),
1897 block->logical_bytenr);
1898 else
1899 printk(KERN_INFO
1900 "Written block @%llu (%s/%llu/%d) found in hash table, %c.\n",
1901 bytenr, dev_state->name,
1902 dev_bytenr, block->mirror_num,
1903 btrfsic_get_block_type(state,
1904 block));
1905 }
1906 block->logical_bytenr = bytenr;
1907 } else {
1908 if (num_pages * PAGE_CACHE_SIZE <
1909 state->datablock_size) {
1910 printk(KERN_INFO
1911 "btrfsic: cannot work with too short bios!\n");
1912 return;
1913 }
1914 processed_len = state->datablock_size;
1915 bytenr = block->logical_bytenr;
1916 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1917 printk(KERN_INFO
1918 "Written block @%llu (%s/%llu/%d)"
1919 " found in hash table, %c.\n",
1920 bytenr, dev_state->name, dev_bytenr,
1921 block->mirror_num,
1922 btrfsic_get_block_type(state, block));
1923 }
1924
1925 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1926 printk(KERN_INFO
1927 "ref_to_list: %cE, ref_from_list: %cE\n",
1928 list_empty(&block->ref_to_list) ? ' ' : '!',
1929 list_empty(&block->ref_from_list) ? ' ' : '!');
1930 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
1931 printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1932 " @%llu (%s/%llu/%d), old(gen=%llu,"
1933 " objectid=%llu, type=%d, offset=%llu),"
1934 " new(gen=%llu),"
1935 " which is referenced by most recent superblock"
1936 " (superblockgen=%llu)!\n",
1937 btrfsic_get_block_type(state, block), bytenr,
1938 dev_state->name, dev_bytenr, block->mirror_num,
1939 block->generation,
1940 btrfs_disk_key_objectid(&block->disk_key),
1941 block->disk_key.type,
1942 btrfs_disk_key_offset(&block->disk_key),
1943 btrfs_stack_header_generation(
1944 (struct btrfs_header *) mapped_datav[0]),
1945 state->max_superblock_generation);
1946 btrfsic_dump_tree(state);
1947 }
1948
1949 if (!block->is_iodone && !block->never_written) {
1950 printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1951 " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
1952 " which is not yet iodone!\n",
1953 btrfsic_get_block_type(state, block), bytenr,
1954 dev_state->name, dev_bytenr, block->mirror_num,
1955 block->generation,
1956 btrfs_stack_header_generation(
1957 (struct btrfs_header *)
1958 mapped_datav[0]));
1959 /* it would not be safe to go on */
1960 btrfsic_dump_tree(state);
1961 goto continue_loop;
1962 }
1963
1964 /*
1965 * Clear all references of this block. Do not free
1966 * the block itself even if is not referenced anymore
1967 * because it still carries valueable information
1968 * like whether it was ever written and IO completed.
1969 */
1970 list_for_each_safe(elem_ref_to, tmp_ref_to,
1971 &block->ref_to_list) {
1972 struct btrfsic_block_link *const l =
1973 list_entry(elem_ref_to,
1974 struct btrfsic_block_link,
1975 node_ref_to);
1976
1977 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1978 btrfsic_print_rem_link(state, l);
1979 l->ref_cnt--;
1980 if (0 == l->ref_cnt) {
1981 list_del(&l->node_ref_to);
1982 list_del(&l->node_ref_from);
1983 btrfsic_block_link_hashtable_remove(l);
1984 btrfsic_block_link_free(l);
1985 }
1986 }
1987
1988 block_ctx.dev = dev_state;
1989 block_ctx.dev_bytenr = dev_bytenr;
1990 block_ctx.start = bytenr;
1991 block_ctx.len = processed_len;
1992 block_ctx.pagev = NULL;
1993 block_ctx.mem_to_free = NULL;
1994 block_ctx.datav = mapped_datav;
1995
1996 if (is_metadata || state->include_extent_data) {
1997 block->never_written = 0;
1998 block->iodone_w_error = 0;
1999 if (NULL != bio) {
2000 block->is_iodone = 0;
2001 BUG_ON(NULL == bio_is_patched);
2002 if (!*bio_is_patched) {
2003 block->orig_bio_bh_private =
2004 bio->bi_private;
2005 block->orig_bio_bh_end_io.bio =
2006 bio->bi_end_io;
2007 block->next_in_same_bio = NULL;
2008 bio->bi_private = block;
2009 bio->bi_end_io = btrfsic_bio_end_io;
2010 *bio_is_patched = 1;
2011 } else {
2012 struct btrfsic_block *chained_block =
2013 (struct btrfsic_block *)
2014 bio->bi_private;
2015
2016 BUG_ON(NULL == chained_block);
2017 block->orig_bio_bh_private =
2018 chained_block->orig_bio_bh_private;
2019 block->orig_bio_bh_end_io.bio =
2020 chained_block->orig_bio_bh_end_io.
2021 bio;
2022 block->next_in_same_bio = chained_block;
2023 bio->bi_private = block;
2024 }
2025 } else if (NULL != bh) {
2026 block->is_iodone = 0;
2027 block->orig_bio_bh_private = bh->b_private;
2028 block->orig_bio_bh_end_io.bh = bh->b_end_io;
2029 block->next_in_same_bio = NULL;
2030 bh->b_private = block;
2031 bh->b_end_io = btrfsic_bh_end_io;
2032 } else {
2033 block->is_iodone = 1;
2034 block->orig_bio_bh_private = NULL;
2035 block->orig_bio_bh_end_io.bio = NULL;
2036 block->next_in_same_bio = NULL;
2037 }
2038 }
2039
2040 block->flush_gen = dev_state->last_flush_gen + 1;
2041 block->submit_bio_bh_rw = submit_bio_bh_rw;
2042 if (is_metadata) {
2043 block->logical_bytenr = bytenr;
2044 block->is_metadata = 1;
2045 if (block->is_superblock) {
2046 BUG_ON(PAGE_CACHE_SIZE !=
2047 BTRFS_SUPER_INFO_SIZE);
2048 ret = btrfsic_process_written_superblock(
2049 state,
2050 block,
2051 (struct btrfs_super_block *)
2052 mapped_datav[0]);
2053 if (state->print_mask &
2054 BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
2055 printk(KERN_INFO
2056 "[after new superblock is written]:\n");
2057 btrfsic_dump_tree_sub(state, block, 0);
2058 }
2059 } else {
2060 block->mirror_num = 0; /* unknown */
2061 ret = btrfsic_process_metablock(
2062 state,
2063 block,
2064 &block_ctx,
2065 0, 0);
2066 }
2067 if (ret)
2068 printk(KERN_INFO
2069 "btrfsic: btrfsic_process_metablock"
2070 "(root @%llu) failed!\n",
2071 dev_bytenr);
2072 } else {
2073 block->is_metadata = 0;
2074 block->mirror_num = 0; /* unknown */
2075 block->generation = BTRFSIC_GENERATION_UNKNOWN;
2076 if (!state->include_extent_data
2077 && list_empty(&block->ref_from_list)) {
2078 /*
2079 * disk block is overwritten with extent
2080 * data (not meta data) and we are configured
2081 * to not include extent data: take the
2082 * chance and free the block's memory
2083 */
2084 btrfsic_block_hashtable_remove(block);
2085 list_del(&block->all_blocks_node);
2086 btrfsic_block_free(block);
2087 }
2088 }
2089 btrfsic_release_block_ctx(&block_ctx);
2090 } else {
2091 /* block has not been found in hash table */
2092 u64 bytenr;
2093
2094 if (!is_metadata) {
2095 processed_len = state->datablock_size;
2096 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2097 printk(KERN_INFO "Written block (%s/%llu/?)"
2098 " !found in hash table, D.\n",
2099 dev_state->name, dev_bytenr);
2100 if (!state->include_extent_data) {
2101 /* ignore that written D block */
2102 goto continue_loop;
2103 }
2104
2105 /* this is getting ugly for the
2106 * include_extent_data case... */
2107 bytenr = 0; /* unknown */
2108 } else {
2109 processed_len = state->metablock_size;
2110 bytenr = btrfs_stack_header_bytenr(
2111 (struct btrfs_header *)
2112 mapped_datav[0]);
2113 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
2114 dev_bytenr);
2115 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2116 printk(KERN_INFO
2117 "Written block @%llu (%s/%llu/?)"
2118 " !found in hash table, M.\n",
2119 bytenr, dev_state->name, dev_bytenr);
2120 }
2121
2122 block_ctx.dev = dev_state;
2123 block_ctx.dev_bytenr = dev_bytenr;
2124 block_ctx.start = bytenr;
2125 block_ctx.len = processed_len;
2126 block_ctx.pagev = NULL;
2127 block_ctx.mem_to_free = NULL;
2128 block_ctx.datav = mapped_datav;
2129
2130 block = btrfsic_block_alloc();
2131 if (NULL == block) {
2132 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2133 btrfsic_release_block_ctx(&block_ctx);
2134 goto continue_loop;
2135 }
2136 block->dev_state = dev_state;
2137 block->dev_bytenr = dev_bytenr;
2138 block->logical_bytenr = bytenr;
2139 block->is_metadata = is_metadata;
2140 block->never_written = 0;
2141 block->iodone_w_error = 0;
2142 block->mirror_num = 0; /* unknown */
2143 block->flush_gen = dev_state->last_flush_gen + 1;
2144 block->submit_bio_bh_rw = submit_bio_bh_rw;
2145 if (NULL != bio) {
2146 block->is_iodone = 0;
2147 BUG_ON(NULL == bio_is_patched);
2148 if (!*bio_is_patched) {
2149 block->orig_bio_bh_private = bio->bi_private;
2150 block->orig_bio_bh_end_io.bio = bio->bi_end_io;
2151 block->next_in_same_bio = NULL;
2152 bio->bi_private = block;
2153 bio->bi_end_io = btrfsic_bio_end_io;
2154 *bio_is_patched = 1;
2155 } else {
2156 struct btrfsic_block *chained_block =
2157 (struct btrfsic_block *)
2158 bio->bi_private;
2159
2160 BUG_ON(NULL == chained_block);
2161 block->orig_bio_bh_private =
2162 chained_block->orig_bio_bh_private;
2163 block->orig_bio_bh_end_io.bio =
2164 chained_block->orig_bio_bh_end_io.bio;
2165 block->next_in_same_bio = chained_block;
2166 bio->bi_private = block;
2167 }
2168 } else if (NULL != bh) {
2169 block->is_iodone = 0;
2170 block->orig_bio_bh_private = bh->b_private;
2171 block->orig_bio_bh_end_io.bh = bh->b_end_io;
2172 block->next_in_same_bio = NULL;
2173 bh->b_private = block;
2174 bh->b_end_io = btrfsic_bh_end_io;
2175 } else {
2176 block->is_iodone = 1;
2177 block->orig_bio_bh_private = NULL;
2178 block->orig_bio_bh_end_io.bio = NULL;
2179 block->next_in_same_bio = NULL;
2180 }
2181 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2182 printk(KERN_INFO
2183 "New written %c-block @%llu (%s/%llu/%d)\n",
2184 is_metadata ? 'M' : 'D',
2185 block->logical_bytenr, block->dev_state->name,
2186 block->dev_bytenr, block->mirror_num);
2187 list_add(&block->all_blocks_node, &state->all_blocks_list);
2188 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2189
2190 if (is_metadata) {
2191 ret = btrfsic_process_metablock(state, block,
2192 &block_ctx, 0, 0);
2193 if (ret)
2194 printk(KERN_INFO
2195 "btrfsic: process_metablock(root @%llu)"
2196 " failed!\n",
2197 dev_bytenr);
2198 }
2199 btrfsic_release_block_ctx(&block_ctx);
2200 }
2201
2202 continue_loop:
2203 BUG_ON(!processed_len);
2204 dev_bytenr += processed_len;
2205 mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
2206 num_pages -= processed_len >> PAGE_CACHE_SHIFT;
2207 goto again;
2208 }
2209
/*
 * bio completion handler interposed by the integrity checker at submit
 * time: marks every btrfsic_block chained onto this bio as I/O-done,
 * then restores and invokes the bio's original completion callback.
 */
static void btrfsic_bio_end_io(struct bio *bp)
{
	struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
	int iodone_w_error;

	/* mutex is not held! This is not safe if IO is not yet completed
	 * on umount */
	iodone_w_error = 0;
	if (bp->bi_error)
		iodone_w_error = 1;

	BUG_ON(NULL == block);
	/* Undo the patching done at submit time: hand the bio back its
	 * original private data and end_io callback. */
	bp->bi_private = block->orig_bio_bh_private;
	bp->bi_end_io = block->orig_bio_bh_end_io.bio;

	/* One bio may carry several blocks, linked via next_in_same_bio;
	 * walk and complete each of them. */
	do {
		struct btrfsic_block *next_block;
		struct btrfsic_dev_state *const dev_state = block->dev_state;

		if ((dev_state->state->print_mask &
		     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
			printk(KERN_INFO
			       "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
			       bp->bi_error,
			       btrfsic_get_block_type(dev_state->state, block),
			       block->logical_bytenr, dev_state->name,
			       block->dev_bytenr, block->mirror_num);
		next_block = block->next_in_same_bio;
		block->iodone_w_error = iodone_w_error;
		/* a completed FLUSH starts a new flush generation on the
		 * device */
		if (block->submit_bio_bh_rw & REQ_FLUSH) {
			dev_state->last_flush_gen++;
			if ((dev_state->state->print_mask &
			     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
				printk(KERN_INFO
				       "bio_end_io() new %s flush_gen=%llu\n",
				       dev_state->name,
				       dev_state->last_flush_gen);
		}
		if (block->submit_bio_bh_rw & REQ_FUA)
			block->flush_gen = 0; /* FUA completed means block is
					       * on disk */
		/* must be the last store to *block in this iteration */
		block->is_iodone = 1; /* for FLUSH, this releases the block */
		block = next_block;
	} while (NULL != block);

	bp->bi_end_io(bp);
}
2257
/*
 * buffer_head completion handler interposed by the integrity checker:
 * records the I/O result on the attached btrfsic_block, then restores
 * and invokes the bh's original end_io callback.
 */
static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
{
	struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
	int iodone_w_error = !uptodate;
	struct btrfsic_dev_state *dev_state;

	BUG_ON(NULL == block);
	dev_state = block->dev_state;
	if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
		printk(KERN_INFO
		       "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
		       iodone_w_error,
		       btrfsic_get_block_type(dev_state->state, block),
		       block->logical_bytenr, block->dev_state->name,
		       block->dev_bytenr, block->mirror_num);

	block->iodone_w_error = iodone_w_error;
	/* a completed FLUSH starts a new flush generation on the device */
	if (block->submit_bio_bh_rw & REQ_FLUSH) {
		dev_state->last_flush_gen++;
		if ((dev_state->state->print_mask &
		     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
			printk(KERN_INFO
			       "bh_end_io() new %s flush_gen=%llu\n",
			       dev_state->name, dev_state->last_flush_gen);
	}
	if (block->submit_bio_bh_rw & REQ_FUA)
		block->flush_gen = 0; /* FUA completed means block is on disk */

	/* undo the submit-time patching; setting is_iodone must be the
	 * last store to *block */
	bh->b_private = block->orig_bio_bh_private;
	bh->b_end_io = block->orig_bio_bh_end_io.bh;
	block->is_iodone = 1; /* for FLUSH, this releases the block */
	bh->b_end_io(bh, uptodate);
}
2291
2292 static int btrfsic_process_written_superblock(
2293 struct btrfsic_state *state,
2294 struct btrfsic_block *const superblock,
2295 struct btrfs_super_block *const super_hdr)
2296 {
2297 int pass;
2298
2299 superblock->generation = btrfs_super_generation(super_hdr);
2300 if (!(superblock->generation > state->max_superblock_generation ||
2301 0 == state->max_superblock_generation)) {
2302 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2303 printk(KERN_INFO
2304 "btrfsic: superblock @%llu (%s/%llu/%d)"
2305 " with old gen %llu <= %llu\n",
2306 superblock->logical_bytenr,
2307 superblock->dev_state->name,
2308 superblock->dev_bytenr, superblock->mirror_num,
2309 btrfs_super_generation(super_hdr),
2310 state->max_superblock_generation);
2311 } else {
2312 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2313 printk(KERN_INFO
2314 "btrfsic: got new superblock @%llu (%s/%llu/%d)"
2315 " with new gen %llu > %llu\n",
2316 superblock->logical_bytenr,
2317 superblock->dev_state->name,
2318 superblock->dev_bytenr, superblock->mirror_num,
2319 btrfs_super_generation(super_hdr),
2320 state->max_superblock_generation);
2321
2322 state->max_superblock_generation =
2323 btrfs_super_generation(super_hdr);
2324 state->latest_superblock = superblock;
2325 }
2326
2327 for (pass = 0; pass < 3; pass++) {
2328 int ret;
2329 u64 next_bytenr;
2330 struct btrfsic_block *next_block;
2331 struct btrfsic_block_data_ctx tmp_next_block_ctx;
2332 struct btrfsic_block_link *l;
2333 int num_copies;
2334 int mirror_num;
2335 const char *additional_string = NULL;
2336 struct btrfs_disk_key tmp_disk_key = {0};
2337
2338 btrfs_set_disk_key_objectid(&tmp_disk_key,
2339 BTRFS_ROOT_ITEM_KEY);
2340 btrfs_set_disk_key_objectid(&tmp_disk_key, 0);
2341
2342 switch (pass) {
2343 case 0:
2344 btrfs_set_disk_key_objectid(&tmp_disk_key,
2345 BTRFS_ROOT_TREE_OBJECTID);
2346 additional_string = "root ";
2347 next_bytenr = btrfs_super_root(super_hdr);
2348 if (state->print_mask &
2349 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2350 printk(KERN_INFO "root@%llu\n", next_bytenr);
2351 break;
2352 case 1:
2353 btrfs_set_disk_key_objectid(&tmp_disk_key,
2354 BTRFS_CHUNK_TREE_OBJECTID);
2355 additional_string = "chunk ";
2356 next_bytenr = btrfs_super_chunk_root(super_hdr);
2357 if (state->print_mask &
2358 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2359 printk(KERN_INFO "chunk@%llu\n", next_bytenr);
2360 break;
2361 case 2:
2362 btrfs_set_disk_key_objectid(&tmp_disk_key,
2363 BTRFS_TREE_LOG_OBJECTID);
2364 additional_string = "log ";
2365 next_bytenr = btrfs_super_log_root(super_hdr);
2366 if (0 == next_bytenr)
2367 continue;
2368 if (state->print_mask &
2369 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2370 printk(KERN_INFO "log@%llu\n", next_bytenr);
2371 break;
2372 }
2373
2374 num_copies =
2375 btrfs_num_copies(state->root->fs_info,
2376 next_bytenr, BTRFS_SUPER_INFO_SIZE);
2377 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
2378 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
2379 next_bytenr, num_copies);
2380 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2381 int was_created;
2382
2383 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2384 printk(KERN_INFO
2385 "btrfsic_process_written_superblock("
2386 "mirror_num=%d)\n", mirror_num);
2387 ret = btrfsic_map_block(state, next_bytenr,
2388 BTRFS_SUPER_INFO_SIZE,
2389 &tmp_next_block_ctx,
2390 mirror_num);
2391 if (ret) {
2392 printk(KERN_INFO
2393 "btrfsic: btrfsic_map_block(@%llu,"
2394 " mirror=%d) failed!\n",
2395 next_bytenr, mirror_num);
2396 return -1;
2397 }
2398
2399 next_block = btrfsic_block_lookup_or_add(
2400 state,
2401 &tmp_next_block_ctx,
2402 additional_string,
2403 1, 0, 1,
2404 mirror_num,
2405 &was_created);
2406 if (NULL == next_block) {
2407 printk(KERN_INFO
2408 "btrfsic: error, kmalloc failed!\n");
2409 btrfsic_release_block_ctx(&tmp_next_block_ctx);
2410 return -1;
2411 }
2412
2413 next_block->disk_key = tmp_disk_key;
2414 if (was_created)
2415 next_block->generation =
2416 BTRFSIC_GENERATION_UNKNOWN;
2417 l = btrfsic_block_link_lookup_or_add(
2418 state,
2419 &tmp_next_block_ctx,
2420 next_block,
2421 superblock,
2422 BTRFSIC_GENERATION_UNKNOWN);
2423 btrfsic_release_block_ctx(&tmp_next_block_ctx);
2424 if (NULL == l)
2425 return -1;
2426 }
2427 }
2428
2429 if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)))
2430 btrfsic_dump_tree(state);
2431
2432 return 0;
2433 }
2434
/*
 * Verify that every block referenced (directly or transitively) by
 * @block may safely be referenced by a superblock about to be written:
 * each referenced block must have been written, its I/O must have
 * completed without error, its generation must match the link's parent
 * generation, and it must have been flushed out of the disk's write
 * cache.
 *
 * Returns 0 if all referenced blocks pass, -1 if any check fails
 * (all problems are reported before returning).
 */
static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
					struct btrfsic_block *const block,
					int recursion_level)
{
	struct list_head *elem_ref_to;
	int ret = 0;

	if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
		/*
		 * Note that this situation can happen and does not
		 * indicate an error in regular cases. It happens
		 * when disk blocks are freed and later reused.
		 * The check-integrity module is not aware of any
		 * block free operations, it just recognizes block
		 * write operations. Therefore it keeps the linkage
		 * information for a block until a block is
		 * rewritten. This can temporarily cause incorrect
		 * and even circular linkage informations. This
		 * causes no harm unless such blocks are referenced
		 * by the most recent super block.
		 */
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "btrfsic: abort cyclic linkage (case 1).\n");

		return ret;
	}

	/*
	 * This algorithm is recursive because the amount of used stack
	 * space is very small and the max recursion depth is limited.
	 */
	list_for_each(elem_ref_to, &block->ref_to_list) {
		const struct btrfsic_block_link *const l =
		    list_entry(elem_ref_to, struct btrfsic_block_link,
			       node_ref_to);

		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "rl=%d, %c @%llu (%s/%llu/%d)"
			       " %u* refers to %c @%llu (%s/%llu/%d)\n",
			       recursion_level,
			       btrfsic_get_block_type(state, block),
			       block->logical_bytenr, block->dev_state->name,
			       block->dev_bytenr, block->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_to),
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num);
		/* on failure, record ret = -1 but keep scanning so that
		 * every violation gets reported */
		if (l->block_ref_to->never_written) {
			printk(KERN_INFO "btrfs: attempt to write superblock"
			       " which references block %c @%llu (%s/%llu/%d)"
			       " which is never written!\n",
			       btrfsic_get_block_type(state, l->block_ref_to),
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num);
			ret = -1;
		} else if (!l->block_ref_to->is_iodone) {
			printk(KERN_INFO "btrfs: attempt to write superblock"
			       " which references block %c @%llu (%s/%llu/%d)"
			       " which is not yet iodone!\n",
			       btrfsic_get_block_type(state, l->block_ref_to),
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num);
			ret = -1;
		} else if (l->block_ref_to->iodone_w_error) {
			printk(KERN_INFO "btrfs: attempt to write superblock"
			       " which references block %c @%llu (%s/%llu/%d)"
			       " which has write error!\n",
			       btrfsic_get_block_type(state, l->block_ref_to),
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num);
			ret = -1;
		} else if (l->parent_generation !=
			   l->block_ref_to->generation &&
			   BTRFSIC_GENERATION_UNKNOWN !=
			   l->parent_generation &&
			   BTRFSIC_GENERATION_UNKNOWN !=
			   l->block_ref_to->generation) {
			printk(KERN_INFO "btrfs: attempt to write superblock"
			       " which references block %c @%llu (%s/%llu/%d)"
			       " with generation %llu !="
			       " parent generation %llu!\n",
			       btrfsic_get_block_type(state, l->block_ref_to),
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num,
			       l->block_ref_to->generation,
			       l->parent_generation);
			ret = -1;
		} else if (l->block_ref_to->flush_gen >
			   l->block_ref_to->dev_state->last_flush_gen) {
			/* NOTE(review): the message prints block->flush_gen
			 * (the referencing block's), not the tested
			 * l->block_ref_to->flush_gen — looks like a
			 * copy-paste slip in the diagnostic; confirm before
			 * changing. */
			printk(KERN_INFO "btrfs: attempt to write superblock"
			       " which references block %c @%llu (%s/%llu/%d)"
			       " which is not flushed out of disk's write cache"
			       " (block flush_gen=%llu,"
			       " dev->flush_gen=%llu)!\n",
			       btrfsic_get_block_type(state, l->block_ref_to),
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num, block->flush_gen,
			       l->block_ref_to->dev_state->last_flush_gen);
			ret = -1;
		} else if (-1 == btrfsic_check_all_ref_blocks(state,
							      l->block_ref_to,
							      recursion_level +
							      1)) {
			ret = -1;
		}
	}

	return ret;
}
2558
2559 static int btrfsic_is_block_ref_by_superblock(
2560 const struct btrfsic_state *state,
2561 const struct btrfsic_block *block,
2562 int recursion_level)
2563 {
2564 struct list_head *elem_ref_from;
2565
2566 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2567 /* refer to comment at "abort cyclic linkage (case 1)" */
2568 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2569 printk(KERN_INFO
2570 "btrfsic: abort cyclic linkage (case 2).\n");
2571
2572 return 0;
2573 }
2574
2575 /*
2576 * This algorithm is recursive because the amount of used stack space
2577 * is very small and the max recursion depth is limited.
2578 */
2579 list_for_each(elem_ref_from, &block->ref_from_list) {
2580 const struct btrfsic_block_link *const l =
2581 list_entry(elem_ref_from, struct btrfsic_block_link,
2582 node_ref_from);
2583
2584 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2585 printk(KERN_INFO
2586 "rl=%d, %c @%llu (%s/%llu/%d)"
2587 " is ref %u* from %c @%llu (%s/%llu/%d)\n",
2588 recursion_level,
2589 btrfsic_get_block_type(state, block),
2590 block->logical_bytenr, block->dev_state->name,
2591 block->dev_bytenr, block->mirror_num,
2592 l->ref_cnt,
2593 btrfsic_get_block_type(state, l->block_ref_from),
2594 l->block_ref_from->logical_bytenr,
2595 l->block_ref_from->dev_state->name,
2596 l->block_ref_from->dev_bytenr,
2597 l->block_ref_from->mirror_num);
2598 if (l->block_ref_from->is_superblock &&
2599 state->latest_superblock->dev_bytenr ==
2600 l->block_ref_from->dev_bytenr &&
2601 state->latest_superblock->dev_state->bdev ==
2602 l->block_ref_from->dev_state->bdev)
2603 return 1;
2604 else if (btrfsic_is_block_ref_by_superblock(state,
2605 l->block_ref_from,
2606 recursion_level +
2607 1))
2608 return 1;
2609 }
2610
2611 return 0;
2612 }
2613
2614 static void btrfsic_print_add_link(const struct btrfsic_state *state,
2615 const struct btrfsic_block_link *l)
2616 {
2617 printk(KERN_INFO
2618 "Add %u* link from %c @%llu (%s/%llu/%d)"
2619 " to %c @%llu (%s/%llu/%d).\n",
2620 l->ref_cnt,
2621 btrfsic_get_block_type(state, l->block_ref_from),
2622 l->block_ref_from->logical_bytenr,
2623 l->block_ref_from->dev_state->name,
2624 l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
2625 btrfsic_get_block_type(state, l->block_ref_to),
2626 l->block_ref_to->logical_bytenr,
2627 l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
2628 l->block_ref_to->mirror_num);
2629 }
2630
2631 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
2632 const struct btrfsic_block_link *l)
2633 {
2634 printk(KERN_INFO
2635 "Rem %u* link from %c @%llu (%s/%llu/%d)"
2636 " to %c @%llu (%s/%llu/%d).\n",
2637 l->ref_cnt,
2638 btrfsic_get_block_type(state, l->block_ref_from),
2639 l->block_ref_from->logical_bytenr,
2640 l->block_ref_from->dev_state->name,
2641 l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
2642 btrfsic_get_block_type(state, l->block_ref_to),
2643 l->block_ref_to->logical_bytenr,
2644 l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
2645 l->block_ref_to->mirror_num);
2646 }
2647
2648 static char btrfsic_get_block_type(const struct btrfsic_state *state,
2649 const struct btrfsic_block *block)
2650 {
2651 if (block->is_superblock &&
2652 state->latest_superblock->dev_bytenr == block->dev_bytenr &&
2653 state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
2654 return 'S';
2655 else if (block->is_superblock)
2656 return 's';
2657 else if (block->is_metadata)
2658 return 'M';
2659 else
2660 return 'D';
2661 }
2662
/*
 * Dump the whole block reference tree, rooted at the most recently
 * written superblock.
 */
static void btrfsic_dump_tree(const struct btrfsic_state *state)
{
	btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
}
2667
2668 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
2669 const struct btrfsic_block *block,
2670 int indent_level)
2671 {
2672 struct list_head *elem_ref_to;
2673 int indent_add;
2674 static char buf[80];
2675 int cursor_position;
2676
2677 /*
2678 * Should better fill an on-stack buffer with a complete line and
2679 * dump it at once when it is time to print a newline character.
2680 */
2681
2682 /*
2683 * This algorithm is recursive because the amount of used stack space
2684 * is very small and the max recursion depth is limited.
2685 */
2686 indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
2687 btrfsic_get_block_type(state, block),
2688 block->logical_bytenr, block->dev_state->name,
2689 block->dev_bytenr, block->mirror_num);
2690 if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2691 printk("[...]\n");
2692 return;
2693 }
2694 printk(buf);
2695 indent_level += indent_add;
2696 if (list_empty(&block->ref_to_list)) {
2697 printk("\n");
2698 return;
2699 }
2700 if (block->mirror_num > 1 &&
2701 !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
2702 printk(" [...]\n");
2703 return;
2704 }
2705
2706 cursor_position = indent_level;
2707 list_for_each(elem_ref_to, &block->ref_to_list) {
2708 const struct btrfsic_block_link *const l =
2709 list_entry(elem_ref_to, struct btrfsic_block_link,
2710 node_ref_to);
2711
2712 while (cursor_position < indent_level) {
2713 printk(" ");
2714 cursor_position++;
2715 }
2716 if (l->ref_cnt > 1)
2717 indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
2718 else
2719 indent_add = sprintf(buf, " --> ");
2720 if (indent_level + indent_add >
2721 BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2722 printk("[...]\n");
2723 cursor_position = 0;
2724 continue;
2725 }
2726
2727 printk(buf);
2728
2729 btrfsic_dump_tree_sub(state, l->block_ref_to,
2730 indent_level + indent_add);
2731 cursor_position = 0;
2732 }
2733 }
2734
2735 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
2736 struct btrfsic_state *state,
2737 struct btrfsic_block_data_ctx *next_block_ctx,
2738 struct btrfsic_block *next_block,
2739 struct btrfsic_block *from_block,
2740 u64 parent_generation)
2741 {
2742 struct btrfsic_block_link *l;
2743
2744 l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
2745 next_block_ctx->dev_bytenr,
2746 from_block->dev_state->bdev,
2747 from_block->dev_bytenr,
2748 &state->block_link_hashtable);
2749 if (NULL == l) {
2750 l = btrfsic_block_link_alloc();
2751 if (NULL == l) {
2752 printk(KERN_INFO
2753 "btrfsic: error, kmalloc" " failed!\n");
2754 return NULL;
2755 }
2756
2757 l->block_ref_to = next_block;
2758 l->block_ref_from = from_block;
2759 l->ref_cnt = 1;
2760 l->parent_generation = parent_generation;
2761
2762 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2763 btrfsic_print_add_link(state, l);
2764
2765 list_add(&l->node_ref_to, &from_block->ref_to_list);
2766 list_add(&l->node_ref_from, &next_block->ref_from_list);
2767
2768 btrfsic_block_link_hashtable_add(l,
2769 &state->block_link_hashtable);
2770 } else {
2771 l->ref_cnt++;
2772 l->parent_generation = parent_generation;
2773 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2774 btrfsic_print_add_link(state, l);
2775 }
2776
2777 return l;
2778 }
2779
2780 static struct btrfsic_block *btrfsic_block_lookup_or_add(
2781 struct btrfsic_state *state,
2782 struct btrfsic_block_data_ctx *block_ctx,
2783 const char *additional_string,
2784 int is_metadata,
2785 int is_iodone,
2786 int never_written,
2787 int mirror_num,
2788 int *was_created)
2789 {
2790 struct btrfsic_block *block;
2791
2792 block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
2793 block_ctx->dev_bytenr,
2794 &state->block_hashtable);
2795 if (NULL == block) {
2796 struct btrfsic_dev_state *dev_state;
2797
2798 block = btrfsic_block_alloc();
2799 if (NULL == block) {
2800 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2801 return NULL;
2802 }
2803 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
2804 if (NULL == dev_state) {
2805 printk(KERN_INFO
2806 "btrfsic: error, lookup dev_state failed!\n");
2807 btrfsic_block_free(block);
2808 return NULL;
2809 }
2810 block->dev_state = dev_state;
2811 block->dev_bytenr = block_ctx->dev_bytenr;
2812 block->logical_bytenr = block_ctx->start;
2813 block->is_metadata = is_metadata;
2814 block->is_iodone = is_iodone;
2815 block->never_written = never_written;
2816 block->mirror_num = mirror_num;
2817 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2818 printk(KERN_INFO
2819 "New %s%c-block @%llu (%s/%llu/%d)\n",
2820 additional_string,
2821 btrfsic_get_block_type(state, block),
2822 block->logical_bytenr, dev_state->name,
2823 block->dev_bytenr, mirror_num);
2824 list_add(&block->all_blocks_node, &state->all_blocks_list);
2825 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2826 if (NULL != was_created)
2827 *was_created = 1;
2828 } else {
2829 if (NULL != was_created)
2830 *was_created = 0;
2831 }
2832
2833 return block;
2834 }
2835
2836 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2837 u64 bytenr,
2838 struct btrfsic_dev_state *dev_state,
2839 u64 dev_bytenr)
2840 {
2841 int num_copies;
2842 int mirror_num;
2843 int ret;
2844 struct btrfsic_block_data_ctx block_ctx;
2845 int match = 0;
2846
2847 num_copies = btrfs_num_copies(state->root->fs_info,
2848 bytenr, state->metablock_size);
2849
2850 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2851 ret = btrfsic_map_block(state, bytenr, state->metablock_size,
2852 &block_ctx, mirror_num);
2853 if (ret) {
2854 printk(KERN_INFO "btrfsic:"
2855 " btrfsic_map_block(logical @%llu,"
2856 " mirror %d) failed!\n",
2857 bytenr, mirror_num);
2858 continue;
2859 }
2860
2861 if (dev_state->bdev == block_ctx.dev->bdev &&
2862 dev_bytenr == block_ctx.dev_bytenr) {
2863 match++;
2864 btrfsic_release_block_ctx(&block_ctx);
2865 break;
2866 }
2867 btrfsic_release_block_ctx(&block_ctx);
2868 }
2869
2870 if (WARN_ON(!match)) {
2871 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
2872 " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
2873 " phys_bytenr=%llu)!\n",
2874 bytenr, dev_state->name, dev_bytenr);
2875 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2876 ret = btrfsic_map_block(state, bytenr,
2877 state->metablock_size,
2878 &block_ctx, mirror_num);
2879 if (ret)
2880 continue;
2881
2882 printk(KERN_INFO "Read logical bytenr @%llu maps to"
2883 " (%s/%llu/%d)\n",
2884 bytenr, block_ctx.dev->name,
2885 block_ctx.dev_bytenr, mirror_num);
2886 }
2887 }
2888 }
2889
2890 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
2891 struct block_device *bdev)
2892 {
2893 struct btrfsic_dev_state *ds;
2894
2895 ds = btrfsic_dev_state_hashtable_lookup(bdev,
2896 &btrfsic_dev_state_hashtable);
2897 return ds;
2898 }
2899
/*
 * Drop-in wrapper for submit_bh(): inspects buffer_head based writes
 * (used here only for the superblock, incl. FLUSH/FUA requests) before
 * forwarding to the real submit_bh(). Write completions are intercepted
 * by patching bh->b_end_io to btrfsic_bh_end_io.
 */
int btrfsic_submit_bh(int rw, struct buffer_head *bh)
{
	struct btrfsic_dev_state *dev_state;

	if (!btrfsic_is_initialized)
		return submit_bh(rw, bh);

	mutex_lock(&btrfsic_mutex);
	/* since btrfsic_submit_bh() might also be called before
	 * btrfsic_mount(), this might return NULL */
	dev_state = btrfsic_dev_state_lookup(bh->b_bdev);

	/* Only called to write the superblock (incl. FLUSH/FUA) */
	if (NULL != dev_state &&
	    (rw & WRITE) && bh->b_size > 0) {
		u64 dev_bytenr;

		/* block number -> byte offset; assumes a 4096-byte block
		 * size for the superblock bh — TODO confirm */
		dev_bytenr = 4096 * bh->b_blocknr;
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu),"
			       " size=%zu, data=%p, bdev=%p)\n",
			       rw, (unsigned long long)bh->b_blocknr,
			       dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev);
		/* verify the written data and patch bh's completion */
		btrfsic_process_written_block(dev_state, dev_bytenr,
					      &bh->b_data, 1, NULL,
					      NULL, bh, rw);
	} else if (NULL != dev_state && (rw & REQ_FLUSH)) {
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
			       rw, bh->b_bdev);
		/* track the FLUSH via the per-device dummy block, unless
		 * a previous FLUSH is still in flight */
		if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
			if ((dev_state->state->print_mask &
			     (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			      BTRFSIC_PRINT_MASK_VERBOSE)))
				printk(KERN_INFO
				       "btrfsic_submit_bh(%s) with FLUSH"
				       " but dummy block already in use"
				       " (ignored)!\n",
				       dev_state->name);
		} else {
			struct btrfsic_block *const block =
			    &dev_state->dummy_block_for_bio_bh_flush;

			block->is_iodone = 0;
			block->never_written = 0;
			block->iodone_w_error = 0;
			block->flush_gen = dev_state->last_flush_gen + 1;
			block->submit_bio_bh_rw = rw;
			/* patch the bh so completion runs through
			 * btrfsic_bh_end_io first */
			block->orig_bio_bh_private = bh->b_private;
			block->orig_bio_bh_end_io.bh = bh->b_end_io;
			block->next_in_same_bio = NULL;
			bh->b_private = block;
			bh->b_end_io = btrfsic_bh_end_io;
		}
	}
	mutex_unlock(&btrfsic_mutex);
	return submit_bh(rw, bh);
}
2962
/*
 * Core of the submit_bio() interception.  For a WRITE bio on a tracked
 * device, map every bio_vec page and feed the data to
 * btrfsic_process_written_block(); for a FLUSH request, arm the
 * per-device dummy block so the completion callback can advance the
 * flush generation.  The bio itself is NOT submitted here — the
 * btrfsic_submit_bio*() callers do that afterwards.
 */
static void __btrfsic_submit_bio(int rw, struct bio *bio)
{
	struct btrfsic_dev_state *dev_state;

	if (!btrfsic_is_initialized)
		return;

	mutex_lock(&btrfsic_mutex);
	/* since btrfsic_submit_bio() is also called before
	 * btrfsic_mount(), this might return NULL */
	dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
	if (NULL != dev_state &&
	    (rw & WRITE) && NULL != bio->bi_io_vec) {
		unsigned int i;
		u64 dev_bytenr;
		u64 cur_bytenr;
		int bio_is_patched;
		char **mapped_datav;

		/* bi_sector counts 512-byte sectors; convert to a byte
		 * offset on the device. */
		dev_bytenr = 512 * bio->bi_iter.bi_sector;
		bio_is_patched = 0;
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bio(rw=0x%x, bi_vcnt=%u,"
			       " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
			       rw, bio->bi_vcnt,
			       (unsigned long long)bio->bi_iter.bi_sector,
			       dev_bytenr, bio->bi_bdev);

		mapped_datav = kmalloc_array(bio->bi_vcnt,
					     sizeof(*mapped_datav), GFP_NOFS);
		if (!mapped_datav)
			goto leave;
		cur_bytenr = dev_bytenr;
		/* Map each page of the bio; on failure unmap everything
		 * mapped so far and bail out (best effort: the write is
		 * then simply not checked). */
		for (i = 0; i < bio->bi_vcnt; i++) {
			BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
			mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
			if (!mapped_datav[i]) {
				while (i > 0) {
					i--;
					kunmap(bio->bi_io_vec[i].bv_page);
				}
				kfree(mapped_datav);
				goto leave;
			}
			if (dev_state->state->print_mask &
			    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
				printk(KERN_INFO
				       "#%u: bytenr=%llu, len=%u, offset=%u\n",
				       i, cur_bytenr, bio->bi_io_vec[i].bv_len,
				       bio->bi_io_vec[i].bv_offset);
			cur_bytenr += bio->bi_io_vec[i].bv_len;
		}
		btrfsic_process_written_block(dev_state, dev_bytenr,
					      mapped_datav, bio->bi_vcnt,
					      bio, &bio_is_patched,
					      NULL, rw);
		/* i == bi_vcnt here, so this releases every page mapped
		 * in the loop above. */
		while (i > 0) {
			i--;
			kunmap(bio->bi_io_vec[i].bv_page);
		}
		kfree(mapped_datav);
	} else if (NULL != dev_state && (rw & REQ_FLUSH)) {
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
			       rw, bio->bi_bdev);
		if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
			/* Only one FLUSH per device can be tracked at a
			 * time; an overlapping one is not recorded. */
			if ((dev_state->state->print_mask &
			     (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			      BTRFSIC_PRINT_MASK_VERBOSE)))
				printk(KERN_INFO
				       "btrfsic_submit_bio(%s) with FLUSH"
				       " but dummy block already in use"
				       " (ignored)!\n",
				       dev_state->name);
		} else {
			struct btrfsic_block *const block =
				&dev_state->dummy_block_for_bio_bh_flush;

			/* Hook completion: save the caller's private data
			 * and end_io so btrfsic_bio_end_io() can restore
			 * and chain to them when the flush completes. */
			block->is_iodone = 0;
			block->never_written = 0;
			block->iodone_w_error = 0;
			block->flush_gen = dev_state->last_flush_gen + 1;
			block->submit_bio_bh_rw = rw;
			block->orig_bio_bh_private = bio->bi_private;
			block->orig_bio_bh_end_io.bio = bio->bi_end_io;
			block->next_in_same_bio = NULL;
			bio->bi_private = block;
			bio->bi_end_io = btrfsic_bio_end_io;
		}
	}
leave:
	mutex_unlock(&btrfsic_mutex);
}
3060
/*
 * Drop-in replacement for submit_bio(): run the integrity checks on the
 * bio first (a no-op when the checker is inactive), then hand it to the
 * block layer.
 */
void btrfsic_submit_bio(int rw, struct bio *bio)
{
	__btrfsic_submit_bio(rw, bio);
	submit_bio(rw, bio);
}
3066
/*
 * Drop-in replacement for submit_bio_wait(): run the integrity checks,
 * then submit the bio synchronously.
 *
 * Returns the completion status reported by submit_bio_wait().
 */
int btrfsic_submit_bio_wait(int rw, struct bio *bio)
{
	int status;

	__btrfsic_submit_bio(rw, bio);
	status = submit_bio_wait(rw, bio);
	return status;
}
3072
3073 int btrfsic_mount(struct btrfs_root *root,
3074 struct btrfs_fs_devices *fs_devices,
3075 int including_extent_data, u32 print_mask)
3076 {
3077 int ret;
3078 struct btrfsic_state *state;
3079 struct list_head *dev_head = &fs_devices->devices;
3080 struct btrfs_device *device;
3081
3082 if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
3083 printk(KERN_INFO
3084 "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3085 root->nodesize, PAGE_CACHE_SIZE);
3086 return -1;
3087 }
3088 if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3089 printk(KERN_INFO
3090 "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3091 root->sectorsize, PAGE_CACHE_SIZE);
3092 return -1;
3093 }
3094 state = kzalloc(sizeof(*state), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
3095 if (!state) {
3096 state = vzalloc(sizeof(*state));
3097 if (!state) {
3098 printk(KERN_INFO "btrfs check-integrity: vzalloc() failed!\n");
3099 return -1;
3100 }
3101 }
3102
3103 if (!btrfsic_is_initialized) {
3104 mutex_init(&btrfsic_mutex);
3105 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
3106 btrfsic_is_initialized = 1;
3107 }
3108 mutex_lock(&btrfsic_mutex);
3109 state->root = root;
3110 state->print_mask = print_mask;
3111 state->include_extent_data = including_extent_data;
3112 state->csum_size = 0;
3113 state->metablock_size = root->nodesize;
3114 state->datablock_size = root->sectorsize;
3115 INIT_LIST_HEAD(&state->all_blocks_list);
3116 btrfsic_block_hashtable_init(&state->block_hashtable);
3117 btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
3118 state->max_superblock_generation = 0;
3119 state->latest_superblock = NULL;
3120
3121 list_for_each_entry(device, dev_head, dev_list) {
3122 struct btrfsic_dev_state *ds;
3123 char *p;
3124
3125 if (!device->bdev || !device->name)
3126 continue;
3127
3128 ds = btrfsic_dev_state_alloc();
3129 if (NULL == ds) {
3130 printk(KERN_INFO
3131 "btrfs check-integrity: kmalloc() failed!\n");
3132 mutex_unlock(&btrfsic_mutex);
3133 return -1;
3134 }
3135 ds->bdev = device->bdev;
3136 ds->state = state;
3137 bdevname(ds->bdev, ds->name);
3138 ds->name[BDEVNAME_SIZE - 1] = '\0';
3139 for (p = ds->name; *p != '\0'; p++);
3140 while (p > ds->name && *p != '/')
3141 p--;
3142 if (*p == '/')
3143 p++;
3144 strlcpy(ds->name, p, sizeof(ds->name));
3145 btrfsic_dev_state_hashtable_add(ds,
3146 &btrfsic_dev_state_hashtable);
3147 }
3148
3149 ret = btrfsic_process_superblock(state, fs_devices);
3150 if (0 != ret) {
3151 mutex_unlock(&btrfsic_mutex);
3152 btrfsic_unmount(root, fs_devices);
3153 return ret;
3154 }
3155
3156 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
3157 btrfsic_dump_database(state);
3158 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
3159 btrfsic_dump_tree(state);
3160
3161 mutex_unlock(&btrfsic_mutex);
3162 return 0;
3163 }
3164
/*
 * Tear down the integrity checker for a filesystem: unregister the
 * dev_states of all its devices, then free the block database and the
 * btrfsic_state.  Safe to call when the checker was never initialized.
 */
void btrfsic_unmount(struct btrfs_root *root,
		     struct btrfs_fs_devices *fs_devices)
{
	struct list_head *elem_all;
	struct list_head *tmp_all;
	struct btrfsic_state *state;
	struct list_head *dev_head = &fs_devices->devices;
	struct btrfs_device *device;

	if (!btrfsic_is_initialized)
		return;

	mutex_lock(&btrfsic_mutex);

	/* All dev_states of one filesystem share the same state; remember
	 * it while removing them from the global hashtable. */
	state = NULL;
	list_for_each_entry(device, dev_head, dev_list) {
		struct btrfsic_dev_state *ds;

		if (!device->bdev || !device->name)
			continue;

		ds = btrfsic_dev_state_hashtable_lookup(
				device->bdev,
				&btrfsic_dev_state_hashtable);
		if (NULL != ds) {
			state = ds->state;
			btrfsic_dev_state_hashtable_remove(ds);
			btrfsic_dev_state_free(ds);
		}
	}

	if (NULL == state) {
		printk(KERN_INFO
		       "btrfsic: error, cannot find state information"
		       " on umount!\n");
		mutex_unlock(&btrfsic_mutex);
		return;
	}

	/*
	 * Don't care about keeping the lists' state up to date,
	 * just free all memory that was allocated dynamically.
	 * Free the blocks and the block_links.
	 */
	list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) {
		struct btrfsic_block *const b_all =
		    list_entry(elem_all, struct btrfsic_block,
			       all_blocks_node);
		struct list_head *elem_ref_to;
		struct list_head *tmp_ref_to;

		/* Drop this block's references; a link is shared between
		 * two blocks and freed only when its refcount hits zero. */
		list_for_each_safe(elem_ref_to, tmp_ref_to,
				   &b_all->ref_to_list) {
			struct btrfsic_block_link *const l =
			    list_entry(elem_ref_to,
				       struct btrfsic_block_link,
				       node_ref_to);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				btrfsic_print_rem_link(state, l);

			l->ref_cnt--;
			if (0 == l->ref_cnt)
				btrfsic_block_link_free(l);
		}

		/* A block with I/O still in flight must not be freed —
		 * its completion callback would touch freed memory. */
		if (b_all->is_iodone || b_all->never_written)
			btrfsic_block_free(b_all);
		else
			printk(KERN_INFO "btrfs: attempt to free %c-block"
			       " @%llu (%s/%llu/%d) on umount which is"
			       " not yet iodone!\n",
			       btrfsic_get_block_type(state, b_all),
			       b_all->logical_bytenr, b_all->dev_state->name,
			       b_all->dev_bytenr, b_all->mirror_num);
	}

	mutex_unlock(&btrfsic_mutex);

	/* Matches the kzalloc-or-vzalloc in btrfsic_mount(). */
	kvfree(state);
}