/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 * Copyright 2016 Nexenta Systems, Inc.
 * Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
 * Copyright (c) 2015, 2017, Intel Corporation.
 * Copyright (c) 2020 Datto Inc.
 * Copyright (c) 2020, The FreeBSD Foundation [1]
 *
 * [1] Portions of this software were developed by Allan Jude
 *     under sponsorship from the FreeBSD Foundation.
 * Copyright (c) 2021 Allan Jude
 * Copyright (c) 2021 Toomas Soome <tsoome@me.com>
 */

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/dmu.h>
#include <sys/zap.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_sa.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab_impl.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_bookmark.h>
#include <sys/dbuf.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <sys/dmu_send.h>
#include <sys/dmu_traverse.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/zfs_fuid.h>
#include <sys/arc.h>
#include <sys/arc_impl.h>
#include <sys/ddt.h>
#include <sys/zfeature.h>
#include <sys/abd.h>
#include <sys/blkptr.h>
#include <sys/dsl_crypt.h>
#include <sys/dsl_scan.h>
#include <sys/btree.h>
#include <zfs_comutil.h>
#include <sys/zstd/zstd.h>

#include <libnvpair.h>
#include <libzutil.h>

#include "zdb.h"

#define	ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ?	\
	zio_compress_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ?	\
	zio_checksum_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) :		\
	(idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA ?	\
	DMU_OT_ZAP_OTHER :						\
	(idx) == DMU_OTN_UINT64_DATA || (idx) == DMU_OTN_UINT64_METADATA ? \
	DMU_OT_UINT64_OTHER : DMU_OT_NUMTYPES)

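/*
 * Return a printable name for a DMU object type, falling back to the
 * byteswap function's name for raw DMU_OT_NEWTYPE types and "UNKNOWN"
 * for anything out of range.
 */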
static char *
zdb_ot_name(dmu_object_type_t type)
{
	if (type < DMU_OT_NUMTYPES)
		return (dmu_ot[type].ot_name);
	else if ((type & DMU_OT_NEWTYPE) &&
	    ((type & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS))
		return (dmu_ot_byteswap[type & DMU_OT_BYTESWAP_MASK].ob_name);
	else
		return ("UNKNOWN");
}

extern int reference_tracking_enable;
extern int zfs_recover;
extern unsigned long zfs_arc_meta_min, zfs_arc_meta_limit;
extern int zfs_vdev_async_read_max_active;
extern boolean_t spa_load_verify_dryrun;
extern int zfs_reconstruct_indirect_combinations_max;
extern int zfs_btree_verify_intensity;

static const char cmdname[] = "zdb";
uint8_t dump_opt[256];

typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);

uint64_t *zopt_metaslab = NULL;
static unsigned zopt_metaslab_args = 0;

typedef struct zopt_object_range {
	uint64_t zor_obj_start;
	uint64_t zor_obj_end;
	uint64_t zor_flags;
} zopt_object_range_t;
zopt_object_range_t *zopt_object_ranges = NULL;
static unsigned zopt_object_args = 0;

static int flagbits[256];

#define	ZOR_FLAG_PLAIN_FILE	0x0001
#define	ZOR_FLAG_DIRECTORY	0x0002
#define	ZOR_FLAG_SPACE_MAP	0x0004
#define	ZOR_FLAG_ZAP		0x0008
#define	ZOR_FLAG_ALL_TYPES	-1
#define	ZOR_SUPPORTED_FLAGS	(ZOR_FLAG_PLAIN_FILE | \
				ZOR_FLAG_DIRECTORY | \
				ZOR_FLAG_SPACE_MAP | \
				ZOR_FLAG_ZAP)

#define	ZDB_FLAG_CHECKSUM	0x0001
#define	ZDB_FLAG_DECOMPRESS	0x0002
#define	ZDB_FLAG_BSWAP		0x0004
#define	ZDB_FLAG_GBH		0x0008
#define	ZDB_FLAG_INDIRECT	0x0010
#define	ZDB_FLAG_RAW		0x0020
#define	ZDB_FLAG_PRINT_BLKPTR	0x0040
#define	ZDB_FLAG_VERBOSE	0x0080

uint64_t max_inflight_bytes = 256 * 1024 * 1024; /* 256MB */
static int leaked_objects = 0;
static range_tree_t *mos_refd_objs;

static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *,
    boolean_t);
static void mos_obj_refd(uint64_t);
static void mos_obj_refd_multiple(uint64_t);
static int dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t free,
    dmu_tx_t *tx);

typedef struct sublivelist_verify {
	/* all ALLOC'd blkptr_t in one sub-livelist */
	zfs_btree_t sv_all_allocs;

	/* all FREE'd blkptr_t in one sub-livelist */
	zfs_btree_t sv_all_frees;

	/* FREE's that haven't yet matched to an ALLOC, in one sub-livelist */
	zfs_btree_t sv_pair;

	/* ALLOC's without a matching FREE, accumulates across sub-livelists */
	zfs_btree_t sv_leftover;
} sublivelist_verify_t;

static int
livelist_compare(const void *larg, const void *rarg)
{
	const blkptr_t *l = larg;
	const blkptr_t *r = rarg;

	/* Sort them according to dva[0] */
	uint64_t l_dva0_vdev, r_dva0_vdev;
	l_dva0_vdev = DVA_GET_VDEV(&l->blk_dva[0]);
	r_dva0_vdev = DVA_GET_VDEV(&r->blk_dva[0]);
	if (l_dva0_vdev < r_dva0_vdev)
		return (-1);
	else if (l_dva0_vdev > r_dva0_vdev)
		return (+1);

	/* if vdevs are equal, sort by offsets. */
	uint64_t l_dva0_offset;
	uint64_t r_dva0_offset;
	l_dva0_offset = DVA_GET_OFFSET(&l->blk_dva[0]);
	r_dva0_offset = DVA_GET_OFFSET(&r->blk_dva[0]);
	if (l_dva0_offset < r_dva0_offset) {
		return (-1);
	} else if (l_dva0_offset > r_dva0_offset) {
		return (+1);
	}

	/*
	 * Since we're storing blkptrs without cancelling FREE/ALLOC pairs,
	 * it's possible the offsets are equal. In that case, sort by txg
	 */
	if (l->blk_birth < r->blk_birth) {
		return (-1);
	} else if (l->blk_birth > r->blk_birth) {
		return (+1);
	}
	return (0);
}

typedef struct sublivelist_verify_block {
	dva_t svb_dva;

	/*
	 * We need this to check if the block marked as allocated
	 * in the livelist was freed (and potentially reallocated)
	 * in the metaslab spacemaps at a later TXG.
	 */
	uint64_t svb_allocated_txg;
} sublivelist_verify_block_t;

static void zdb_print_blkptr(const blkptr_t *bp, int flags);

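/*
 * Invoked for every blkptr in a sub-livelist (via bpobj_iterate_nofree).
 * FREE entries are staged in sv_pair; an ALLOC that matches a staged
 * FREE cancels it, otherwise its DVAs are recorded in sv_leftover as
 * unmatched. Duplicate FREEs and ALLOCs are reported as errors.
 */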
static int
sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free,
    dmu_tx_t *tx)
{
	ASSERT3P(tx, ==, NULL);
	struct sublivelist_verify *sv = arg;
	char blkbuf[BP_SPRINTF_LEN];
	zfs_btree_index_t where;
	if (free) {
		zfs_btree_add(&sv->sv_pair, bp);
		/* Check if the FREE is a duplicate */
		if (zfs_btree_find(&sv->sv_all_frees, bp, &where) != NULL) {
			snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp,
			    free);
			(void) printf("\tERROR: Duplicate FREE: %s\n", blkbuf);
		} else {
			zfs_btree_add_idx(&sv->sv_all_frees, bp, &where);
		}
	} else {
		/* Check if the ALLOC has been freed */
		if (zfs_btree_find(&sv->sv_pair, bp, &where) != NULL) {
			zfs_btree_remove_idx(&sv->sv_pair, &where);
		} else {
			for (int i = 0; i < SPA_DVAS_PER_BP; i++) {
				if (DVA_IS_EMPTY(&bp->blk_dva[i]))
					break;
				sublivelist_verify_block_t svb = {
					.svb_dva = bp->blk_dva[i],
					.svb_allocated_txg = bp->blk_birth
				};

				if (zfs_btree_find(&sv->sv_leftover, &svb,
				    &where) == NULL) {
					zfs_btree_add_idx(&sv->sv_leftover,
					    &svb, &where);
				}
			}
		}
		/* Check if the ALLOC is a duplicate */
		if (zfs_btree_find(&sv->sv_all_allocs, bp, &where) != NULL) {
			snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp,
			    free);
			(void) printf("\tERROR: Duplicate ALLOC: %s\n", blkbuf);
		} else {
			zfs_btree_add_idx(&sv->sv_all_allocs, bp, &where);
		}
	}
	return (0);
}

static int
sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle)
{
	int err;
	char blkbuf[BP_SPRINTF_LEN];
	struct sublivelist_verify *sv = args;

	zfs_btree_create(&sv->sv_all_allocs, livelist_compare,
	    sizeof (blkptr_t));

	zfs_btree_create(&sv->sv_all_frees, livelist_compare,
	    sizeof (blkptr_t));

	zfs_btree_create(&sv->sv_pair, livelist_compare,
	    sizeof (blkptr_t));

	err = bpobj_iterate_nofree(&dle->dle_bpobj, sublivelist_verify_blkptr,
	    sv, NULL);

	zfs_btree_clear(&sv->sv_all_allocs);
	zfs_btree_destroy(&sv->sv_all_allocs);

	zfs_btree_clear(&sv->sv_all_frees);
	zfs_btree_destroy(&sv->sv_all_frees);

	blkptr_t *e;
	zfs_btree_index_t *cookie = NULL;
	while ((e = zfs_btree_destroy_nodes(&sv->sv_pair, &cookie)) != NULL) {
		snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), e, B_TRUE);
		(void) printf("\tERROR: Unmatched FREE: %s\n", blkbuf);
	}
	zfs_btree_destroy(&sv->sv_pair);

	return (err);
}

static int
livelist_block_compare(const void *larg, const void *rarg)
{
	const sublivelist_verify_block_t *l = larg;
	const sublivelist_verify_block_t *r = rarg;

	if (DVA_GET_VDEV(&l->svb_dva) < DVA_GET_VDEV(&r->svb_dva))
		return (-1);
	else if (DVA_GET_VDEV(&l->svb_dva) > DVA_GET_VDEV(&r->svb_dva))
		return (+1);

	if (DVA_GET_OFFSET(&l->svb_dva) < DVA_GET_OFFSET(&r->svb_dva))
		return (-1);
	else if (DVA_GET_OFFSET(&l->svb_dva) > DVA_GET_OFFSET(&r->svb_dva))
		return (+1);

	if (DVA_GET_ASIZE(&l->svb_dva) < DVA_GET_ASIZE(&r->svb_dva))
		return (-1);
	else if (DVA_GET_ASIZE(&l->svb_dva) > DVA_GET_ASIZE(&r->svb_dva))
		return (+1);

	return (0);
}

/*
 * Check for errors in a livelist while tracking all unfreed ALLOCs in the
 * sublivelist_verify_t: sv->sv_leftover
 */
static void
livelist_verify(dsl_deadlist_t *dl, void *arg)
{
	sublivelist_verify_t *sv = arg;
	dsl_deadlist_iterate(dl, sublivelist_verify_func, sv);
}

/*
 * Check for errors in the livelist entry and discard the intermediary
 * data structures
 */
/* ARGSUSED */
static int
sublivelist_verify_lightweight(void *args, dsl_deadlist_entry_t *dle)
{
	sublivelist_verify_t sv;
	zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
	    sizeof (sublivelist_verify_block_t));
	int err = sublivelist_verify_func(&sv, dle);
	zfs_btree_clear(&sv.sv_leftover);
	zfs_btree_destroy(&sv.sv_leftover);
	return (err);
}

typedef struct metaslab_verify {
	/*
	 * Tree containing all the leftover ALLOCs from the livelists
	 * that are part of this metaslab.
	 */
	zfs_btree_t mv_livelist_allocs;

	/*
	 * Metaslab information.
	 */
	uint64_t mv_vdid;
	uint64_t mv_msid;
	uint64_t mv_start;
	uint64_t mv_end;

	/*
	 * What's currently allocated for this metaslab.
	 */
	range_tree_t *mv_allocated;
} metaslab_verify_t;

typedef void ll_iter_t(dsl_deadlist_t *ll, void *arg);

typedef int (*zdb_log_sm_cb_t)(spa_t *spa, space_map_entry_t *sme, uint64_t txg,
    void *arg);

typedef struct unflushed_iter_cb_arg {
	spa_t *uic_spa;
	uint64_t uic_txg;
	void *uic_arg;
	zdb_log_sm_cb_t uic_cb;
} unflushed_iter_cb_arg_t;

static int
iterate_through_spacemap_logs_cb(space_map_entry_t *sme, void *arg)
{
	unflushed_iter_cb_arg_t *uic = arg;
	return (uic->uic_cb(uic->uic_spa, sme, uic->uic_txg, uic->uic_arg));
}

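/*
 * Walk every log space map in the pool (in TXG order) and hand each
 * entry, tagged with its log's TXG, to the supplied callback. This is
 * a no-op when the log_spacemap feature is not active.
 */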
static void
iterate_through_spacemap_logs(spa_t *spa, zdb_log_sm_cb_t cb, void *arg)
{
	if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
		return;

	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
	for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
	    sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
		space_map_t *sm = NULL;
		VERIFY0(space_map_open(&sm, spa_meta_objset(spa),
		    sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT));

		unflushed_iter_cb_arg_t uic = {
			.uic_spa = spa,
			.uic_txg = sls->sls_txg,
			.uic_arg = arg,
			.uic_cb = cb
		};
		VERIFY0(space_map_iterate(sm, space_map_length(sm),
		    iterate_through_spacemap_logs_cb, &uic));
		space_map_close(sm);
	}
	spa_config_exit(spa, SCL_CONFIG, FTAG);
}

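/*
 * Given a FREE space map entry for this metaslab, report any livelist
 * ALLOC whose DVA range it covers and whose recorded birth TXG is at or
 * before the TXG of the FREE. A temporary DVA is built from the entry
 * so the btree can be searched using the livelist_block_compare order.
 */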
static void
verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg,
    uint64_t offset, uint64_t size)
{
	sublivelist_verify_block_t svb;
	DVA_SET_VDEV(&svb.svb_dva, mv->mv_vdid);
	DVA_SET_OFFSET(&svb.svb_dva, offset);
	DVA_SET_ASIZE(&svb.svb_dva, size);
	zfs_btree_index_t where;
	uint64_t end_offset = offset + size;

	/*
	 * Look for an exact match for spacemap entry in the livelist entries.
	 * Then, look for other livelist entries that fall within the range
	 * of the spacemap entry as it may have been condensed
	 */
	sublivelist_verify_block_t *found =
	    zfs_btree_find(&mv->mv_livelist_allocs, &svb, &where);
	if (found == NULL) {
		found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where);
	}
	for (; found != NULL && DVA_GET_VDEV(&found->svb_dva) == mv->mv_vdid &&
	    DVA_GET_OFFSET(&found->svb_dva) < end_offset;
	    found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) {
		if (found->svb_allocated_txg <= txg) {
			(void) printf("ERROR: Livelist ALLOC [%llx:%llx] "
			    "from TXG %llx FREED at TXG %llx\n",
			    (u_longlong_t)DVA_GET_OFFSET(&found->svb_dva),
			    (u_longlong_t)DVA_GET_ASIZE(&found->svb_dva),
			    (u_longlong_t)found->svb_allocated_txg,
			    (u_longlong_t)txg);
		}
	}
}

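/*
 * Replay one space map entry against mv_allocated: an ALLOC of a range
 * already allocated or a FREE of a range not allocated is reported as a
 * double ALLOC/FREE. FREEs are also cross-checked against the livelist
 * leftover ALLOCs (see verify_livelist_allocs above).
 */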
static int
metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg)
{
	metaslab_verify_t *mv = arg;
	uint64_t offset = sme->sme_offset;
	uint64_t size = sme->sme_run;
	uint64_t txg = sme->sme_txg;

	if (sme->sme_type == SM_ALLOC) {
		if (range_tree_contains(mv->mv_allocated,
		    offset, size)) {
			(void) printf("ERROR: DOUBLE ALLOC: "
			    "%llu [%llx:%llx] "
			    "%llu:%llu LOG_SM\n",
			    (u_longlong_t)txg, (u_longlong_t)offset,
			    (u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
			    (u_longlong_t)mv->mv_msid);
		} else {
			range_tree_add(mv->mv_allocated,
			    offset, size);
		}
	} else {
		if (!range_tree_contains(mv->mv_allocated,
		    offset, size)) {
			(void) printf("ERROR: DOUBLE FREE: "
			    "%llu [%llx:%llx] "
			    "%llu:%llu LOG_SM\n",
			    (u_longlong_t)txg, (u_longlong_t)offset,
			    (u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
			    (u_longlong_t)mv->mv_msid);
		} else {
			range_tree_remove(mv->mv_allocated,
			    offset, size);
		}
	}

	if (sme->sme_type != SM_ALLOC) {
		/*
		 * If something is freed in the spacemap, verify that
		 * it is not listed as allocated in the livelist.
		 */
		verify_livelist_allocs(mv, txg, offset, size);
	}
	return (0);
}

static int
spacemap_check_sm_log_cb(spa_t *spa, space_map_entry_t *sme,
    uint64_t txg, void *arg)
{
	metaslab_verify_t *mv = arg;
	uint64_t offset = sme->sme_offset;
	uint64_t vdev_id = sme->sme_vdev;

	vdev_t *vd = vdev_lookup_top(spa, vdev_id);

	/* skip indirect vdevs */
	if (!vdev_is_concrete(vd))
		return (0);

	if (vdev_id != mv->mv_vdid)
		return (0);

	metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
	if (ms->ms_id != mv->mv_msid)
		return (0);

	if (txg < metaslab_unflushed_txg(ms))
		return (0);

	ASSERT3U(txg, ==, sme->sme_txg);
	return (metaslab_spacemap_validation_cb(sme, mv));
}

static void
spacemap_check_sm_log(spa_t *spa, metaslab_verify_t *mv)
{
	iterate_through_spacemap_logs(spa, spacemap_check_sm_log_cb, mv);
}

static void
spacemap_check_ms_sm(space_map_t *sm, metaslab_verify_t *mv)
{
	if (sm == NULL)
		return;

	VERIFY0(space_map_iterate(sm, space_map_length(sm),
	    metaslab_spacemap_validation_cb, mv));
}

static void iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg);

/*
 * Transfer blocks from sv_leftover tree to the mv_livelist_allocs if
 * they are part of that metaslab (mv_msid).
 */
static void
mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv)
{
	zfs_btree_index_t where;
	sublivelist_verify_block_t *svb;
	ASSERT3U(zfs_btree_numnodes(&mv->mv_livelist_allocs), ==, 0);
	for (svb = zfs_btree_first(&sv->sv_leftover, &where);
	    svb != NULL;
	    svb = zfs_btree_next(&sv->sv_leftover, &where, &where)) {
		if (DVA_GET_VDEV(&svb->svb_dva) != mv->mv_vdid)
			continue;

		if (DVA_GET_OFFSET(&svb->svb_dva) < mv->mv_start &&
		    (DVA_GET_OFFSET(&svb->svb_dva) +
		    DVA_GET_ASIZE(&svb->svb_dva)) > mv->mv_start) {
			(void) printf("ERROR: Found block that crosses "
			    "metaslab boundary: <%llu:%llx:%llx>\n",
			    (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
			    (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
			    (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
			continue;
		}

		if (DVA_GET_OFFSET(&svb->svb_dva) < mv->mv_start)
			continue;

		if (DVA_GET_OFFSET(&svb->svb_dva) >= mv->mv_end)
			continue;

		if ((DVA_GET_OFFSET(&svb->svb_dva) +
		    DVA_GET_ASIZE(&svb->svb_dva)) > mv->mv_end) {
			(void) printf("ERROR: Found block that crosses "
			    "metaslab boundary: <%llu:%llx:%llx>\n",
			    (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
			    (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
			    (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
			continue;
		}

		zfs_btree_add(&mv->mv_livelist_allocs, svb);
	}

	for (svb = zfs_btree_first(&mv->mv_livelist_allocs, &where);
	    svb != NULL;
	    svb = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) {
		zfs_btree_remove(&sv->sv_leftover, svb);
	}
}

/*
 * [Livelist Check]
 * Iterate through all the sublivelists and:
 * - report leftover frees
 * - report double ALLOCs/FREEs
 * - record leftover ALLOCs together with their TXG [see Cross Check]
 *
 * [Spacemap Check]
 * for each metaslab:
 * - iterate over spacemap and then the metaslab's entries in the
 *   spacemap log, then report any double FREEs and ALLOCs (do not
 *   blow up).
 *
 * [Cross Check]
 * After finishing the Livelist Check phase and while being in the
 * Spacemap Check phase, we find all the recorded leftover ALLOCs
 * of the livelist check that are part of the metaslab that we are
 * currently looking at in the Spacemap Check. We report any entries
 * that are marked as ALLOCs in the livelists but have been actually
 * freed (and potentially allocated again) after their TXG stamp in
 * the spacemaps. Also report any ALLOCs from the livelists that
 * belong to indirect vdevs (e.g. their vdev completed removal).
 *
 * Note that this will miss Log Spacemap entries that cancelled each other
 * out before being flushed to the metaslab, so we are not guaranteed
 * to match all erroneous ALLOCs.
 */
static void
livelist_metaslab_validate(spa_t *spa)
{
	(void) printf("Verifying deleted livelist entries\n");

	sublivelist_verify_t sv;
	zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
	    sizeof (sublivelist_verify_block_t));
	iterate_deleted_livelists(spa, livelist_verify, &sv);

	(void) printf("Verifying metaslab entries\n");
	vdev_t *rvd = spa->spa_root_vdev;
	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
		vdev_t *vd = rvd->vdev_child[c];

		if (!vdev_is_concrete(vd))
			continue;

		for (uint64_t mid = 0; mid < vd->vdev_ms_count; mid++) {
			metaslab_t *m = vd->vdev_ms[mid];

			(void) fprintf(stderr,
			    "\rverifying concrete vdev %llu, "
			    "metaslab %llu of %llu ...",
			    (longlong_t)vd->vdev_id,
			    (longlong_t)mid,
			    (longlong_t)vd->vdev_ms_count);

			uint64_t shift, start;
			range_seg_type_t type =
			    metaslab_calculate_range_tree_type(vd, m,
			    &start, &shift);
			metaslab_verify_t mv;
			mv.mv_allocated = range_tree_create(NULL,
			    type, NULL, start, shift);
			mv.mv_vdid = vd->vdev_id;
			mv.mv_msid = m->ms_id;
			mv.mv_start = m->ms_start;
			mv.mv_end = m->ms_start + m->ms_size;
			zfs_btree_create(&mv.mv_livelist_allocs,
			    livelist_block_compare,
			    sizeof (sublivelist_verify_block_t));

			mv_populate_livelist_allocs(&mv, &sv);

			spacemap_check_ms_sm(m->ms_sm, &mv);
			spacemap_check_sm_log(spa, &mv);

			range_tree_vacate(mv.mv_allocated, NULL, NULL);
			range_tree_destroy(mv.mv_allocated);
			zfs_btree_clear(&mv.mv_livelist_allocs);
			zfs_btree_destroy(&mv.mv_livelist_allocs);
		}
	}
	(void) fprintf(stderr, "\n");

	/*
	 * If there are any segments in the leftover tree after we walked
	 * through all the metaslabs in the concrete vdevs then this means
	 * that we have segments in the livelists that belong to indirect
	 * vdevs and are marked as allocated.
	 */
	if (zfs_btree_numnodes(&sv.sv_leftover) == 0) {
		zfs_btree_destroy(&sv.sv_leftover);
		return;
	}
	(void) printf("ERROR: Found livelist blocks marked as allocated "
	    "for indirect vdevs:\n");

	zfs_btree_index_t *where = NULL;
	sublivelist_verify_block_t *svb;
	while ((svb = zfs_btree_destroy_nodes(&sv.sv_leftover, &where)) !=
	    NULL) {
		int vdev_id = DVA_GET_VDEV(&svb->svb_dva);
		ASSERT3U(vdev_id, <, rvd->vdev_children);
		vdev_t *vd = rvd->vdev_child[vdev_id];
		ASSERT(!vdev_is_concrete(vd));
		(void) printf("<%d:%llx:%llx> TXG %llx\n",
		    vdev_id, (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
		    (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva),
		    (u_longlong_t)svb->svb_allocated_txg);
	}
	(void) printf("\n");
	zfs_btree_destroy(&sv.sv_leftover);
}

/*
 * These libumem hooks provide a reasonable set of defaults for the allocator's
 * debugging facilities.
 */
const char *
_umem_debug_init(void)
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}

const char *
_umem_logging_init(void)
{
	return ("fail,contents"); /* $UMEM_LOGGING setting */
}

static void
usage(void)
{
	(void) fprintf(stderr,
	    "Usage:\t%s [-AbcdDFGhikLMPsvXy] [-e [-V] [-p <path> ...]] "
	    "[-I <inflight I/Os>]\n"
	    "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
	    "\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]]\n"
	    "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
	    "\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]]\n"
	    "\t%s [-v] <bookmark>\n"
	    "\t%s -C [-A] [-U <cache>]\n"
	    "\t%s -l [-Aqu] <device>\n"
	    "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
	    "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
	    "\t%s -O <dataset> <path>\n"
	    "\t%s -r <dataset> <path> <destination>\n"
	    "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
	    "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
	    "\t%s -E [-A] word0:word1:...:word15\n"
	    "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
	    "<poolname>\n\n",
	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
	    cmdname, cmdname, cmdname, cmdname);

	(void) fprintf(stderr, " Dataset name must include at least one "
	    "separator character '/' or '@'\n");
	(void) fprintf(stderr, " If dataset name is specified, only that "
	    "dataset is dumped\n");
	(void) fprintf(stderr, " If object numbers or object number "
	    "ranges are specified, only those\n"
	    " objects or ranges are dumped.\n\n");
	(void) fprintf(stderr,
	    " Object ranges take the form <start>:<end>[:<flags>]\n"
	    " start Starting object number\n"
	    " end Ending object number, or -1 for no upper bound\n"
	    " flags Optional flags to select object types:\n"
	    " A All objects (this is the default)\n"
	    " d ZFS directories\n"
	    " f ZFS files\n"
	    " m SPA space maps\n"
	    " z ZAPs\n"
	    " - Negate effect of next flag\n\n");
	(void) fprintf(stderr, " Options to control amount of output:\n");
	(void) fprintf(stderr, " -b block statistics\n");
	(void) fprintf(stderr, " -c checksum all metadata (twice for "
	    "all data) blocks\n");
	(void) fprintf(stderr, " -C config (or cachefile if alone)\n");
	(void) fprintf(stderr, " -d dataset(s)\n");
	(void) fprintf(stderr, " -D dedup statistics\n");
	(void) fprintf(stderr, " -E decode and display block from an "
	    "embedded block pointer\n");
	(void) fprintf(stderr, " -h pool history\n");
	(void) fprintf(stderr, " -i intent logs\n");
	(void) fprintf(stderr, " -l read label contents\n");
	(void) fprintf(stderr, " -k examine the checkpointed state "
	    "of the pool\n");
	(void) fprintf(stderr, " -L disable leak tracking (do not "
	    "load spacemaps)\n");
	(void) fprintf(stderr, " -m metaslabs\n");
	(void) fprintf(stderr, " -M metaslab groups\n");
	(void) fprintf(stderr, " -O perform object lookups by path\n");
	(void) fprintf(stderr, " -r copy an object by path to file\n");
	(void) fprintf(stderr, " -R read and display block from a "
	    "device\n");
	(void) fprintf(stderr, " -s report stats on zdb's I/O\n");
	(void) fprintf(stderr, " -S simulate dedup to measure effect\n");
	(void) fprintf(stderr, " -v verbose (applies to all "
	    "others)\n");
	(void) fprintf(stderr, " -y perform livelist and metaslab "
	    "validation on any livelists being deleted\n\n");
	(void) fprintf(stderr, " Below options are intended for use "
	    "with other options:\n");
	(void) fprintf(stderr, " -A ignore assertions (-A), enable "
	    "panic recovery (-AA) or both (-AAA)\n");
	(void) fprintf(stderr, " -e pool is exported/destroyed/"
	    "has altroot/not in a cachefile\n");
	(void) fprintf(stderr, " -F attempt automatic rewind within "
	    "safe range of transaction groups\n");
	(void) fprintf(stderr, " -G dump zfs_dbgmsg buffer before "
	    "exiting\n");
	(void) fprintf(stderr, " -I <number of inflight I/Os> -- "
	    "specify the maximum number of\n "
	    "checksumming I/Os [default is 200]\n");
	(void) fprintf(stderr, " -o <variable>=<value> set global "
	    "variable to an unsigned 32-bit integer\n");
	(void) fprintf(stderr, " -p <path> -- use one or more with "
	    "-e to specify path to vdev dir\n");
	(void) fprintf(stderr, " -P print numbers in parseable form\n");
	(void) fprintf(stderr, " -q don't print label contents\n");
	(void) fprintf(stderr, " -t <txg> -- highest txg to use when "
	    "searching for uberblocks\n");
	(void) fprintf(stderr, " -u uberblock\n");
	(void) fprintf(stderr, " -U <cachefile_path> -- use alternate "
	    "cachefile\n");
	(void) fprintf(stderr, " -V do verbatim import\n");
	(void) fprintf(stderr, " -x <dumpdir> -- "
	    "dump all read blocks into specified directory\n");
	(void) fprintf(stderr, " -X attempt extreme rewind (does not "
	    "work with dataset)\n");
	(void) fprintf(stderr, " -Y attempt all reconstruction "
	    "combinations for split blocks\n");
	(void) fprintf(stderr, " -Z show ZSTD headers\n");
	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
	    "to make only that option verbose\n");
	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
	exit(1);
}

static void
dump_debug_buffer(void)
{
	if (dump_opt['G']) {
		(void) printf("\n");
		(void) fflush(stdout);
		zfs_dbgmsg_print("zdb");
	}
}

/*
 * Called for usage errors that are discovered after a call to spa_open(),
 * dmu_bonus_hold(), or pool_match(). abort() is called for other errors.
 */

static void
fatal(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void) fprintf(stderr, "%s: ", cmdname);
	(void) vfprintf(stderr, fmt, ap);
	va_end(ap);
	(void) fprintf(stderr, "\n");

	dump_debug_buffer();

	exit(1);
}

/* ARGSUSED */
static void
dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
{
	nvlist_t *nv;
	size_t nvsize = *(uint64_t *)data;
	char *packed = umem_alloc(nvsize, UMEM_NOFAIL);

	VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));

	VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);

	umem_free(packed, nvsize);

	dump_nvlist(nv, 8);

	nvlist_free(nv);
}

/* ARGSUSED */
static void
dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
{
	spa_history_phys_t *shp = data;

	if (shp == NULL)
		return;

	(void) printf("\t\tpool_create_len = %llu\n",
	    (u_longlong_t)shp->sh_pool_create_len);
	(void) printf("\t\tphys_max_off = %llu\n",
	    (u_longlong_t)shp->sh_phys_max_off);
	(void) printf("\t\tbof = %llu\n",
	    (u_longlong_t)shp->sh_bof);
	(void) printf("\t\teof = %llu\n",
	    (u_longlong_t)shp->sh_eof);
	(void) printf("\t\trecords_lost = %llu\n",
	    (u_longlong_t)shp->sh_records_lost);
}

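/*
 * Like nicenum(), but honors -P by printing the raw number instead of
 * a human-readable suffixed form.
 */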
static void
zdb_nicenum(uint64_t num, char *buf, size_t buflen)
{
	if (dump_opt['P'])
		(void) snprintf(buf, buflen, "%llu", (longlong_t)num);
	else
		nicenum(num, buf, buflen);
}

static const char histo_stars[] = "****************************************";
static const uint64_t histo_width = sizeof (histo_stars) - 1;

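/*
 * Print one row of stars per non-empty bucket, scaled so that the
 * fullest bucket spans histo_width stars; "offset" shifts the printed
 * bucket indices (e.g. to label buckets by sm_shift + n).
 */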
static void
dump_histogram(const uint64_t *histo, int size, int offset)
{
	int i;
	int minidx = size - 1;
	int maxidx = 0;
	uint64_t max = 0;

	for (i = 0; i < size; i++) {
		if (histo[i] > max)
			max = histo[i];
		if (histo[i] > 0 && i > maxidx)
			maxidx = i;
		if (histo[i] > 0 && i < minidx)
			minidx = i;
	}

	if (max < histo_width)
		max = histo_width;

	for (i = minidx; i <= maxidx; i++) {
		(void) printf("\t\t\t%3u: %6llu %s\n",
		    i + offset, (u_longlong_t)histo[i],
		    &histo_stars[(max - histo[i]) * histo_width / max]);
	}
}

static void
dump_zap_stats(objset_t *os, uint64_t object)
{
	int error;
	zap_stats_t zs;

	error = zap_get_stats(os, object, &zs);
	if (error)
		return;

	if (zs.zs_ptrtbl_len == 0) {
		ASSERT(zs.zs_num_blocks == 1);
		(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
		    (u_longlong_t)zs.zs_blocksize,
		    (u_longlong_t)zs.zs_num_entries);
		return;
	}

	(void) printf("\tFat ZAP stats:\n");

	(void) printf("\t\tPointer table:\n");
	(void) printf("\t\t\t%llu elements\n",
	    (u_longlong_t)zs.zs_ptrtbl_len);
	(void) printf("\t\t\tzt_blk: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_zt_blk);
	(void) printf("\t\t\tzt_numblks: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
	(void) printf("\t\t\tzt_shift: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_zt_shift);
	(void) printf("\t\t\tzt_blks_copied: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_blks_copied);
	(void) printf("\t\t\tzt_nextblk: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_nextblk);

	(void) printf("\t\tZAP entries: %llu\n",
	    (u_longlong_t)zs.zs_num_entries);
	(void) printf("\t\tLeaf blocks: %llu\n",
	    (u_longlong_t)zs.zs_num_leafs);
	(void) printf("\t\tTotal blocks: %llu\n",
	    (u_longlong_t)zs.zs_num_blocks);
	(void) printf("\t\tzap_block_type: 0x%llx\n",
	    (u_longlong_t)zs.zs_block_type);
	(void) printf("\t\tzap_magic: 0x%llx\n",
	    (u_longlong_t)zs.zs_magic);
	(void) printf("\t\tzap_salt: 0x%llx\n",
	    (u_longlong_t)zs.zs_salt);

	(void) printf("\t\tLeafs with 2^n pointers:\n");
	dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0);

	(void) printf("\t\tBlocks with n*5 entries:\n");
	dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0);

	(void) printf("\t\tBlocks n/10 full:\n");
	dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0);

	(void) printf("\t\tEntries with n chunks:\n");
	dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0);

	(void) printf("\t\tBuckets with n entries:\n");
	dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0);
}

/*ARGSUSED*/
static void
dump_none(objset_t *os, uint64_t object, void *data, size_t size)
{
}

/*ARGSUSED*/
static void
dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
{
	(void) printf("\tUNKNOWN OBJECT TYPE\n");
}

/*ARGSUSED*/
static void
dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
{
}

/*ARGSUSED*/
static void
dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
{
	uint64_t *arr;
	uint64_t oursize;
	if (dump_opt['d'] < 6)
		return;

	if (data == NULL) {
		dmu_object_info_t doi;

		VERIFY0(dmu_object_info(os, object, &doi));
		size = doi.doi_max_offset;
		/*
		 * We cap the size at 1 mebibyte here to prevent
		 * allocation failures and nigh-infinite printing if the
		 * object is extremely large.
		 */
		oursize = MIN(size, 1 << 20);
		arr = kmem_alloc(oursize, KM_SLEEP);

		int err = dmu_read(os, object, 0, oursize, arr, 0);
		if (err != 0) {
			(void) printf("got error %u from dmu_read\n", err);
			kmem_free(arr, oursize);
			return;
		}
	} else {
		/*
		 * Even though the allocation is already done in this code path,
		 * we still cap the size to prevent excessive printing.
		 */
		oursize = MIN(size, 1 << 20);
		arr = data;
	}

	if (size == 0) {
		(void) printf("\t\t[]\n");
		return;
	}

	(void) printf("\t\t[%0llx", (u_longlong_t)arr[0]);
	for (size_t i = 1; i * sizeof (uint64_t) < oursize; i++) {
		if (i % 4 != 0)
			(void) printf(", %0llx", (u_longlong_t)arr[i]);
		else
			(void) printf(",\n\t\t%0llx", (u_longlong_t)arr[i]);
	}
	if (oursize != size)
		(void) printf(", ... ");
	(void) printf("]\n");

	if (data == NULL)
		kmem_free(arr, oursize);
}

/*ARGSUSED*/
static void
dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
{
	zap_cursor_t zc;
	zap_attribute_t attr;
	void *prop;
	unsigned i;

	dump_zap_stats(os, object);
	(void) printf("\n");

	for (zap_cursor_init(&zc, os, object);
	    zap_cursor_retrieve(&zc, &attr) == 0;
	    zap_cursor_advance(&zc)) {
		(void) printf("\t\t%s = ", attr.za_name);
		if (attr.za_num_integers == 0) {
			(void) printf("\n");
			continue;
		}
		prop = umem_zalloc(attr.za_num_integers *
		    attr.za_integer_length, UMEM_NOFAIL);
		(void) zap_lookup(os, object, attr.za_name,
		    attr.za_integer_length, attr.za_num_integers, prop);
		if (attr.za_integer_length == 1) {
			if (strcmp(attr.za_name,
			    DSL_CRYPTO_KEY_MASTER_KEY) == 0 ||
			    strcmp(attr.za_name,
			    DSL_CRYPTO_KEY_HMAC_KEY) == 0 ||
			    strcmp(attr.za_name, DSL_CRYPTO_KEY_IV) == 0 ||
			    strcmp(attr.za_name, DSL_CRYPTO_KEY_MAC) == 0 ||
			    strcmp(attr.za_name, DMU_POOL_CHECKSUM_SALT) == 0) {
				uint8_t *u8 = prop;

				for (i = 0; i < attr.za_num_integers; i++) {
					(void) printf("%02x", u8[i]);
				}
			} else {
				(void) printf("%s", (char *)prop);
			}
		} else {
			for (i = 0; i < attr.za_num_integers; i++) {
				switch (attr.za_integer_length) {
				case 2:
					(void) printf("%u ",
					    ((uint16_t *)prop)[i]);
					break;
				case 4:
					(void) printf("%u ",
					    ((uint32_t *)prop)[i]);
					break;
				case 8:
					(void) printf("%lld ",
					    (u_longlong_t)((int64_t *)prop)[i]);
					break;
				}
			}
		}
		(void) printf("\n");
		umem_free(prop, attr.za_num_integers * attr.za_integer_length);
	}
	zap_cursor_fini(&zc);
}

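/*
 * Print a bpobj's header fields, handling the three on-disk sizes (v1
 * without subobjs, v2 with subobjs, and the current layout with
 * num_freed); with five or more -d options, also dump each of its
 * block pointers.
 */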
static void
dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size)
{
	bpobj_phys_t *bpop = data;
	uint64_t i;
	char bytes[32], comp[32], uncomp[32];

	/* make sure the output won't get truncated */
	CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
	CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
	CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);

	if (bpop == NULL)
		return;

	zdb_nicenum(bpop->bpo_bytes, bytes, sizeof (bytes));
	zdb_nicenum(bpop->bpo_comp, comp, sizeof (comp));
	zdb_nicenum(bpop->bpo_uncomp, uncomp, sizeof (uncomp));

	(void) printf("\t\tnum_blkptrs = %llu\n",
	    (u_longlong_t)bpop->bpo_num_blkptrs);
	(void) printf("\t\tbytes = %s\n", bytes);
	if (size >= BPOBJ_SIZE_V1) {
		(void) printf("\t\tcomp = %s\n", comp);
		(void) printf("\t\tuncomp = %s\n", uncomp);
	}
	if (size >= BPOBJ_SIZE_V2) {
		(void) printf("\t\tsubobjs = %llu\n",
		    (u_longlong_t)bpop->bpo_subobjs);
		(void) printf("\t\tnum_subobjs = %llu\n",
		    (u_longlong_t)bpop->bpo_num_subobjs);
	}
	if (size >= sizeof (*bpop)) {
		(void) printf("\t\tnum_freed = %llu\n",
		    (u_longlong_t)bpop->bpo_num_freed);
	}

	if (dump_opt['d'] < 5)
		return;

	for (i = 0; i < bpop->bpo_num_blkptrs; i++) {
		char blkbuf[BP_SPRINTF_LEN];
		blkptr_t bp;

		int err = dmu_read(os, object,
		    i * sizeof (bp), sizeof (bp), &bp, 0);
		if (err != 0) {
			(void) printf("got error %u from dmu_read\n", err);
			break;
		}
		snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp,
		    BP_GET_FREE(&bp));
		(void) printf("\t%s\n", blkbuf);
	}
}

/* ARGSUSED */
static void
dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size)
{
	dmu_object_info_t doi;
	int64_t i;

	VERIFY0(dmu_object_info(os, object, &doi));
	uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP);

	int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0);
	if (err != 0) {
		(void) printf("got error %u from dmu_read\n", err);
		kmem_free(subobjs, doi.doi_max_offset);
		return;
	}

	int64_t last_nonzero = -1;
	for (i = 0; i < doi.doi_max_offset / 8; i++) {
		if (subobjs[i] != 0)
			last_nonzero = i;
	}

	for (i = 0; i <= last_nonzero; i++) {
		(void) printf("\t%llu\n", (u_longlong_t)subobjs[i]);
	}
	kmem_free(subobjs, doi.doi_max_offset);
}

/*ARGSUSED*/
static void
dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
{
	dump_zap_stats(os, object);
	/* contents are printed elsewhere, properly decoded */
}

/*ARGSUSED*/
static void
dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
{
	zap_cursor_t zc;
	zap_attribute_t attr;

	dump_zap_stats(os, object);
	(void) printf("\n");

	for (zap_cursor_init(&zc, os, object);
	    zap_cursor_retrieve(&zc, &attr) == 0;
	    zap_cursor_advance(&zc)) {
		(void) printf("\t\t%s = ", attr.za_name);
		if (attr.za_num_integers == 0) {
			(void) printf("\n");
			continue;
		}
		(void) printf(" %llx : [%d:%d:%d]\n",
		    (u_longlong_t)attr.za_first_integer,
		    (int)ATTR_LENGTH(attr.za_first_integer),
		    (int)ATTR_BSWAP(attr.za_first_integer),
		    (int)ATTR_NUM(attr.za_first_integer));
	}
	zap_cursor_fini(&zc);
}

/*ARGSUSED*/
static void
dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
{
	zap_cursor_t zc;
	zap_attribute_t attr;
	uint16_t *layout_attrs;
	unsigned i;

	dump_zap_stats(os, object);
	(void) printf("\n");

	for (zap_cursor_init(&zc, os, object);
	    zap_cursor_retrieve(&zc, &attr) == 0;
	    zap_cursor_advance(&zc)) {
		(void) printf("\t\t%s = [", attr.za_name);
		if (attr.za_num_integers == 0) {
			(void) printf("\n");
			continue;
		}

		VERIFY(attr.za_integer_length == 2);
		layout_attrs = umem_zalloc(attr.za_num_integers *
		    attr.za_integer_length, UMEM_NOFAIL);

		VERIFY(zap_lookup(os, object, attr.za_name,
		    attr.za_integer_length,
		    attr.za_num_integers, layout_attrs) == 0);

		for (i = 0; i != attr.za_num_integers; i++)
			(void) printf(" %d ", (int)layout_attrs[i]);
		(void) printf("]\n");
		umem_free(layout_attrs,
		    attr.za_num_integers * attr.za_integer_length);
	}
	zap_cursor_fini(&zc);
}

/*ARGSUSED*/
static void
dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
{
	zap_cursor_t zc;
	zap_attribute_t attr;
	const char *typenames[] = {
		/* 0 */ "not specified",
		/* 1 */ "FIFO",
		/* 2 */ "Character Device",
		/* 3 */ "3 (invalid)",
		/* 4 */ "Directory",
		/* 5 */ "5 (invalid)",
		/* 6 */ "Block Device",
		/* 7 */ "7 (invalid)",
		/* 8 */ "Regular File",
		/* 9 */ "9 (invalid)",
		/* 10 */ "Symbolic Link",
		/* 11 */ "11 (invalid)",
		/* 12 */ "Socket",
		/* 13 */ "Door",
		/* 14 */ "Event Port",
		/* 15 */ "15 (invalid)",
	};

	dump_zap_stats(os, object);
	(void) printf("\n");

	for (zap_cursor_init(&zc, os, object);
	    zap_cursor_retrieve(&zc, &attr) == 0;
	    zap_cursor_advance(&zc)) {
		(void) printf("\t\t%s = %lld (type: %s)\n",
		    attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
		    typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
	}
	zap_cursor_fini(&zc);
}

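/*
 * The get_*_refcount() helpers below count the space maps whose on-disk
 * phys buffer is the histogram-sized space_map_phys_t, i.e. the
 * consumers of the spacemap_histogram feature; verify_spacemap_refcounts()
 * compares their sum against the feature's reference count.
 */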
static int
get_dtl_refcount(vdev_t *vd)
{
	int refcount = 0;

	if (vd->vdev_ops->vdev_op_leaf) {
		space_map_t *sm = vd->vdev_dtl_sm;

		if (sm != NULL &&
		    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
			return (1);
		return (0);
	}

	for (unsigned c = 0; c < vd->vdev_children; c++)
		refcount += get_dtl_refcount(vd->vdev_child[c]);
	return (refcount);
}

static int
get_metaslab_refcount(vdev_t *vd)
{
	int refcount = 0;

	if (vd->vdev_top == vd) {
		for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
			space_map_t *sm = vd->vdev_ms[m]->ms_sm;

			if (sm != NULL &&
			    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
				refcount++;
		}
	}
	for (unsigned c = 0; c < vd->vdev_children; c++)
		refcount += get_metaslab_refcount(vd->vdev_child[c]);

	return (refcount);
}

static int
get_obsolete_refcount(vdev_t *vd)
{
	uint64_t obsolete_sm_object;
	int refcount = 0;

	VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
	if (vd->vdev_top == vd && obsolete_sm_object != 0) {
		dmu_object_info_t doi;
		VERIFY0(dmu_object_info(vd->vdev_spa->spa_meta_objset,
		    obsolete_sm_object, &doi));
		if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
			refcount++;
		}
	} else {
		ASSERT3P(vd->vdev_obsolete_sm, ==, NULL);
		ASSERT3U(obsolete_sm_object, ==, 0);
	}
	for (unsigned c = 0; c < vd->vdev_children; c++) {
		refcount += get_obsolete_refcount(vd->vdev_child[c]);
	}

	return (refcount);
}

static int
get_prev_obsolete_spacemap_refcount(spa_t *spa)
{
	uint64_t prev_obj =
	    spa->spa_condensing_indirect_phys.scip_prev_obsolete_sm_object;
	if (prev_obj != 0) {
		dmu_object_info_t doi;
		VERIFY0(dmu_object_info(spa->spa_meta_objset, prev_obj, &doi));
		if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
			return (1);
		}
	}
	return (0);
}

static int
get_checkpoint_refcount(vdev_t *vd)
{
	int refcount = 0;

	if (vd->vdev_top == vd && vd->vdev_top_zap != 0 &&
	    zap_contains(spa_meta_objset(vd->vdev_spa),
	    vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) == 0)
		refcount++;

	for (uint64_t c = 0; c < vd->vdev_children; c++)
		refcount += get_checkpoint_refcount(vd->vdev_child[c]);

	return (refcount);
}

static int
get_log_spacemap_refcount(spa_t *spa)
{
	return (avl_numnodes(&spa->spa_sm_logs_by_txg));
}

static int
verify_spacemap_refcounts(spa_t *spa)
{
	uint64_t expected_refcount = 0;
	uint64_t actual_refcount;

	(void) feature_get_refcount(spa,
	    &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM],
	    &expected_refcount);
	actual_refcount = get_dtl_refcount(spa->spa_root_vdev);
	actual_refcount += get_metaslab_refcount(spa->spa_root_vdev);
	actual_refcount += get_obsolete_refcount(spa->spa_root_vdev);
	actual_refcount += get_prev_obsolete_spacemap_refcount(spa);
	actual_refcount += get_checkpoint_refcount(spa->spa_root_vdev);
	actual_refcount += get_log_spacemap_refcount(spa);

	if (expected_refcount != actual_refcount) {
		(void) printf("space map refcount mismatch: expected %lld != "
		    "actual %lld\n",
		    (longlong_t)expected_refcount,
		    (longlong_t)actual_refcount);
		return (2);
	}
	return (0);
}

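/*
 * Print a space map's header and, at high verbosity, decode each entry:
 * debug/padding words, one-word entries (offset and run scaled by
 * sm_shift), and two-word entries that also carry a vdev id. A running
 * allocated-space total is checked against the space map summary.
 */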
static void
dump_spacemap(objset_t *os, space_map_t *sm)
{
	const char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
	    "INVALID", "INVALID", "INVALID", "INVALID" };

	if (sm == NULL)
		return;

	(void) printf("space map object %llu:\n",
	    (longlong_t)sm->sm_object);
	(void) printf("  smp_length = 0x%llx\n",
	    (longlong_t)sm->sm_phys->smp_length);
	(void) printf("  smp_alloc = 0x%llx\n",
	    (longlong_t)sm->sm_phys->smp_alloc);

	if (dump_opt['d'] < 6 && dump_opt['m'] < 4)
		return;

	/*
	 * Print out the freelist entries in both encoded and decoded form.
	 */
	uint8_t mapshift = sm->sm_shift;
	int64_t alloc = 0;
	uint64_t word, entry_id = 0;
	for (uint64_t offset = 0; offset < space_map_length(sm);
	    offset += sizeof (word)) {

		VERIFY0(dmu_read(os, space_map_object(sm), offset,
		    sizeof (word), &word, DMU_READ_PREFETCH));

		if (sm_entry_is_debug(word)) {
			uint64_t de_txg = SM_DEBUG_TXG_DECODE(word);
			uint64_t de_sync_pass = SM_DEBUG_SYNCPASS_DECODE(word);
			if (de_txg == 0) {
				(void) printf(
				    "\t [%6llu] PADDING\n",
				    (u_longlong_t)entry_id);
			} else {
				(void) printf(
				    "\t [%6llu] %s: txg %llu pass %llu\n",
				    (u_longlong_t)entry_id,
				    ddata[SM_DEBUG_ACTION_DECODE(word)],
				    (u_longlong_t)de_txg,
				    (u_longlong_t)de_sync_pass);
			}
			entry_id++;
			continue;
		}

		uint8_t words;
		char entry_type;
		uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID;

		if (sm_entry_is_single_word(word)) {
			entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
			    'A' : 'F';
			entry_off = (SM_OFFSET_DECODE(word) << mapshift) +
			    sm->sm_start;
			entry_run = SM_RUN_DECODE(word) << mapshift;
			words = 1;
		} else {
			/* it is a two-word entry so we read another word */
			ASSERT(sm_entry_is_double_word(word));

			uint64_t extra_word;
			offset += sizeof (extra_word);
			VERIFY0(dmu_read(os, space_map_object(sm), offset,
			    sizeof (extra_word), &extra_word,
			    DMU_READ_PREFETCH));

			ASSERT3U(offset, <=, space_map_length(sm));

			entry_run = SM2_RUN_DECODE(word) << mapshift;
			entry_vdev = SM2_VDEV_DECODE(word);
			entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ?
			    'A' : 'F';
			entry_off = (SM2_OFFSET_DECODE(extra_word) <<
			    mapshift) + sm->sm_start;
			words = 2;
		}

		(void) printf("\t [%6llu] %c range:"
		    " %010llx-%010llx size: %06llx vdev: %06llu words: %u\n",
		    (u_longlong_t)entry_id,
		    entry_type, (u_longlong_t)entry_off,
		    (u_longlong_t)(entry_off + entry_run),
		    (u_longlong_t)entry_run,
		    (u_longlong_t)entry_vdev, words);

		if (entry_type == 'A')
			alloc += entry_run;
		else
			alloc -= entry_run;
		entry_id++;
	}
	if (alloc != space_map_allocated(sm)) {
		(void) printf("space_map_object alloc (%lld) INCONSISTENT "
		    "with space map summary (%lld)\n",
		    (longlong_t)space_map_allocated(sm), (longlong_t)alloc);
	}
}

static void
dump_metaslab_stats(metaslab_t *msp)
{
	char maxbuf[32];
	range_tree_t *rt = msp->ms_allocatable;
	zfs_btree_t *t = &msp->ms_allocatable_by_size;
	int free_pct = range_tree_space(rt) * 100 / msp->ms_size;

	/* make sure nicenum has enough space */
	CTASSERT(sizeof (maxbuf) >= NN_NUMBUF_SZ);

	zdb_nicenum(metaslab_largest_allocatable(msp), maxbuf, sizeof (maxbuf));

	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
	    "segments", zfs_btree_numnodes(t), "maxsize", maxbuf,
	    "freepct", free_pct);
	(void) printf("\tIn-memory histogram:\n");
	dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
}

static void
dump_metaslab(metaslab_t *msp)
{
	vdev_t *vd = msp->ms_group->mg_vd;
	spa_t *spa = vd->vdev_spa;
	space_map_t *sm = msp->ms_sm;
	char freebuf[32];

	zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf,
	    sizeof (freebuf));

	(void) printf(
	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
	    (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
	    (u_longlong_t)space_map_object(sm), freebuf);

	if (dump_opt['m'] > 2 && !dump_opt['L']) {
		mutex_enter(&msp->ms_lock);
		VERIFY0(metaslab_load(msp));
		range_tree_stat_verify(msp->ms_allocatable);
		dump_metaslab_stats(msp);
		metaslab_unload(msp);
		mutex_exit(&msp->ms_lock);
	}

	if (dump_opt['m'] > 1 && sm != NULL &&
	    spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
		/*
		 * The space map histogram represents free space in chunks
		 * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
		 */
		(void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
		    (u_longlong_t)msp->ms_fragmentation);
		dump_histogram(sm->sm_phys->smp_histogram,
		    SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
	}

	if (vd->vdev_ops == &vdev_draid_ops)
		ASSERT3U(msp->ms_size, <=, 1ULL << vd->vdev_ms_shift);
	else
		ASSERT3U(msp->ms_size, ==, 1ULL << vd->vdev_ms_shift);

	dump_spacemap(spa->spa_meta_objset, msp->ms_sm);

	if (spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) {
		(void) printf("\tFlush data:\n\tunflushed txg=%llu\n\n",
		    (u_longlong_t)metaslab_unflushed_txg(msp));
	}
}

static void
print_vdev_metaslab_header(vdev_t *vd)
{
	vdev_alloc_bias_t alloc_bias = vd->vdev_alloc_bias;
	const char *bias_str = "";
	if (alloc_bias == VDEV_BIAS_LOG || vd->vdev_islog) {
		bias_str = VDEV_ALLOC_BIAS_LOG;
	} else if (alloc_bias == VDEV_BIAS_SPECIAL) {
		bias_str = VDEV_ALLOC_BIAS_SPECIAL;
	} else if (alloc_bias == VDEV_BIAS_DEDUP) {
		bias_str = VDEV_ALLOC_BIAS_DEDUP;
	}

	uint64_t ms_flush_data_obj = 0;
	if (vd->vdev_top_zap != 0) {
		int error = zap_lookup(spa_meta_objset(vd->vdev_spa),
		    vd->vdev_top_zap, VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS,
		    sizeof (uint64_t), 1, &ms_flush_data_obj);
		if (error != ENOENT) {
			ASSERT0(error);
		}
	}

	(void) printf("\tvdev %10llu   %s",
	    (u_longlong_t)vd->vdev_id, bias_str);

	if (ms_flush_data_obj != 0) {
		(void) printf("   ms_unflushed_phys object %llu",
		    (u_longlong_t)ms_flush_data_obj);
	}

	(void) printf("\n\t%-10s%5llu   %-19s   %-15s   %-12s\n",
	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
	    "offset", "spacemap", "free");
	(void) printf("\t%15s   %19s   %15s   %12s\n",
	    "---------------", "-------------------",
	    "---------------", "------------");
}

static void
dump_metaslab_groups(spa_t *spa)
{
	vdev_t *rvd = spa->spa_root_vdev;
	metaslab_class_t *mc = spa_normal_class(spa);
	uint64_t fragmentation;

	metaslab_class_histogram_verify(mc);

	for (unsigned c = 0; c < rvd->vdev_children; c++) {
		vdev_t *tvd = rvd->vdev_child[c];
		metaslab_group_t *mg = tvd->vdev_mg;

		if (mg == NULL || mg->mg_class != mc)
			continue;

		metaslab_group_histogram_verify(mg);
		mg->mg_fragmentation = metaslab_group_fragmentation(mg);

		(void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
		    "fragmentation",
		    (u_longlong_t)tvd->vdev_id,
		    (u_longlong_t)tvd->vdev_ms_count);
		if (mg->mg_fragmentation == ZFS_FRAG_INVALID) {
			(void) printf("%3s\n", "-");
		} else {
			(void) printf("%3llu%%\n",
			    (u_longlong_t)mg->mg_fragmentation);
		}
		dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
	}

	(void) printf("\tpool %s\tfragmentation", spa_name(spa));
	fragmentation = metaslab_class_fragmentation(mc);
	if (fragmentation == ZFS_FRAG_INVALID)
		(void) printf("\t%3s\n", "-");
	else
		(void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation);
	dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
}

static void
print_vdev_indirect(vdev_t *vd)
{
	vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
	vdev_indirect_births_t *vib = vd->vdev_indirect_births;

	if (vim == NULL) {
		ASSERT3P(vib, ==, NULL);
		return;
	}

	ASSERT3U(vdev_indirect_mapping_object(vim), ==,
	    vic->vic_mapping_object);
	ASSERT3U(vdev_indirect_births_object(vib), ==,
	    vic->vic_births_object);

	(void) printf("indirect births obj %llu:\n",
	    (longlong_t)vic->vic_births_object);
	(void) printf("    vib_count = %llu\n",
	    (longlong_t)vdev_indirect_births_count(vib));
	for (uint64_t i = 0; i < vdev_indirect_births_count(vib); i++) {
		vdev_indirect_birth_entry_phys_t *cur_vibe =
		    &vib->vib_entries[i];
		(void) printf("\toffset %llx -> txg %llu\n",
		    (longlong_t)cur_vibe->vibe_offset,
		    (longlong_t)cur_vibe->vibe_phys_birth_txg);
	}
	(void) printf("\n");

	(void) printf("indirect mapping obj %llu:\n",
	    (longlong_t)vic->vic_mapping_object);
	(void) printf("    vim_max_offset = 0x%llx\n",
	    (longlong_t)vdev_indirect_mapping_max_offset(vim));
	(void) printf("    vim_bytes_mapped = 0x%llx\n",
	    (longlong_t)vdev_indirect_mapping_bytes_mapped(vim));
	(void) printf("    vim_count = %llu\n",
	    (longlong_t)vdev_indirect_mapping_num_entries(vim));

	if (dump_opt['d'] <= 5 && dump_opt['m'] <= 3)
		return;

	uint32_t *counts = vdev_indirect_mapping_load_obsolete_counts(vim);

	for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
		vdev_indirect_mapping_entry_phys_t *vimep =
		    &vim->vim_entries[i];
		(void) printf("\t<%llx:%llx:%llx> -> "
		    "<%llx:%llx:%llx> (%x obsolete)\n",
		    (longlong_t)vd->vdev_id,
		    (longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
		    (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
		    (longlong_t)DVA_GET_VDEV(&vimep->vimep_dst),
		    (longlong_t)DVA_GET_OFFSET(&vimep->vimep_dst),
		    (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
		    counts[i]);
	}
	(void) printf("\n");

	uint64_t obsolete_sm_object;
	VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
	if (obsolete_sm_object != 0) {
		objset_t *mos = vd->vdev_spa->spa_meta_objset;
		(void) printf("obsolete space map object %llu:\n",
		    (u_longlong_t)obsolete_sm_object);
		ASSERT(vd->vdev_obsolete_sm != NULL);
		ASSERT3U(space_map_object(vd->vdev_obsolete_sm), ==,
		    obsolete_sm_object);
		dump_spacemap(mos, vd->vdev_obsolete_sm);
		(void) printf("\n");
	}
}
1814
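/*
 * Dump the metaslabs of every top-level vdev, or only the vdev and
 * metaslabs selected on the command line (zopt_metaslab).
 */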
1815 static void
1816 dump_metaslabs(spa_t *spa)
1817 {
1818 vdev_t *vd, *rvd = spa->spa_root_vdev;
1819 uint64_t m, c = 0, children = rvd->vdev_children;
1820
1821 (void) printf("\nMetaslabs:\n");
1822
1823 if (!dump_opt['d'] && zopt_metaslab_args > 0) {
1824 c = zopt_metaslab[0];
1825
1826 if (c >= children)
1827 (void) fatal("bad vdev id: %llu", (u_longlong_t)c);
1828
1829 if (zopt_metaslab_args > 1) {
1830 vd = rvd->vdev_child[c];
1831 print_vdev_metaslab_header(vd);
1832
1833 for (m = 1; m < zopt_metaslab_args; m++) {
1834 if (zopt_metaslab[m] < vd->vdev_ms_count)
1835 dump_metaslab(
1836 vd->vdev_ms[zopt_metaslab[m]]);
1837 else
1838 (void) fprintf(stderr, "bad metaslab "
1839 "number %llu\n",
1840 (u_longlong_t)zopt_metaslab[m]);
1841 }
1842 (void) printf("\n");
1843 return;
1844 }
1845 children = c + 1;
1846 }
1847 for (; c < children; c++) {
1848 vd = rvd->vdev_child[c];
1849 print_vdev_metaslab_header(vd);
1850
1851 print_vdev_indirect(vd);
1852
1853 for (m = 0; m < vd->vdev_ms_count; m++)
1854 dump_metaslab(vd->vdev_ms[m]);
1855 (void) printf("\n");
1856 }
1857 }
1858
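/*
 * Dump every log space map in the pool; these exist only while the
 * log_spacemap feature is active.
 */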
1859 static void
1860 dump_log_spacemaps(spa_t *spa)
1861 {
1862 if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
1863 return;
1864
1865 (void) printf("\nLog Space Maps in Pool:\n");
1866 for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
1867 sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
1868 space_map_t *sm = NULL;
1869 VERIFY0(space_map_open(&sm, spa_meta_objset(spa),
1870 sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT));
1871
1872 (void) printf("Log Spacemap object %llu txg %llu\n",
1873 (u_longlong_t)sls->sls_sm_obj, (u_longlong_t)sls->sls_txg);
1874 dump_spacemap(spa->spa_meta_objset, sm);
1875 space_map_close(sm);
1876 }
1877 (void) printf("\n");
1878 }
1879
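/*
 * Print each allocated physical variant (ditto, single, double, triple)
 * of one DDT entry as a reconstructed block pointer.
 */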
1880 static void
1881 dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
1882 {
1883 const ddt_phys_t *ddp = dde->dde_phys;
1884 const ddt_key_t *ddk = &dde->dde_key;
1885 const char *types[4] = { "ditto", "single", "double", "triple" };
1886 char blkbuf[BP_SPRINTF_LEN];
1887 blkptr_t blk;
1888 int p;
1889
1890 for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
1891 if (ddp->ddp_phys_birth == 0)
1892 continue;
1893 ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
1894 snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
1895 (void) printf("index %llx refcnt %llu %s %s\n",
1896 (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
1897 types[p], blkbuf);
1898 }
1899 }
1900
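/*
 * Derive the dedup (rD/D), compress (rL/rP) and copies (rD/rP) ratios
 * from the aggregated DDT statistics.
 */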
1901 static void
1902 dump_dedup_ratio(const ddt_stat_t *dds)
1903 {
1904 double rL, rP, rD, D, dedup, compress, copies;
1905
1906 if (dds->dds_blocks == 0)
1907 return;
1908
1909 rL = (double)dds->dds_ref_lsize;
1910 rP = (double)dds->dds_ref_psize;
1911 rD = (double)dds->dds_ref_dsize;
1912 D = (double)dds->dds_dsize;
1913
1914 dedup = rD / D;
1915 compress = rL / rP;
1916 copies = rD / rP;
1917
1918 (void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
1919 "dedup * compress / copies = %.2f\n\n",
1920 dedup, compress, copies, dedup * compress / copies);
1921 }
1922
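/*
 * Summarize one DDT object: the entry count and the average on-disk and
 * in-core size per entry.  Higher -D verbosity also dumps the object's
 * histogram and, finally, its individual entries.
 */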
1923 static void
1924 dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
1925 {
1926 char name[DDT_NAMELEN];
1927 ddt_entry_t dde;
1928 uint64_t walk = 0;
1929 dmu_object_info_t doi;
1930 uint64_t count, dspace, mspace;
1931 int error;
1932
1933 error = ddt_object_info(ddt, type, class, &doi);
1934
1935 if (error == ENOENT)
1936 return;
1937 ASSERT(error == 0);
1938
1939 error = ddt_object_count(ddt, type, class, &count);
1940 ASSERT(error == 0);
1941 if (count == 0)
1942 return;
1943
1944 dspace = doi.doi_physical_blocks_512 << 9;
1945 mspace = doi.doi_fill_count * doi.doi_data_block_size;
1946
1947 ddt_object_name(ddt, type, class, name);
1948
1949 (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
1950 name,
1951 (u_longlong_t)count,
1952 (u_longlong_t)(dspace / count),
1953 (u_longlong_t)(mspace / count));
1954
1955 if (dump_opt['D'] < 3)
1956 return;
1957
1958 zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
1959
1960 if (dump_opt['D'] < 4)
1961 return;
1962
1963 if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
1964 return;
1965
1966 (void) printf("%s contents:\n\n", name);
1967
1968 while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
1969 dump_dde(ddt, &dde, walk);
1970
1971 ASSERT3U(error, ==, ENOENT);
1972
1973 (void) printf("\n");
1974 }
1975
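/*
 * Dump every DDT object in the pool (per checksum function, type and
 * class), followed by the pool-wide histogram and dedup ratio.
 */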
1976 static void
1977 dump_all_ddts(spa_t *spa)
1978 {
1979 ddt_histogram_t ddh_total;
1980 ddt_stat_t dds_total;
1981
1982 	memset(&ddh_total, 0, sizeof (ddh_total));
1983 	memset(&dds_total, 0, sizeof (dds_total));
1984
1985 for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
1986 ddt_t *ddt = spa->spa_ddt[c];
1987 for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
1988 for (enum ddt_class class = 0; class < DDT_CLASSES;
1989 class++) {
1990 dump_ddt(ddt, type, class);
1991 }
1992 }
1993 }
1994
1995 ddt_get_dedup_stats(spa, &dds_total);
1996
1997 if (dds_total.dds_blocks == 0) {
1998 (void) printf("All DDTs are empty\n");
1999 return;
2000 }
2001
2002 (void) printf("\n");
2003
2004 if (dump_opt['D'] > 1) {
2005 (void) printf("DDT histogram (aggregated over all DDTs):\n");
2006 ddt_get_dedup_histogram(spa, &ddh_total);
2007 zpool_dump_ddt(&dds_total, &ddh_total);
2008 }
2009
2010 dump_dedup_ratio(&dds_total);
2011 }
2012
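/* range_tree_walk() callback: print one DTL segment with the given prefix. */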
2013 static void
2014 dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
2015 {
2016 char *prefix = arg;
2017
2018 (void) printf("%s [%llu,%llu) length %llu\n",
2019 prefix,
2020 (u_longlong_t)start,
2021 (u_longlong_t)(start + size),
2022 (u_longlong_t)(size));
2023 }
2024
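/*
 * Recursively print the dirty time logs of vd and its children, noting
 * whether each vdev's DTL is required to keep the pool consistent.
 */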
2025 static void
2026 dump_dtl(vdev_t *vd, int indent)
2027 {
2028 spa_t *spa = vd->vdev_spa;
2029 boolean_t required;
2030 const char *name[DTL_TYPES] = { "missing", "partial", "scrub",
2031 "outage" };
2032 char prefix[256];
2033
2034 spa_vdev_state_enter(spa, SCL_NONE);
2035 required = vdev_dtl_required(vd);
2036 (void) spa_vdev_state_exit(spa, NULL, 0);
2037
2038 if (indent == 0)
2039 (void) printf("\nDirty time logs:\n\n");
2040
2041 (void) printf("\t%*s%s [%s]\n", indent, "",
2042 vd->vdev_path ? vd->vdev_path :
2043 vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
2044 required ? "DTL-required" : "DTL-expendable");
2045
2046 for (int t = 0; t < DTL_TYPES; t++) {
2047 range_tree_t *rt = vd->vdev_dtl[t];
2048 if (range_tree_space(rt) == 0)
2049 continue;
2050 (void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
2051 indent + 2, "", name[t]);
2052 range_tree_walk(rt, dump_dtl_seg, prefix);
2053 if (dump_opt['d'] > 5 && vd->vdev_children == 0)
2054 dump_spacemap(spa->spa_meta_objset,
2055 vd->vdev_dtl_sm);
2056 }
2057
2058 for (unsigned c = 0; c < vd->vdev_children; c++)
2059 dump_dtl(vd->vdev_child[c], indent + 4);
2060 }
2061
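/*
 * Read the on-disk pool history in SPA_OLD_MAXBLOCKSIZE chunks, unpack
 * it into nvlists, and pretty-print each record as a command, internal
 * event, or ioctl.  With -hh the raw nvlists are dumped as well.
 */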
2062 static void
2063 dump_history(spa_t *spa)
2064 {
2065 nvlist_t **events = NULL;
2066 char *buf;
2067 uint64_t resid, len, off = 0;
2068 uint_t num = 0;
2069 int error;
2070 char tbuf[30];
2071
2072 if ((buf = malloc(SPA_OLD_MAXBLOCKSIZE)) == NULL) {
2073 (void) fprintf(stderr, "%s: unable to allocate I/O buffer\n",
2074 __func__);
2075 return;
2076 }
2077
2078 do {
2079 len = SPA_OLD_MAXBLOCKSIZE;
2080
2081 if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
2082 (void) fprintf(stderr, "Unable to read history: "
2083 "error %d\n", error);
2084 free(buf);
2085 return;
2086 }
2087
2088 if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
2089 break;
2090
2091 off -= resid;
2092 } while (len != 0);
2093
2094 (void) printf("\nHistory:\n");
2095 for (unsigned i = 0; i < num; i++) {
2096 boolean_t printed = B_FALSE;
2097
2098 if (nvlist_exists(events[i], ZPOOL_HIST_TIME)) {
2099 time_t tsec;
2100 struct tm t;
2101
2102 tsec = fnvlist_lookup_uint64(events[i],
2103 ZPOOL_HIST_TIME);
2104 (void) localtime_r(&tsec, &t);
2105 (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
2106 } else {
2107 tbuf[0] = '\0';
2108 }
2109
2110 if (nvlist_exists(events[i], ZPOOL_HIST_CMD)) {
2111 (void) printf("%s %s\n", tbuf,
2112 fnvlist_lookup_string(events[i], ZPOOL_HIST_CMD));
2113 } else if (nvlist_exists(events[i], ZPOOL_HIST_INT_EVENT)) {
2114 uint64_t ievent;
2115
2116 ievent = fnvlist_lookup_uint64(events[i],
2117 ZPOOL_HIST_INT_EVENT);
2118 if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS)
2119 goto next;
2120
2121 (void) printf(" %s [internal %s txg:%ju] %s\n",
2122 tbuf,
2123 zfs_history_event_names[ievent],
2124 fnvlist_lookup_uint64(events[i],
2125 ZPOOL_HIST_TXG),
2126 fnvlist_lookup_string(events[i],
2127 ZPOOL_HIST_INT_STR));
2128 } else if (nvlist_exists(events[i], ZPOOL_HIST_INT_NAME)) {
2129 (void) printf("%s [txg:%ju] %s", tbuf,
2130 fnvlist_lookup_uint64(events[i],
2131 ZPOOL_HIST_TXG),
2132 fnvlist_lookup_string(events[i],
2133 ZPOOL_HIST_INT_NAME));
2134
2135 if (nvlist_exists(events[i], ZPOOL_HIST_DSNAME)) {
2136 (void) printf("%s (%llu)",
2137 fnvlist_lookup_string(events[i],
2138 ZPOOL_HIST_DSNAME),
2139 (u_longlong_t)fnvlist_lookup_uint64(
2140 events[i],
2141 ZPOOL_HIST_DSID));
2142 }
2143
2144 (void) printf(" %s\n", fnvlist_lookup_string(events[i],
2145 ZPOOL_HIST_INT_STR));
2146 } else if (nvlist_exists(events[i], ZPOOL_HIST_IOCTL)) {
2147 (void) printf("%s ioctl %s\n", tbuf,
2148 fnvlist_lookup_string(events[i],
2149 ZPOOL_HIST_IOCTL));
2150
2151 if (nvlist_exists(events[i], ZPOOL_HIST_INPUT_NVL)) {
2152 (void) printf(" input:\n");
2153 dump_nvlist(fnvlist_lookup_nvlist(events[i],
2154 ZPOOL_HIST_INPUT_NVL), 8);
2155 }
2156 if (nvlist_exists(events[i], ZPOOL_HIST_OUTPUT_NVL)) {
2157 (void) printf(" output:\n");
2158 dump_nvlist(fnvlist_lookup_nvlist(events[i],
2159 ZPOOL_HIST_OUTPUT_NVL), 8);
2160 }
2161 if (nvlist_exists(events[i], ZPOOL_HIST_ERRNO)) {
2162 (void) printf(" errno: %lld\n",
2163 (longlong_t)fnvlist_lookup_int64(events[i],
2164 ZPOOL_HIST_ERRNO));
2165 }
2166 } else {
2167 goto next;
2168 }
2169
2170 printed = B_TRUE;
2171 next:
2172 if (dump_opt['h'] > 1) {
2173 if (!printed)
2174 (void) printf("unrecognized record:\n");
2175 dump_nvlist(events[i], 2);
2176 }
2177 }
2178 free(buf);
2179 }
2180
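/* Intentionally empty object_viewer stub. */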
2181 /*ARGSUSED*/
2182 static void
2183 dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
2184 {
2185 }
2186
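/*
 * Convert a (level, blkid) bookmark into the logical byte offset of the
 * range it covers within the object.
 */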
2187 static uint64_t
2188 blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp,
2189 const zbookmark_phys_t *zb)
2190 {
2191 if (dnp == NULL) {
2192 ASSERT(zb->zb_level < 0);
2193 if (zb->zb_object == 0)
2194 return (zb->zb_blkid);
2195 return (zb->zb_blkid * BP_GET_LSIZE(bp));
2196 }
2197
2198 ASSERT(zb->zb_level >= 0);
2199
2200 return ((zb->zb_blkid <<
2201 (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
2202 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
2203 }
2204
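/*
 * Append the zstd header (compressed size, version and level) of a
 * zstd-compressed block to blkbuf.  The block is read raw
 * (ZIO_FLAG_RAW_COMPRESS) so the header can be inspected without
 * decompressing the payload.
 */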
2205 static void
2206 snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen,
2207 const blkptr_t *bp)
2208 {
2209 abd_t *pabd;
2210 void *buf;
2211 zio_t *zio;
2212 zfs_zstdhdr_t zstd_hdr;
2213 int error;
2214
2215 if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_ZSTD)
2216 return;
2217
2218 if (BP_IS_HOLE(bp))
2219 return;
2220
2221 if (BP_IS_EMBEDDED(bp)) {
2222 buf = malloc(SPA_MAXBLOCKSIZE);
2223 if (buf == NULL) {
2224 (void) fprintf(stderr, "out of memory\n");
2225 exit(1);
2226 }
2227 decode_embedded_bp_compressed(bp, buf);
2228 memcpy(&zstd_hdr, buf, sizeof (zstd_hdr));
2229 free(buf);
2230 zstd_hdr.c_len = BE_32(zstd_hdr.c_len);
2231 zstd_hdr.raw_version_level = BE_32(zstd_hdr.raw_version_level);
2232 (void) snprintf(blkbuf + strlen(blkbuf),
2233 buflen - strlen(blkbuf),
2234 " ZSTD:size=%u:version=%u:level=%u:EMBEDDED",
2235 zstd_hdr.c_len, zstd_hdr.version, zstd_hdr.level);
2236 return;
2237 }
2238
2239 pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE);
2240 zio = zio_root(spa, NULL, NULL, 0);
2241
2242 /* Decrypt but don't decompress so we can read the compression header */
2243 zio_nowait(zio_read(zio, spa, bp, pabd, BP_GET_PSIZE(bp), NULL, NULL,
2244 ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW_COMPRESS,
2245 NULL));
2246 error = zio_wait(zio);
2247 if (error) {
2248 (void) fprintf(stderr, "read failed: %d\n", error);
2249 return;
2250 }
2251 buf = abd_borrow_buf_copy(pabd, BP_GET_LSIZE(bp));
2252 memcpy(&zstd_hdr, buf, sizeof (zstd_hdr));
2253 zstd_hdr.c_len = BE_32(zstd_hdr.c_len);
2254 zstd_hdr.raw_version_level = BE_32(zstd_hdr.raw_version_level);
2255
2256 (void) snprintf(blkbuf + strlen(blkbuf),
2257 buflen - strlen(blkbuf),
2258 " ZSTD:size=%u:version=%u:level=%u:NORMAL",
2259 zstd_hdr.c_len, zstd_hdr.version, zstd_hdr.level);
2260
2261 abd_return_buf_copy(pabd, buf, BP_GET_LSIZE(bp));
2262 }
2263
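/*
 * Format a block pointer in a compact, single-line form.  At -b
 * verbosity 6 and above the full snprintf_blkptr() format is used
 * instead.
 */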
2264 static void
2265 snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp,
2266 boolean_t bp_freed)
2267 {
2268 const dva_t *dva = bp->blk_dva;
2269 int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
2270 int i;
2271
2272 if (dump_opt['b'] >= 6) {
2273 snprintf_blkptr(blkbuf, buflen, bp);
2274 if (bp_freed) {
2275 (void) snprintf(blkbuf + strlen(blkbuf),
2276 buflen - strlen(blkbuf), " %s", "FREE");
2277 }
2278 return;
2279 }
2280
2281 if (BP_IS_EMBEDDED(bp)) {
2282 		(void) snprintf(blkbuf, buflen,
2283 "EMBEDDED et=%u %llxL/%llxP B=%llu",
2284 (int)BPE_GET_ETYPE(bp),
2285 (u_longlong_t)BPE_GET_LSIZE(bp),
2286 (u_longlong_t)BPE_GET_PSIZE(bp),
2287 (u_longlong_t)bp->blk_birth);
2288 return;
2289 }
2290
2291 blkbuf[0] = '\0';
2292
2293 for (i = 0; i < ndvas; i++)
2294 (void) snprintf(blkbuf + strlen(blkbuf),
2295 buflen - strlen(blkbuf), "%llu:%llx:%llx ",
2296 (u_longlong_t)DVA_GET_VDEV(&dva[i]),
2297 (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
2298 (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
2299
2300 if (BP_IS_HOLE(bp)) {
2301 (void) snprintf(blkbuf + strlen(blkbuf),
2302 buflen - strlen(blkbuf),
2303 "%llxL B=%llu",
2304 (u_longlong_t)BP_GET_LSIZE(bp),
2305 (u_longlong_t)bp->blk_birth);
2306 } else {
2307 (void) snprintf(blkbuf + strlen(blkbuf),
2308 buflen - strlen(blkbuf),
2309 "%llxL/%llxP F=%llu B=%llu/%llu",
2310 (u_longlong_t)BP_GET_LSIZE(bp),
2311 (u_longlong_t)BP_GET_PSIZE(bp),
2312 (u_longlong_t)BP_GET_FILL(bp),
2313 (u_longlong_t)bp->blk_birth,
2314 (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
2315 if (bp_freed)
2316 (void) snprintf(blkbuf + strlen(blkbuf),
2317 buflen - strlen(blkbuf), " %s", "FREE");
2318 (void) snprintf(blkbuf + strlen(blkbuf),
2319 buflen - strlen(blkbuf), " cksum=%llx:%llx:%llx:%llx",
2320 (u_longlong_t)bp->blk_cksum.zc_word[0],
2321 (u_longlong_t)bp->blk_cksum.zc_word[1],
2322 (u_longlong_t)bp->blk_cksum.zc_word[2],
2323 (u_longlong_t)bp->blk_cksum.zc_word[3]);
2324 }
2325 }
2326
2327 static void
2328 print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb,
2329 const dnode_phys_t *dnp)
2330 {
2331 char blkbuf[BP_SPRINTF_LEN];
2332 int l;
2333
2334 if (!BP_IS_EMBEDDED(bp)) {
2335 ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
2336 ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
2337 }
2338
2339 (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
2340
2341 ASSERT(zb->zb_level >= 0);
2342
2343 for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
2344 if (l == zb->zb_level) {
2345 (void) printf("L%llx", (u_longlong_t)zb->zb_level);
2346 } else {
2347 (void) printf(" ");
2348 }
2349 }
2350
2351 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, B_FALSE);
2352 if (dump_opt['Z'] && BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD)
2353 snprintf_zstd_header(spa, blkbuf, sizeof (blkbuf), bp);
2354 (void) printf("%s\n", blkbuf);
2355 }
2356
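/*
 * Recursively visit and print every block pointer reachable from bp,
 * reading indirect blocks through the ARC and verifying that each
 * parent's fill count matches the sum of its children's.
 */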
2357 static int
2358 visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
2359 blkptr_t *bp, const zbookmark_phys_t *zb)
2360 {
2361 int err = 0;
2362
2363 if (bp->blk_birth == 0)
2364 return (0);
2365
2366 print_indirect(spa, bp, zb, dnp);
2367
2368 if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
2369 arc_flags_t flags = ARC_FLAG_WAIT;
2370 int i;
2371 blkptr_t *cbp;
2372 int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
2373 arc_buf_t *buf;
2374 uint64_t fill = 0;
2375 ASSERT(!BP_IS_REDACTED(bp));
2376
2377 err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf,
2378 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
2379 if (err)
2380 return (err);
2381 ASSERT(buf->b_data);
2382
2383 /* recursively visit blocks below this */
2384 cbp = buf->b_data;
2385 for (i = 0; i < epb; i++, cbp++) {
2386 zbookmark_phys_t czb;
2387
2388 SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
2389 zb->zb_level - 1,
2390 zb->zb_blkid * epb + i);
2391 err = visit_indirect(spa, dnp, cbp, &czb);
2392 if (err)
2393 break;
2394 fill += BP_GET_FILL(cbp);
2395 }
2396 if (!err)
2397 ASSERT3U(fill, ==, BP_GET_FILL(bp));
2398 arc_buf_destroy(buf, &buf);
2399 }
2400
2401 return (err);
2402 }
2403
2404 /*ARGSUSED*/
2405 static void
2406 dump_indirect(dnode_t *dn)
2407 {
2408 dnode_phys_t *dnp = dn->dn_phys;
2409 int j;
2410 zbookmark_phys_t czb;
2411
2412 (void) printf("Indirect blocks:\n");
2413
2414 SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
2415 dn->dn_object, dnp->dn_nlevels - 1, 0);
2416 for (j = 0; j < dnp->dn_nblkptr; j++) {
2417 czb.zb_blkid = j;
2418 (void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
2419 &dnp->dn_blkptr[j], &czb);
2420 }
2421
2422 (void) printf("\n");
2423 }
2424
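/* object_viewer for DSL directory objects: print the dsl_dir_phys_t. */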
2425 /*ARGSUSED*/
2426 static void
2427 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
2428 {
2429 dsl_dir_phys_t *dd = data;
2430 time_t crtime;
2431 char nice[32];
2432
2433 /* make sure nicenum has enough space */
2434 CTASSERT(sizeof (nice) >= NN_NUMBUF_SZ);
2435
2436 if (dd == NULL)
2437 return;
2438
2439 ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
2440
2441 crtime = dd->dd_creation_time;
2442 (void) printf("\t\tcreation_time = %s", ctime(&crtime));
2443 (void) printf("\t\thead_dataset_obj = %llu\n",
2444 (u_longlong_t)dd->dd_head_dataset_obj);
2445 (void) printf("\t\tparent_dir_obj = %llu\n",
2446 (u_longlong_t)dd->dd_parent_obj);
2447 (void) printf("\t\torigin_obj = %llu\n",
2448 (u_longlong_t)dd->dd_origin_obj);
2449 (void) printf("\t\tchild_dir_zapobj = %llu\n",
2450 (u_longlong_t)dd->dd_child_dir_zapobj);
2451 zdb_nicenum(dd->dd_used_bytes, nice, sizeof (nice));
2452 (void) printf("\t\tused_bytes = %s\n", nice);
2453 zdb_nicenum(dd->dd_compressed_bytes, nice, sizeof (nice));
2454 (void) printf("\t\tcompressed_bytes = %s\n", nice);
2455 zdb_nicenum(dd->dd_uncompressed_bytes, nice, sizeof (nice));
2456 (void) printf("\t\tuncompressed_bytes = %s\n", nice);
2457 zdb_nicenum(dd->dd_quota, nice, sizeof (nice));
2458 (void) printf("\t\tquota = %s\n", nice);
2459 zdb_nicenum(dd->dd_reserved, nice, sizeof (nice));
2460 (void) printf("\t\treserved = %s\n", nice);
2461 (void) printf("\t\tprops_zapobj = %llu\n",
2462 (u_longlong_t)dd->dd_props_zapobj);
2463 (void) printf("\t\tdeleg_zapobj = %llu\n",
2464 (u_longlong_t)dd->dd_deleg_zapobj);
2465 (void) printf("\t\tflags = %llx\n",
2466 (u_longlong_t)dd->dd_flags);
2467
2468 #define DO(which) \
2469 zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice, \
2470 sizeof (nice)); \
2471 (void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
2472 DO(HEAD);
2473 DO(SNAP);
2474 DO(CHILD);
2475 DO(CHILD_RSRV);
2476 DO(REFRSRV);
2477 #undef DO
2478 (void) printf("\t\tclones = %llu\n",
2479 (u_longlong_t)dd->dd_clones);
2480 }
2481
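/* object_viewer for DSL dataset objects: print the dsl_dataset_phys_t. */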
2482 /*ARGSUSED*/
2483 static void
2484 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
2485 {
2486 dsl_dataset_phys_t *ds = data;
2487 time_t crtime;
2488 char used[32], compressed[32], uncompressed[32], unique[32];
2489 char blkbuf[BP_SPRINTF_LEN];
2490
2491 /* make sure nicenum has enough space */
2492 CTASSERT(sizeof (used) >= NN_NUMBUF_SZ);
2493 CTASSERT(sizeof (compressed) >= NN_NUMBUF_SZ);
2494 CTASSERT(sizeof (uncompressed) >= NN_NUMBUF_SZ);
2495 CTASSERT(sizeof (unique) >= NN_NUMBUF_SZ);
2496
2497 if (ds == NULL)
2498 return;
2499
2500 ASSERT(size == sizeof (*ds));
2501 crtime = ds->ds_creation_time;
2502 zdb_nicenum(ds->ds_referenced_bytes, used, sizeof (used));
2503 zdb_nicenum(ds->ds_compressed_bytes, compressed, sizeof (compressed));
2504 zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed,
2505 sizeof (uncompressed));
2506 zdb_nicenum(ds->ds_unique_bytes, unique, sizeof (unique));
2507 snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp);
2508
2509 (void) printf("\t\tdir_obj = %llu\n",
2510 (u_longlong_t)ds->ds_dir_obj);
2511 (void) printf("\t\tprev_snap_obj = %llu\n",
2512 (u_longlong_t)ds->ds_prev_snap_obj);
2513 (void) printf("\t\tprev_snap_txg = %llu\n",
2514 (u_longlong_t)ds->ds_prev_snap_txg);
2515 (void) printf("\t\tnext_snap_obj = %llu\n",
2516 (u_longlong_t)ds->ds_next_snap_obj);
2517 (void) printf("\t\tsnapnames_zapobj = %llu\n",
2518 (u_longlong_t)ds->ds_snapnames_zapobj);
2519 (void) printf("\t\tnum_children = %llu\n",
2520 (u_longlong_t)ds->ds_num_children);
2521 (void) printf("\t\tuserrefs_obj = %llu\n",
2522 (u_longlong_t)ds->ds_userrefs_obj);
2523 (void) printf("\t\tcreation_time = %s", ctime(&crtime));
2524 (void) printf("\t\tcreation_txg = %llu\n",
2525 (u_longlong_t)ds->ds_creation_txg);
2526 (void) printf("\t\tdeadlist_obj = %llu\n",
2527 (u_longlong_t)ds->ds_deadlist_obj);
2528 (void) printf("\t\tused_bytes = %s\n", used);
2529 (void) printf("\t\tcompressed_bytes = %s\n", compressed);
2530 (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
2531 (void) printf("\t\tunique = %s\n", unique);
2532 (void) printf("\t\tfsid_guid = %llu\n",
2533 (u_longlong_t)ds->ds_fsid_guid);
2534 (void) printf("\t\tguid = %llu\n",
2535 (u_longlong_t)ds->ds_guid);
2536 (void) printf("\t\tflags = %llx\n",
2537 (u_longlong_t)ds->ds_flags);
2538 (void) printf("\t\tnext_clones_obj = %llu\n",
2539 (u_longlong_t)ds->ds_next_clones_obj);
2540 (void) printf("\t\tprops_obj = %llu\n",
2541 (u_longlong_t)ds->ds_props_obj);
2542 (void) printf("\t\tbp = %s\n", blkbuf);
2543 }
2544
2545 /*ARGSUSED*/
2546 static int
2547 dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
2548 {
2549 char blkbuf[BP_SPRINTF_LEN];
2550
2551 if (bp->blk_birth != 0) {
2552 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2553 (void) printf("\t%s\n", blkbuf);
2554 }
2555 return (0);
2556 }
2557
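/*
 * Summarize a bptree (the async-destroy queue) and, at -ddddd, dump its
 * block pointers.
 */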
2558 static void
2559 dump_bptree(objset_t *os, uint64_t obj, const char *name)
2560 {
2561 char bytes[32];
2562 bptree_phys_t *bt;
2563 dmu_buf_t *db;
2564
2565 /* make sure nicenum has enough space */
2566 CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
2567
2568 if (dump_opt['d'] < 3)
2569 return;
2570
2571 VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
2572 bt = db->db_data;
2573 zdb_nicenum(bt->bt_bytes, bytes, sizeof (bytes));
2574 (void) printf("\n %s: %llu datasets, %s\n",
2575 name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes);
2576 dmu_buf_rele(db, FTAG);
2577
2578 if (dump_opt['d'] < 5)
2579 return;
2580
2581 (void) printf("\n");
2582
2583 (void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL);
2584 }
2585
2586 /*ARGSUSED*/
2587 static int
2588 dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx)
2589 {
2590 char blkbuf[BP_SPRINTF_LEN];
2591
2592 ASSERT(bp->blk_birth != 0);
2593 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, bp_freed);
2594 (void) printf("\t%s\n", blkbuf);
2595 return (0);
2596 }
2597
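/*
 * Print a bpobj's summary (and, recursively, its subobjs); at -ddddd
 * the individual block pointers are dumped as well.
 */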
2598 static void
2599 dump_full_bpobj(bpobj_t *bpo, const char *name, int indent)
2600 {
2601 char bytes[32];
2602 char comp[32];
2603 char uncomp[32];
2604 uint64_t i;
2605
2606 /* make sure nicenum has enough space */
2607 CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
2608 CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
2609 CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
2610
2611 if (dump_opt['d'] < 3)
2612 return;
2613
2614 zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes, sizeof (bytes));
2615 if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
2616 zdb_nicenum(bpo->bpo_phys->bpo_comp, comp, sizeof (comp));
2617 zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp, sizeof (uncomp));
2618 if (bpo->bpo_havefreed) {
2619 (void) printf(" %*s: object %llu, %llu local "
2620 "blkptrs, %llu freed, %llu subobjs in object %llu, "
2621 "%s (%s/%s comp)\n",
2622 indent * 8, name,
2623 (u_longlong_t)bpo->bpo_object,
2624 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
2625 (u_longlong_t)bpo->bpo_phys->bpo_num_freed,
2626 (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
2627 (u_longlong_t)bpo->bpo_phys->bpo_subobjs,
2628 bytes, comp, uncomp);
2629 } else {
2630 (void) printf(" %*s: object %llu, %llu local "
2631 "blkptrs, %llu subobjs in object %llu, "
2632 "%s (%s/%s comp)\n",
2633 indent * 8, name,
2634 (u_longlong_t)bpo->bpo_object,
2635 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
2636 (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
2637 (u_longlong_t)bpo->bpo_phys->bpo_subobjs,
2638 bytes, comp, uncomp);
2639 }
2640
2641 for (i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
2642 uint64_t subobj;
2643 bpobj_t subbpo;
2644 int error;
2645 VERIFY0(dmu_read(bpo->bpo_os,
2646 bpo->bpo_phys->bpo_subobjs,
2647 i * sizeof (subobj), sizeof (subobj), &subobj, 0));
2648 error = bpobj_open(&subbpo, bpo->bpo_os, subobj);
2649 if (error != 0) {
2650 (void) printf("ERROR %u while trying to open "
2651 "subobj id %llu\n",
2652 error, (u_longlong_t)subobj);
2653 continue;
2654 }
2655 dump_full_bpobj(&subbpo, "subobj", indent + 1);
2656 bpobj_close(&subbpo);
2657 }
2658 } else {
2659 if (bpo->bpo_havefreed) {
2660 (void) printf(" %*s: object %llu, %llu blkptrs, "
2661 "%llu freed, %s\n",
2662 indent * 8, name,
2663 (u_longlong_t)bpo->bpo_object,
2664 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
2665 (u_longlong_t)bpo->bpo_phys->bpo_num_freed,
2666 bytes);
2667 } else {
2668 (void) printf(" %*s: object %llu, %llu blkptrs, "
2669 "%s\n",
2670 indent * 8, name,
2671 (u_longlong_t)bpo->bpo_object,
2672 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
2673 bytes);
2674 }
2675 }
2676
2677 if (dump_opt['d'] < 5)
2678 return;
2679
2680
2681 if (indent == 0) {
2682 (void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
2683 (void) printf("\n");
2684 }
2685 }
2686
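/*
 * Look up a bookmark by its full name (dataset#bookmark) and print it;
 * optionally follow its redaction object and dump the redaction list.
 */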
2687 static int
2688 dump_bookmark(dsl_pool_t *dp, char *name, boolean_t print_redact,
2689 boolean_t print_list)
2690 {
2691 int err = 0;
2692 zfs_bookmark_phys_t prop;
2693 objset_t *mos = dp->dp_spa->spa_meta_objset;
2694 err = dsl_bookmark_lookup(dp, name, NULL, &prop);
2695
2696 if (err != 0) {
2697 return (err);
2698 }
2699
2700 (void) printf("\t#%s: ", strchr(name, '#') + 1);
2701 (void) printf("{guid: %llx creation_txg: %llu creation_time: "
2702 "%llu redaction_obj: %llu}\n", (u_longlong_t)prop.zbm_guid,
2703 (u_longlong_t)prop.zbm_creation_txg,
2704 (u_longlong_t)prop.zbm_creation_time,
2705 (u_longlong_t)prop.zbm_redaction_obj);
2706
2707 IMPLY(print_list, print_redact);
2708 if (!print_redact || prop.zbm_redaction_obj == 0)
2709 return (0);
2710
2711 redaction_list_t *rl;
2712 VERIFY0(dsl_redaction_list_hold_obj(dp,
2713 prop.zbm_redaction_obj, FTAG, &rl));
2714
2715 redaction_list_phys_t *rlp = rl->rl_phys;
2716 (void) printf("\tRedacted:\n\t\tProgress: ");
2717 if (rlp->rlp_last_object != UINT64_MAX ||
2718 rlp->rlp_last_blkid != UINT64_MAX) {
2719 (void) printf("%llu %llu (incomplete)\n",
2720 (u_longlong_t)rlp->rlp_last_object,
2721 (u_longlong_t)rlp->rlp_last_blkid);
2722 } else {
2723 (void) printf("complete\n");
2724 }
2725 (void) printf("\t\tSnapshots: [");
2726 for (unsigned int i = 0; i < rlp->rlp_num_snaps; i++) {
2727 if (i > 0)
2728 (void) printf(", ");
2729 (void) printf("%0llu",
2730 (u_longlong_t)rlp->rlp_snaps[i]);
2731 }
2732 (void) printf("]\n\t\tLength: %llu\n",
2733 (u_longlong_t)rlp->rlp_num_entries);
2734
2735 if (!print_list) {
2736 dsl_redaction_list_rele(rl, FTAG);
2737 return (0);
2738 }
2739
2740 if (rlp->rlp_num_entries == 0) {
2741 dsl_redaction_list_rele(rl, FTAG);
2742 (void) printf("\t\tRedaction List: []\n\n");
2743 return (0);
2744 }
2745
2746 redact_block_phys_t *rbp_buf;
2747 uint64_t size;
2748 dmu_object_info_t doi;
2749
2750 VERIFY0(dmu_object_info(mos, prop.zbm_redaction_obj, &doi));
2751 size = doi.doi_max_offset;
2752 rbp_buf = kmem_alloc(size, KM_SLEEP);
2753
2754 err = dmu_read(mos, prop.zbm_redaction_obj, 0, size,
2755 rbp_buf, 0);
2756 if (err != 0) {
2757 dsl_redaction_list_rele(rl, FTAG);
2758 kmem_free(rbp_buf, size);
2759 return (err);
2760 }
2761
2762 (void) printf("\t\tRedaction List: [{object: %llx, offset: "
2763 "%llx, blksz: %x, count: %llx}",
2764 (u_longlong_t)rbp_buf[0].rbp_object,
2765 (u_longlong_t)rbp_buf[0].rbp_blkid,
2766 (uint_t)(redact_block_get_size(&rbp_buf[0])),
2767 (u_longlong_t)redact_block_get_count(&rbp_buf[0]));
2768
2769 for (size_t i = 1; i < rlp->rlp_num_entries; i++) {
2770 (void) printf(",\n\t\t{object: %llx, offset: %llx, "
2771 "blksz: %x, count: %llx}",
2772 (u_longlong_t)rbp_buf[i].rbp_object,
2773 (u_longlong_t)rbp_buf[i].rbp_blkid,
2774 (uint_t)(redact_block_get_size(&rbp_buf[i])),
2775 (u_longlong_t)redact_block_get_count(&rbp_buf[i]));
2776 }
2777 dsl_redaction_list_rele(rl, FTAG);
2778 kmem_free(rbp_buf, size);
2779 (void) printf("]\n\n");
2780 return (0);
2781 }
2782
2783 static void
2784 dump_bookmarks(objset_t *os, int verbosity)
2785 {
2786 zap_cursor_t zc;
2787 zap_attribute_t attr;
2788 dsl_dataset_t *ds = dmu_objset_ds(os);
2789 dsl_pool_t *dp = spa_get_dsl(os->os_spa);
2790 objset_t *mos = os->os_spa->spa_meta_objset;
2791 if (verbosity < 4)
2792 return;
2793 dsl_pool_config_enter(dp, FTAG);
2794
2795 for (zap_cursor_init(&zc, mos, ds->ds_bookmarks_obj);
2796 zap_cursor_retrieve(&zc, &attr) == 0;
2797 zap_cursor_advance(&zc)) {
2798 char osname[ZFS_MAX_DATASET_NAME_LEN];
2799 char buf[ZFS_MAX_DATASET_NAME_LEN];
2800 dmu_objset_name(os, osname);
2801 VERIFY3S(0, <=, snprintf(buf, sizeof (buf), "%s#%s", osname,
2802 attr.za_name));
2803 (void) dump_bookmark(dp, buf, verbosity >= 5, verbosity >= 6);
2804 }
2805 zap_cursor_fini(&zc);
2806 dsl_pool_config_exit(dp, FTAG);
2807 }
2808
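/*
 * Mark a bpobj and all of its subobjs as referenced in the MOS, for
 * zdb's MOS object leak checking.
 */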
2809 static void
2810 bpobj_count_refd(bpobj_t *bpo)
2811 {
2812 mos_obj_refd(bpo->bpo_object);
2813
2814 if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
2815 mos_obj_refd(bpo->bpo_phys->bpo_subobjs);
2816 for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
2817 uint64_t subobj;
2818 bpobj_t subbpo;
2819 int error;
2820 VERIFY0(dmu_read(bpo->bpo_os,
2821 bpo->bpo_phys->bpo_subobjs,
2822 i * sizeof (subobj), sizeof (subobj), &subobj, 0));
2823 error = bpobj_open(&subbpo, bpo->bpo_os, subobj);
2824 if (error != 0) {
2825 (void) printf("ERROR %u while trying to open "
2826 "subobj id %llu\n",
2827 error, (u_longlong_t)subobj);
2828 continue;
2829 }
2830 bpobj_count_refd(&subbpo);
2831 bpobj_close(&subbpo);
2832 }
2833 }
2834 }
2835
2836 static int
2837 dsl_deadlist_entry_count_refd(void *arg, dsl_deadlist_entry_t *dle)
2838 {
2839 spa_t *spa = arg;
2840 uint64_t empty_bpobj = spa->spa_dsl_pool->dp_empty_bpobj;
2841 if (dle->dle_bpobj.bpo_object != empty_bpobj)
2842 bpobj_count_refd(&dle->dle_bpobj);
2843 return (0);
2844 }
2845
2846 static int
2847 dsl_deadlist_entry_dump(void *arg, dsl_deadlist_entry_t *dle)
2848 {
2849 ASSERT(arg == NULL);
2850 if (dump_opt['d'] >= 5) {
2851 char buf[128];
2852 (void) snprintf(buf, sizeof (buf),
2853 "mintxg %llu -> obj %llu",
2854 (longlong_t)dle->dle_mintxg,
2855 (longlong_t)dle->dle_bpobj.bpo_object);
2856
2857 dump_full_bpobj(&dle->dle_bpobj, buf, 0);
2858 } else {
2859 (void) printf("mintxg %llu -> obj %llu\n",
2860 (longlong_t)dle->dle_mintxg,
2861 (longlong_t)dle->dle_bpobj.bpo_object);
2862 }
2863 return (0);
2864 }
2865
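/*
 * Summarize a deadlist or livelist (space used, compressed/uncompressed,
 * entry count) and dump its entries at higher -d verbosity.
 */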
2866 static void
2867 dump_blkptr_list(dsl_deadlist_t *dl, char *name)
2868 {
2869 char bytes[32];
2870 char comp[32];
2871 char uncomp[32];
2872 char entries[32];
2873 spa_t *spa = dmu_objset_spa(dl->dl_os);
2874 uint64_t empty_bpobj = spa->spa_dsl_pool->dp_empty_bpobj;
2875
2876 if (dl->dl_oldfmt) {
2877 if (dl->dl_bpobj.bpo_object != empty_bpobj)
2878 bpobj_count_refd(&dl->dl_bpobj);
2879 } else {
2880 mos_obj_refd(dl->dl_object);
2881 dsl_deadlist_iterate(dl, dsl_deadlist_entry_count_refd, spa);
2882 }
2883
2884 /* make sure nicenum has enough space */
2885 CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
2886 CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
2887 CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
2888 CTASSERT(sizeof (entries) >= NN_NUMBUF_SZ);
2889
2890 if (dump_opt['d'] < 3)
2891 return;
2892
2893 if (dl->dl_oldfmt) {
2894 dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0);
2895 return;
2896 }
2897
2898 zdb_nicenum(dl->dl_phys->dl_used, bytes, sizeof (bytes));
2899 zdb_nicenum(dl->dl_phys->dl_comp, comp, sizeof (comp));
2900 zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp, sizeof (uncomp));
2901 zdb_nicenum(avl_numnodes(&dl->dl_tree), entries, sizeof (entries));
2902 (void) printf("\n %s: %s (%s/%s comp), %s entries\n",
2903 name, bytes, comp, uncomp, entries);
2904
2905 if (dump_opt['d'] < 4)
2906 return;
2907
2908 (void) printf("\n");
2909
2910 dsl_deadlist_iterate(dl, dsl_deadlist_entry_dump, NULL);
2911 }
2912
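/*
 * Cross-check a dsl_dir's livelist space accounting against
 * dsl_dataset_space_written() on its origin; return nonzero on any
 * discrepancy.
 */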
2913 static int
2914 verify_dd_livelist(objset_t *os)
2915 {
2916 uint64_t ll_used, used, ll_comp, comp, ll_uncomp, uncomp;
2917 dsl_pool_t *dp = spa_get_dsl(os->os_spa);
2918 dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
2919
2920 ASSERT(!dmu_objset_is_snapshot(os));
2921 if (!dsl_deadlist_is_open(&dd->dd_livelist))
2922 return (0);
2923
2924 /* Iterate through the livelist to check for duplicates */
2925 dsl_deadlist_iterate(&dd->dd_livelist, sublivelist_verify_lightweight,
2926 NULL);
2927
2928 dsl_pool_config_enter(dp, FTAG);
2929 dsl_deadlist_space(&dd->dd_livelist, &ll_used,
2930 &ll_comp, &ll_uncomp);
2931
2932 dsl_dataset_t *origin_ds;
2933 ASSERT(dsl_pool_config_held(dp));
2934 VERIFY0(dsl_dataset_hold_obj(dp,
2935 dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin_ds));
2936 VERIFY0(dsl_dataset_space_written(origin_ds, os->os_dsl_dataset,
2937 &used, &comp, &uncomp));
2938 dsl_dataset_rele(origin_ds, FTAG);
2939 dsl_pool_config_exit(dp, FTAG);
2940 /*
2941 * It's possible that the dataset's uncomp space is larger than the
2942 	 * livelist's because livelists do not track embedded block pointers.
2943 */
2944 if (used != ll_used || comp != ll_comp || uncomp < ll_uncomp) {
2945 char nice_used[32], nice_comp[32], nice_uncomp[32];
2946 (void) printf("Discrepancy in space accounting:\n");
2947 zdb_nicenum(used, nice_used, sizeof (nice_used));
2948 zdb_nicenum(comp, nice_comp, sizeof (nice_comp));
2949 zdb_nicenum(uncomp, nice_uncomp, sizeof (nice_uncomp));
2950 (void) printf("dir: used %s, comp %s, uncomp %s\n",
2951 nice_used, nice_comp, nice_uncomp);
2952 zdb_nicenum(ll_used, nice_used, sizeof (nice_used));
2953 zdb_nicenum(ll_comp, nice_comp, sizeof (nice_comp));
2954 zdb_nicenum(ll_uncomp, nice_uncomp, sizeof (nice_uncomp));
2955 (void) printf("livelist: used %s, comp %s, uncomp %s\n",
2956 nice_used, nice_comp, nice_uncomp);
2957 return (1);
2958 }
2959 return (0);
2960 }
2961
2962 static avl_tree_t idx_tree;
2963 static avl_tree_t domain_tree;
2964 static boolean_t fuid_table_loaded;
2965 static objset_t *sa_os = NULL;
2966 static sa_attr_type_t *sa_attr_table = NULL;
2967
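/*
 * Hold an objset by name for dumping.  On success the objset is held
 * with a long hold on its dataset, and for unencrypted ZPL datasets the
 * global SA attribute table is set up; sa_setup() failures are
 * propagated to the caller.
 */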
2968 static int
2969 open_objset(const char *path, void *tag, objset_t **osp)
2970 {
2971 int err;
2972 uint64_t sa_attrs = 0;
2973 uint64_t version = 0;
2974
2975 VERIFY3P(sa_os, ==, NULL);
2976 /*
2977 * We can't own an objset if it's redacted. Therefore, we do this
2978 * dance: hold the objset, then acquire a long hold on its dataset, then
2979 * release the pool (which is held as part of holding the objset).
2980 */
2981 err = dmu_objset_hold(path, tag, osp);
2982 if (err != 0) {
2983 (void) fprintf(stderr, "failed to hold dataset '%s': %s\n",
2984 path, strerror(err));
2985 return (err);
2986 }
2987 dsl_dataset_long_hold(dmu_objset_ds(*osp), tag);
2988 dsl_pool_rele(dmu_objset_pool(*osp), tag);
2989
2990 if (dmu_objset_type(*osp) == DMU_OST_ZFS && !(*osp)->os_encrypted) {
2991 (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR,
2992 8, 1, &version);
2993 if (version >= ZPL_VERSION_SA) {
2994 (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
2995 8, 1, &sa_attrs);
2996 }
2997 err = sa_setup(*osp, sa_attrs, zfs_attr_table, ZPL_END,
2998 &sa_attr_table);
2999 if (err != 0) {
3000 (void) fprintf(stderr, "sa_setup failed: %s\n",
3001 strerror(err));
3002 dsl_dataset_long_rele(dmu_objset_ds(*osp), tag);
3003 dsl_dataset_rele(dmu_objset_ds(*osp), tag);
3004 *osp = NULL;
3005 }
3006 }
3007 sa_os = *osp;
3008
3009 	return (err);
3010 }
3011
3012 static void
3013 close_objset(objset_t *os, void *tag)
3014 {
3015 VERIFY3P(os, ==, sa_os);
3016 if (os->os_sa != NULL)
3017 sa_tear_down(os);
3018 dsl_dataset_long_rele(dmu_objset_ds(os), tag);
3019 dsl_dataset_rele(dmu_objset_ds(os), tag);
3020 sa_attr_table = NULL;
3021 sa_os = NULL;
3022 }
3023
3024 static void
3025 fuid_table_destroy(void)
3026 {
3027 if (fuid_table_loaded) {
3028 zfs_fuid_table_destroy(&idx_tree, &domain_tree);
3029 fuid_table_loaded = B_FALSE;
3030 }
3031 }
3032
3033 /*
3034  * Print uid or gid information.
3035  * For a normal POSIX id, just the id is printed in decimal format.
3036  * For CIFS files with a FUID, the fuid is printed in hex followed by
3037  * the domain-rid string.
3038  */
3039 static void
3040 print_idstr(uint64_t id, const char *id_type)
3041 {
3042 if (FUID_INDEX(id)) {
3043 char *domain;
3044
3045 domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
3046 (void) printf("\t%s %llx [%s-%d]\n", id_type,
3047 (u_longlong_t)id, domain, (int)FUID_RID(id));
3048 } else {
3049 (void) printf("\t%s %llu\n", id_type, (u_longlong_t)id);
3050 }
3051
3052 }
3053
3054 static void
3055 dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
3056 {
3057 uint32_t uid_idx, gid_idx;
3058
3059 uid_idx = FUID_INDEX(uid);
3060 gid_idx = FUID_INDEX(gid);
3061
3062 /* Load domain table, if not already loaded */
3063 if (!fuid_table_loaded && (uid_idx || gid_idx)) {
3064 uint64_t fuid_obj;
3065
3066 /* first find the fuid object. It lives in the master node */
3067 VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
3068 8, 1, &fuid_obj) == 0);
3069 zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
3070 (void) zfs_fuid_table_load(os, fuid_obj,
3071 &idx_tree, &domain_tree);
3072 fuid_table_loaded = B_TRUE;
3073 }
3074
3075 print_idstr(uid, "uid");
3076 print_idstr(gid, "gid");
3077 }
3078
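/*
 * Unpack the packed SA xattr nvlist and print each name=value pair,
 * escaping non-printable value bytes in octal.
 */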
3079 static void
3080 dump_znode_sa_xattr(sa_handle_t *hdl)
3081 {
3082 nvlist_t *sa_xattr;
3083 nvpair_t *elem = NULL;
3084 int sa_xattr_size = 0;
3085 int sa_xattr_entries = 0;
3086 int error;
3087 char *sa_xattr_packed;
3088
3089 error = sa_size(hdl, sa_attr_table[ZPL_DXATTR], &sa_xattr_size);
3090 if (error || sa_xattr_size == 0)
3091 return;
3092
3093 sa_xattr_packed = malloc(sa_xattr_size);
3094 if (sa_xattr_packed == NULL)
3095 return;
3096
3097 error = sa_lookup(hdl, sa_attr_table[ZPL_DXATTR],
3098 sa_xattr_packed, sa_xattr_size);
3099 if (error) {
3100 free(sa_xattr_packed);
3101 return;
3102 }
3103
3104 error = nvlist_unpack(sa_xattr_packed, sa_xattr_size, &sa_xattr, 0);
3105 if (error) {
3106 free(sa_xattr_packed);
3107 return;
3108 }
3109
3110 while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL)
3111 sa_xattr_entries++;
3112
3113 (void) printf("\tSA xattrs: %d bytes, %d entries\n\n",
3114 sa_xattr_size, sa_xattr_entries);
3115 while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) {
3116 uchar_t *value;
3117 uint_t cnt, idx;
3118
3119 (void) printf("\t\t%s = ", nvpair_name(elem));
3120 nvpair_value_byte_array(elem, &value, &cnt);
3121 for (idx = 0; idx < cnt; ++idx) {
3122 if (isprint(value[idx]))
3123 (void) putchar(value[idx]);
3124 else
3125 (void) printf("\\%3.3o", value[idx]);
3126 }
3127 (void) putchar('\n');
3128 }
3129
3130 nvlist_free(sa_xattr);
3131 free(sa_xattr_packed);
3132 }
3133
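/*
 * Print the target of a symlink whose target is stored as an SA
 * attribute.  The stored target may not be NUL-terminated, so it is
 * terminated explicitly before printing.
 */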
3134 static void
3135 dump_znode_symlink(sa_handle_t *hdl)
3136 {
3137 	int sa_symlink_size = 0;
3138 	char linktarget[MAXPATHLEN];
3139 	int error;
3140 
3141 	error = sa_size(hdl, sa_attr_table[ZPL_SYMLINK], &sa_symlink_size);
3142 	if (error || sa_symlink_size == 0 || sa_symlink_size >= MAXPATHLEN)
3143 		return;
3144 	if (sa_lookup(hdl, sa_attr_table[ZPL_SYMLINK],
3145 	    &linktarget, sa_symlink_size) == 0) {
3146 		linktarget[sa_symlink_size] = '\0';
3147 		(void) printf("\ttarget %s\n", linktarget);
3148 	}
3149 }
3150
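/*
 * object_viewer for ZPL znodes: print the file's path, ownership,
 * timestamps, mode, size and related SA attributes.
 */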
3151 /*ARGSUSED*/
3152 static void
3153 dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
3154 {
3155 char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */
3156 sa_handle_t *hdl;
3157 uint64_t xattr, rdev, gen;
3158 uint64_t uid, gid, mode, fsize, parent, links;
3159 uint64_t pflags;
3160 uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
3161 time_t z_crtime, z_atime, z_mtime, z_ctime;
3162 sa_bulk_attr_t bulk[12];
3163 int idx = 0;
3164 int error;
3165
3166 VERIFY3P(os, ==, sa_os);
3167 if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
3168 (void) printf("Failed to get handle for SA znode\n");
3169 return;
3170 }
3171
3172 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
3173 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
3174 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
3175 &links, 8);
3176 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
3177 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
3178 &mode, 8);
3179 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
3180 NULL, &parent, 8);
3181 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
3182 &fsize, 8);
3183 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
3184 acctm, 16);
3185 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
3186 modtm, 16);
3187 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
3188 crtm, 16);
3189 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
3190 chgtm, 16);
3191 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL,
3192 &pflags, 8);
3193
3194 if (sa_bulk_lookup(hdl, bulk, idx)) {
3195 (void) sa_handle_destroy(hdl);
3196 return;
3197 }
3198
3199 z_crtime = (time_t)crtm[0];
3200 z_atime = (time_t)acctm[0];
3201 z_mtime = (time_t)modtm[0];
3202 z_ctime = (time_t)chgtm[0];
3203
3204 if (dump_opt['d'] > 4) {
3205 error = zfs_obj_to_path(os, object, path, sizeof (path));
3206 if (error == ESTALE) {
3207 (void) snprintf(path, sizeof (path), "on delete queue");
3208 } else if (error != 0) {
3209 leaked_objects++;
3210 (void) snprintf(path, sizeof (path),
3211 "path not found, possibly leaked");
3212 }
3213 (void) printf("\tpath %s\n", path);
3214 }
3215
3216 if (S_ISLNK(mode))
3217 dump_znode_symlink(hdl);
3218 dump_uidgid(os, uid, gid);
3219 (void) printf("\tatime %s", ctime(&z_atime));
3220 (void) printf("\tmtime %s", ctime(&z_mtime));
3221 (void) printf("\tctime %s", ctime(&z_ctime));
3222 (void) printf("\tcrtime %s", ctime(&z_crtime));
3223 (void) printf("\tgen %llu\n", (u_longlong_t)gen);
3224 (void) printf("\tmode %llo\n", (u_longlong_t)mode);
3225 (void) printf("\tsize %llu\n", (u_longlong_t)fsize);
3226 (void) printf("\tparent %llu\n", (u_longlong_t)parent);
3227 (void) printf("\tlinks %llu\n", (u_longlong_t)links);
3228 (void) printf("\tpflags %llx\n", (u_longlong_t)pflags);
3229 if (dmu_objset_projectquota_enabled(os) && (pflags & ZFS_PROJID)) {
3230 uint64_t projid;
3231
3232 if (sa_lookup(hdl, sa_attr_table[ZPL_PROJID], &projid,
3233 sizeof (uint64_t)) == 0)
3234 (void) printf("\tprojid %llu\n", (u_longlong_t)projid);
3235 }
3236 if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
3237 sizeof (uint64_t)) == 0)
3238 (void) printf("\txattr %llu\n", (u_longlong_t)xattr);
3239 if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
3240 sizeof (uint64_t)) == 0)
3241 (void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev);
3242 dump_znode_sa_xattr(hdl);
3243 sa_handle_destroy(hdl);
3244 }
3245
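/* Intentionally empty object_viewer stubs. */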
3246 /*ARGSUSED*/
3247 static void
3248 dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
3249 {
3250 }
3251
3252 /*ARGSUSED*/
3253 static void
3254 dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
3255 {
3256 }
3257
3258 static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
3259 dump_none, /* unallocated */
3260 dump_zap, /* object directory */
3261 dump_uint64, /* object array */
3262 dump_none, /* packed nvlist */
3263 dump_packed_nvlist, /* packed nvlist size */
3264 dump_none, /* bpobj */
3265 dump_bpobj, /* bpobj header */
3266 dump_none, /* SPA space map header */
3267 dump_none, /* SPA space map */
3268 dump_none, /* ZIL intent log */
3269 dump_dnode, /* DMU dnode */
3270 dump_dmu_objset, /* DMU objset */
3271 dump_dsl_dir, /* DSL directory */
3272 dump_zap, /* DSL directory child map */
3273 dump_zap, /* DSL dataset snap map */
3274 dump_zap, /* DSL props */
3275 dump_dsl_dataset, /* DSL dataset */
3276 dump_znode, /* ZFS znode */
3277 dump_acl, /* ZFS V0 ACL */
3278 dump_uint8, /* ZFS plain file */
3279 dump_zpldir, /* ZFS directory */
3280 dump_zap, /* ZFS master node */
3281 dump_zap, /* ZFS delete queue */
3282 dump_uint8, /* zvol object */
3283 dump_zap, /* zvol prop */
3284 dump_uint8, /* other uint8[] */
3285 dump_uint64, /* other uint64[] */
3286 dump_zap, /* other ZAP */
3287 dump_zap, /* persistent error log */
3288 dump_uint8, /* SPA history */
3289 dump_history_offsets, /* SPA history offsets */
3290 dump_zap, /* Pool properties */
3291 dump_zap, /* DSL permissions */
3292 dump_acl, /* ZFS ACL */
3293 dump_uint8, /* ZFS SYSACL */
3294 dump_none, /* FUID nvlist */
3295 dump_packed_nvlist, /* FUID nvlist size */
3296 dump_zap, /* DSL dataset next clones */
3297 dump_zap, /* DSL scrub queue */
3298 dump_zap, /* ZFS user/group/project used */
3299 dump_zap, /* ZFS user/group/project quota */
3300 dump_zap, /* snapshot refcount tags */
3301 dump_ddt_zap, /* DDT ZAP object */
3302 dump_zap, /* DDT statistics */
3303 dump_znode, /* SA object */
3304 dump_zap, /* SA Master Node */
3305 dump_sa_attrs, /* SA attribute registration */
3306 dump_sa_layouts, /* SA attribute layouts */
3307 dump_zap, /* DSL scrub translations */
3308 dump_none, /* fake dedup BP */
3309 dump_zap, /* deadlist */
3310 dump_none, /* deadlist hdr */
3311 dump_zap, /* dsl clones */
3312 dump_bpobj_subobjs, /* bpobj subobjs */
3313 dump_unknown, /* Unknown type, must be last */
3314 };
3315
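/*
 * Return B_TRUE if an object of type obj_type should be printed given
 * the ZOR_FLAG_* mask parsed from an object range argument.
 */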
3316 static boolean_t
3317 match_object_type(dmu_object_type_t obj_type, uint64_t flags)
3318 {
3319 boolean_t match = B_TRUE;
3320
3321 switch (obj_type) {
3322 case DMU_OT_DIRECTORY_CONTENTS:
3323 if (!(flags & ZOR_FLAG_DIRECTORY))
3324 match = B_FALSE;
3325 break;
3326 case DMU_OT_PLAIN_FILE_CONTENTS:
3327 if (!(flags & ZOR_FLAG_PLAIN_FILE))
3328 match = B_FALSE;
3329 break;
3330 case DMU_OT_SPACE_MAP:
3331 if (!(flags & ZOR_FLAG_SPACE_MAP))
3332 match = B_FALSE;
3333 break;
3334 default:
3335 if (strcmp(zdb_ot_name(obj_type), "zap") == 0) {
3336 if (!(flags & ZOR_FLAG_ZAP))
3337 match = B_FALSE;
3338 break;
3339 }
3340
3341 /*
3342 * If all bits except some of the supported flags are
3343 * set, the user combined the all-types flag (A) with
3344 * a negated flag to exclude some types (e.g. A-f to
3345 * show all object types except plain files).
3346 */
3347 if ((flags | ZOR_SUPPORTED_FLAGS) != ZOR_FLAG_ALL_TYPES)
3348 match = B_FALSE;
3349
3350 break;
3351 }
3352
3353 return (match);
3354 }
3355
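/*
 * Dump one object: its summary row and bonus buffer, its type-specific
 * contents via the object_viewer table, and, at higher verbosity, its
 * indirect blocks and the list of allocated segments.
 */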
3356 static void
3357 dump_object(objset_t *os, uint64_t object, int verbosity,
3358 boolean_t *print_header, uint64_t *dnode_slots_used, uint64_t flags)
3359 {
3360 dmu_buf_t *db = NULL;
3361 dmu_object_info_t doi;
3362 dnode_t *dn;
3363 boolean_t dnode_held = B_FALSE;
3364 void *bonus = NULL;
3365 size_t bsize = 0;
3366 char iblk[32], dblk[32], lsize[32], asize[32], fill[32], dnsize[32];
3367 char bonus_size[32];
3368 char aux[50];
3369 int error;
3370
3371 /* make sure nicenum has enough space */
3372 CTASSERT(sizeof (iblk) >= NN_NUMBUF_SZ);
3373 CTASSERT(sizeof (dblk) >= NN_NUMBUF_SZ);
3374 CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ);
3375 CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ);
3376 CTASSERT(sizeof (bonus_size) >= NN_NUMBUF_SZ);
3377
3378 if (*print_header) {
3379 (void) printf("\n%10s %3s %5s %5s %5s %6s %5s %6s %s\n",
3380 "Object", "lvl", "iblk", "dblk", "dsize", "dnsize",
3381 "lsize", "%full", "type");
3382 *print_header = 0;
3383 }
3384
3385 if (object == 0) {
3386 dn = DMU_META_DNODE(os);
3387 dmu_object_info_from_dnode(dn, &doi);
3388 } else {
3389 /*
3390 * Encrypted datasets will have sensitive bonus buffers
3391 * encrypted. Therefore we cannot hold the bonus buffer and
3392 * must hold the dnode itself instead.
3393 */
3394 error = dmu_object_info(os, object, &doi);
3395 if (error)
3396 fatal("dmu_object_info() failed, errno %u", error);
3397
3398 if (os->os_encrypted &&
3399 DMU_OT_IS_ENCRYPTED(doi.doi_bonus_type)) {
3400 error = dnode_hold(os, object, FTAG, &dn);
3401 if (error)
3402 fatal("dnode_hold() failed, errno %u", error);
3403 dnode_held = B_TRUE;
3404 } else {
3405 error = dmu_bonus_hold(os, object, FTAG, &db);
3406 if (error)
3407 fatal("dmu_bonus_hold(%llu) failed, errno %u",
3408 object, error);
3409 bonus = db->db_data;
3410 bsize = db->db_size;
3411 dn = DB_DNODE((dmu_buf_impl_t *)db);
3412 }
3413 }
3414
3415 /*
3416 * Default to showing all object types if no flags were specified.
3417 */
3418 if (flags != 0 && flags != ZOR_FLAG_ALL_TYPES &&
3419 !match_object_type(doi.doi_type, flags))
3420 goto out;
3421
3422 if (dnode_slots_used)
3423 *dnode_slots_used = doi.doi_dnodesize / DNODE_MIN_SIZE;
3424
3425 zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk));
3426 zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk));
3427 zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize));
3428 zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof (asize));
3429 zdb_nicenum(doi.doi_bonus_size, bonus_size, sizeof (bonus_size));
3430 zdb_nicenum(doi.doi_dnodesize, dnsize, sizeof (dnsize));
3431 (void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
3432 doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
3433 doi.doi_max_offset);
3434
3435 aux[0] = '\0';
3436
3437 if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
3438 (void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux),
3439 " (K=%s)", ZDB_CHECKSUM_NAME(doi.doi_checksum));
3440 }
3441
3442 if (doi.doi_compress == ZIO_COMPRESS_INHERIT &&
3443 ZIO_COMPRESS_HASLEVEL(os->os_compress) && verbosity >= 6) {
3444 const char *compname = NULL;
3445 if (zfs_prop_index_to_string(ZFS_PROP_COMPRESSION,
3446 ZIO_COMPRESS_RAW(os->os_compress, os->os_complevel),
3447 &compname) == 0) {
3448 (void) snprintf(aux + strlen(aux),
3449 sizeof (aux) - strlen(aux), " (Z=inherit=%s)",
3450 compname);
3451 } else {
3452 (void) snprintf(aux + strlen(aux),
3453 sizeof (aux) - strlen(aux),
3454 " (Z=inherit=%s-unknown)",
3455 ZDB_COMPRESS_NAME(os->os_compress));
3456 }
3457 } else if (doi.doi_compress == ZIO_COMPRESS_INHERIT && verbosity >= 6) {
3458 (void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux),
3459 " (Z=inherit=%s)", ZDB_COMPRESS_NAME(os->os_compress));
3460 } else if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
3461 (void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux),
3462 " (Z=%s)", ZDB_COMPRESS_NAME(doi.doi_compress));
3463 }
3464
3465 (void) printf("%10lld %3u %5s %5s %5s %6s %5s %6s %s%s\n",
3466 (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
3467 asize, dnsize, lsize, fill, zdb_ot_name(doi.doi_type), aux);
3468
3469 if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
3470 (void) printf("%10s %3s %5s %5s %5s %5s %5s %6s %s\n",
3471 "", "", "", "", "", "", bonus_size, "bonus",
3472 zdb_ot_name(doi.doi_bonus_type));
3473 }
3474
3475 if (verbosity >= 4) {
3476 (void) printf("\tdnode flags: %s%s%s%s\n",
3477 (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
3478 "USED_BYTES " : "",
3479 (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
3480 "USERUSED_ACCOUNTED " : "",
3481 (dn->dn_phys->dn_flags & DNODE_FLAG_USEROBJUSED_ACCOUNTED) ?
3482 "USEROBJUSED_ACCOUNTED " : "",
3483 (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
3484 "SPILL_BLKPTR" : "");
3485 (void) printf("\tdnode maxblkid: %llu\n",
3486 (longlong_t)dn->dn_phys->dn_maxblkid);
3487
3488 if (!dnode_held) {
3489 object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os,
3490 object, bonus, bsize);
3491 } else {
3492 (void) printf("\t\t(bonus encrypted)\n");
3493 }
3494
3495 if (!os->os_encrypted || !DMU_OT_IS_ENCRYPTED(doi.doi_type)) {
3496 object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object,
3497 NULL, 0);
3498 } else {
3499 (void) printf("\t\t(object encrypted)\n");
3500 }
3501
3502 *print_header = B_TRUE;
3503 }
3504
3505 if (verbosity >= 5)
3506 dump_indirect(dn);
3507
3508 if (verbosity >= 5) {
3509 /*
3510 * Report the list of segments that comprise the object.
3511 */
3512 uint64_t start = 0;
3513 uint64_t end;
3514 uint64_t blkfill = 1;
3515 int minlvl = 1;
3516
3517 if (dn->dn_type == DMU_OT_DNODE) {
3518 minlvl = 0;
3519 blkfill = DNODES_PER_BLOCK;
3520 }
3521
3522 for (;;) {
3523 char segsize[32];
3524 /* make sure nicenum has enough space */
3525 CTASSERT(sizeof (segsize) >= NN_NUMBUF_SZ);
3526 error = dnode_next_offset(dn,
3527 0, &start, minlvl, blkfill, 0);
3528 if (error)
3529 break;
3530 end = start;
3531 error = dnode_next_offset(dn,
3532 DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
3533 zdb_nicenum(end - start, segsize, sizeof (segsize));
3534 (void) printf("\t\tsegment [%016llx, %016llx)"
3535 " size %5s\n", (u_longlong_t)start,
3536 (u_longlong_t)end, segsize);
3537 if (error)
3538 break;
3539 start = end;
3540 }
3541 }
3542
3543 out:
3544 if (db != NULL)
3545 dmu_buf_rele(db, FTAG);
3546 if (dnode_held)
3547 dnode_rele(dn, FTAG);
3548 }
3549
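/* Record the MOS objects referenced by a dsl_dir, for leak checking. */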
3550 static void
3551 count_dir_mos_objects(dsl_dir_t *dd)
3552 {
3553 mos_obj_refd(dd->dd_object);
3554 mos_obj_refd(dsl_dir_phys(dd)->dd_child_dir_zapobj);
3555 mos_obj_refd(dsl_dir_phys(dd)->dd_deleg_zapobj);
3556 mos_obj_refd(dsl_dir_phys(dd)->dd_props_zapobj);
3557 mos_obj_refd(dsl_dir_phys(dd)->dd_clones);
3558
3559 /*
3560 * The dd_crypto_obj can be referenced by multiple dsl_dir's.
3561 * Ignore the references after the first one.
3562 */
3563 mos_obj_refd_multiple(dd->dd_crypto_obj);
3564 }
3565
3566 static void
3567 count_ds_mos_objects(dsl_dataset_t *ds)
3568 {
3569 mos_obj_refd(ds->ds_object);
3570 mos_obj_refd(dsl_dataset_phys(ds)->ds_next_clones_obj);
3571 mos_obj_refd(dsl_dataset_phys(ds)->ds_props_obj);
3572 mos_obj_refd(dsl_dataset_phys(ds)->ds_userrefs_obj);
3573 mos_obj_refd(dsl_dataset_phys(ds)->ds_snapnames_zapobj);
3574 mos_obj_refd(ds->ds_bookmarks_obj);
3575
3576 if (!dsl_dataset_is_snapshot(ds)) {
3577 count_dir_mos_objects(ds->ds_dir);
3578 }
3579 }
3580
3581 static const char *objset_types[DMU_OST_NUMTYPES] = {
3582 "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
3583
3584 /*
3585 * Parse a string denoting a range of object IDs of the form
3586 * <start>[:<end>[:flags]], and store the results in zor.
3587 * Return 0 on success. On error, return 1 and update the msg
3588 * pointer to point to a descriptive error message.
3589 */
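/*
 * For example, "128:1024:A-f" selects objects 128 through 1024 of every
 * type except plain files; the valid flag letters are defined in the
 * flagbits table.
 */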
3590 static int
3591 parse_object_range(char *range, zopt_object_range_t *zor, char **msg)
3592 {
3593 uint64_t flags = 0;
3594 char *p, *s, *dup, *flagstr;
3595 size_t len;
3596 int i;
3597 int rc = 0;
3598
3599 if (strchr(range, ':') == NULL) {
3600 zor->zor_obj_start = strtoull(range, &p, 0);
3601 if (*p != '\0') {
3602 *msg = "Invalid characters in object ID";
3603 rc = 1;
3604 }
3605 zor->zor_obj_end = zor->zor_obj_start;
3606 return (rc);
3607 }
3608
3609 if (strchr(range, ':') == range) {
3610 *msg = "Invalid leading colon";
3611 rc = 1;
3612 return (rc);
3613 }
3614
3615 len = strlen(range);
3616 if (range[len - 1] == ':') {
3617 *msg = "Invalid trailing colon";
3618 rc = 1;
3619 return (rc);
3620 }
3621
3622 dup = strdup(range);
3623 s = strtok(dup, ":");
3624 zor->zor_obj_start = strtoull(s, &p, 0);
3625
3626 if (*p != '\0') {
3627 *msg = "Invalid characters in start object ID";
3628 rc = 1;
3629 goto out;
3630 }
3631
3632 s = strtok(NULL, ":");
3633 zor->zor_obj_end = strtoull(s, &p, 0);
3634
3635 if (*p != '\0') {
3636 *msg = "Invalid characters in end object ID";
3637 rc = 1;
3638 goto out;
3639 }
3640
3641 if (zor->zor_obj_start > zor->zor_obj_end) {
3642 *msg = "Start object ID may not exceed end object ID";
3643 rc = 1;
3644 goto out;
3645 }
3646
3647 s = strtok(NULL, ":");
3648 if (s == NULL) {
3649 zor->zor_flags = ZOR_FLAG_ALL_TYPES;
3650 goto out;
3651 } else if (strtok(NULL, ":") != NULL) {
3652 *msg = "Invalid colon-delimited field after flags";
3653 rc = 1;
3654 goto out;
3655 }
3656
3657 flagstr = s;
3658 for (i = 0; flagstr[i]; i++) {
3659 int bit;
3660 boolean_t negation = (flagstr[i] == '-');
3661
3662 if (negation) {
3663 i++;
3664 if (flagstr[i] == '\0') {
3665 *msg = "Invalid trailing negation operator";
3666 rc = 1;
3667 goto out;
3668 }
3669 }
3670 bit = flagbits[(uchar_t)flagstr[i]];
3671 if (bit == 0) {
3672 *msg = "Invalid flag";
3673 rc = 1;
3674 goto out;
3675 }
3676 if (negation)
3677 flags &= ~bit;
3678 else
3679 flags |= bit;
3680 }
3681 zor->zor_flags = flags;
3682
3683 out:
3684 free(dup);
3685 return (rc);
3686 }
3687
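/*
 * Print an objset's summary line, then dump its objects (optionally
 * restricted to the object ranges given on the command line) along with
 * its intent log, deadlists, livelist and bookmarks, as verbosity
 * allows.
 */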
3688 static void
3689 dump_objset(objset_t *os)
3690 {
3691 dmu_objset_stats_t dds = { 0 };
3692 uint64_t object, object_count;
3693 uint64_t refdbytes, usedobjs, scratch;
3694 char numbuf[32];
3695 char blkbuf[BP_SPRINTF_LEN + 20];
3696 char osname[ZFS_MAX_DATASET_NAME_LEN];
3697 const char *type = "UNKNOWN";
3698 int verbosity = dump_opt['d'];
3699 boolean_t print_header;
3700 unsigned i;
3701 int error;
3702 uint64_t total_slots_used = 0;
3703 uint64_t max_slot_used = 0;
3704 uint64_t dnode_slots;
3705 uint64_t obj_start;
3706 uint64_t obj_end;
3707 uint64_t flags;
3708
3709 /* make sure nicenum has enough space */
3710 CTASSERT(sizeof (numbuf) >= NN_NUMBUF_SZ);
3711
3712 dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
3713 dmu_objset_fast_stat(os, &dds);
3714 dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
3715
3716 print_header = B_TRUE;
3717
3718 if (dds.dds_type < DMU_OST_NUMTYPES)
3719 type = objset_types[dds.dds_type];
3720
3721 if (dds.dds_type == DMU_OST_META) {
3722 dds.dds_creation_txg = TXG_INITIAL;
3723 usedobjs = BP_GET_FILL(os->os_rootbp);
3724 refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)->
3725 dd_used_bytes;
3726 } else {
3727 dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
3728 }
3729
3730 ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp));
3731
3732 zdb_nicenum(refdbytes, numbuf, sizeof (numbuf));
3733
3734 if (verbosity >= 4) {
3735 (void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp ");
3736 (void) snprintf_blkptr(blkbuf + strlen(blkbuf),
3737 sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp);
3738 } else {
3739 blkbuf[0] = '\0';
3740 }
3741
3742 dmu_objset_name(os, osname);
3743
3744 (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
3745 "%s, %llu objects%s%s\n",
3746 osname, type, (u_longlong_t)dmu_objset_id(os),
3747 (u_longlong_t)dds.dds_creation_txg,
3748 numbuf, (u_longlong_t)usedobjs, blkbuf,
3749 (dds.dds_inconsistent) ? " (inconsistent)" : "");
3750
3751 for (i = 0; i < zopt_object_args; i++) {
3752 obj_start = zopt_object_ranges[i].zor_obj_start;
3753 obj_end = zopt_object_ranges[i].zor_obj_end;
3754 flags = zopt_object_ranges[i].zor_flags;
3755
3756 object = obj_start;
3757 if (object == 0 || obj_start == obj_end)
3758 dump_object(os, object, verbosity, &print_header, NULL,
3759 flags);
3760 else
3761 object--;
3762
3763 while ((dmu_object_next(os, &object, B_FALSE, 0) == 0) &&
3764 object <= obj_end) {
3765 dump_object(os, object, verbosity, &print_header, NULL,
3766 flags);
3767 }
3768 }
3769
3770 if (zopt_object_args > 0) {
3771 (void) printf("\n");
3772 return;
3773 }
3774
3775 if (dump_opt['i'] != 0 || verbosity >= 2)
3776 dump_intent_log(dmu_objset_zil(os));
3777
3778 if (dmu_objset_ds(os) != NULL) {
3779 dsl_dataset_t *ds = dmu_objset_ds(os);
3780 dump_blkptr_list(&ds->ds_deadlist, "Deadlist");
3781 if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
3782 !dmu_objset_is_snapshot(os)) {
3783 dump_blkptr_list(&ds->ds_dir->dd_livelist, "Livelist");
3784 if (verify_dd_livelist(os) != 0)
3785 fatal("livelist is incorrect");
3786 }
3787
3788 if (dsl_dataset_remap_deadlist_exists(ds)) {
3789 (void) printf("ds_remap_deadlist:\n");
3790 dump_blkptr_list(&ds->ds_remap_deadlist, "Deadlist");
3791 }
3792 count_ds_mos_objects(ds);
3793 }
3794
3795 if (dmu_objset_ds(os) != NULL)
3796 dump_bookmarks(os, verbosity);
3797
3798 if (verbosity < 2)
3799 return;
3800
3801 if (BP_IS_HOLE(os->os_rootbp))
3802 return;
3803
3804 dump_object(os, 0, verbosity, &print_header, NULL, 0);
3805 object_count = 0;
3806 if (DMU_USERUSED_DNODE(os) != NULL &&
3807 DMU_USERUSED_DNODE(os)->dn_type != 0) {
3808 dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header,
3809 NULL, 0);
3810 dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header,
3811 NULL, 0);
3812 }
3813
3814 if (DMU_PROJECTUSED_DNODE(os) != NULL &&
3815 DMU_PROJECTUSED_DNODE(os)->dn_type != 0)
3816 dump_object(os, DMU_PROJECTUSED_OBJECT, verbosity,
3817 &print_header, NULL, 0);
3818
3819 object = 0;
3820 while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
3821 dump_object(os, object, verbosity, &print_header, &dnode_slots,
3822 0);
3823 object_count++;
3824 total_slots_used += dnode_slots;
3825 max_slot_used = object + dnode_slots - 1;
3826 }
3827
3828 (void) printf("\n");
3829
3830 (void) printf(" Dnode slots:\n");
3831 (void) printf("\tTotal used: %10llu\n",
3832 (u_longlong_t)total_slots_used);
3833 (void) printf("\tMax used: %10llu\n",
3834 (u_longlong_t)max_slot_used);
3835 (void) printf("\tPercent empty: %10lf\n",
3836 (double)(max_slot_used - total_slots_used)*100 /
3837 (double)max_slot_used);
3838 (void) printf("\n");
3839
3840 if (error != ESRCH) {
3841 (void) fprintf(stderr, "dmu_object_next() = %d\n", error);
3842 abort();
3843 }
3844
3845 ASSERT3U(object_count, ==, usedobjs);
3846
3847 if (leaked_objects != 0) {
3848 (void) printf("%d potentially leaked objects detected\n",
3849 leaked_objects);
3850 leaked_objects = 0;
3851 }
3852 }
3853
3854 static void
3855 dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
3856 {
3857 time_t timestamp = ub->ub_timestamp;
3858
3859 (void) printf("%s", header ? header : "");
3860 (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
3861 (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
3862 (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
3863 (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
3864 (void) printf("\ttimestamp = %llu UTC = %s",
3865 (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
3866
3867 (void) printf("\tmmp_magic = %016llx\n",
3868 (u_longlong_t)ub->ub_mmp_magic);
3869 if (MMP_VALID(ub)) {
3870 (void) printf("\tmmp_delay = %0llu\n",
3871 (u_longlong_t)ub->ub_mmp_delay);
3872 if (MMP_SEQ_VALID(ub))
3873 (void) printf("\tmmp_seq = %u\n",
3874 (unsigned int) MMP_SEQ(ub));
3875 if (MMP_FAIL_INT_VALID(ub))
3876 (void) printf("\tmmp_fail = %u\n",
3877 (unsigned int) MMP_FAIL_INT(ub));
3878 if (MMP_INTERVAL_VALID(ub))
3879 (void) printf("\tmmp_write = %u\n",
3880 (unsigned int) MMP_INTERVAL(ub));
3881 /* After MMP_* to make summarize_uberblock_mmp cleaner */
3882 (void) printf("\tmmp_valid = %x\n",
3883 (unsigned int) ub->ub_mmp_config & 0xFF);
3884 }
3885
3886 if (dump_opt['u'] >= 4) {
3887 char blkbuf[BP_SPRINTF_LEN];
3888 snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
3889 (void) printf("\trootbp = %s\n", blkbuf);
3890 }
3891 (void) printf("\tcheckpoint_txg = %llu\n",
3892 (u_longlong_t)ub->ub_checkpoint_txg);
3893 (void) printf("%s", footer ? footer : "");
3894 }
3895
3896 static void
3897 dump_config(spa_t *spa)
3898 {
3899 dmu_buf_t *db;
3900 size_t nvsize = 0;
3901 int error = 0;
3902
3904 error = dmu_bonus_hold(spa->spa_meta_objset,
3905 spa->spa_config_object, FTAG, &db);
3906
3907 if (error == 0) {
3908 nvsize = *(uint64_t *)db->db_data;
3909 dmu_buf_rele(db, FTAG);
3910
3911 (void) printf("\nMOS Configuration:\n");
3912 dump_packed_nvlist(spa->spa_meta_objset,
3913 spa->spa_config_object, (void *)&nvsize, 1);
3914 } else {
3915 (void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d\n",
3916 (u_longlong_t)spa->spa_config_object, error);
3917 }
3918 }
3919
3920 static void
3921 dump_cachefile(const char *cachefile)
3922 {
3923 int fd;
3924 struct stat64 statbuf;
3925 char *buf;
3926 nvlist_t *config;
3927
3928 if ((fd = open64(cachefile, O_RDONLY)) < 0) {
3929 (void) printf("cannot open '%s': %s\n", cachefile,
3930 strerror(errno));
3931 exit(1);
3932 }
3933
3934 if (fstat64(fd, &statbuf) != 0) {
3935 (void) printf("failed to stat '%s': %s\n", cachefile,
3936 strerror(errno));
3937 exit(1);
3938 }
3939
3940 if ((buf = malloc(statbuf.st_size)) == NULL) {
3941 (void) fprintf(stderr, "failed to allocate %llu bytes\n",
3942 (u_longlong_t)statbuf.st_size);
3943 exit(1);
3944 }
3945
3946 if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
3947 (void) fprintf(stderr, "failed to read %llu bytes\n",
3948 (u_longlong_t)statbuf.st_size);
3949 exit(1);
3950 }
3951
3952 (void) close(fd);
3953
3954 if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
3955 (void) fprintf(stderr, "failed to unpack nvlist\n");
3956 exit(1);
3957 }
3958
3959 free(buf);
3960
3961 dump_nvlist(config, 0);
3962
3963 nvlist_free(config);
3964 }
3965
3966 /*
3967 * ZFS label nvlist stats
3968 */
3969 typedef struct zdb_nvl_stats {
3970 int zns_list_count;
3971 int zns_leaf_count;
3972 size_t zns_leaf_largest;
3973 size_t zns_leaf_total;
3974 nvlist_t *zns_string;
3975 nvlist_t *zns_uint64;
3976 nvlist_t *zns_boolean;
3977 } zdb_nvl_stats_t;
3978
3979 static void
3980 collect_nvlist_stats(nvlist_t *nvl, zdb_nvl_stats_t *stats)
3981 {
3982 nvlist_t *list, **array;
3983 nvpair_t *nvp = NULL;
3984 char *name;
3985 uint_t i, items;
3986
3987 stats->zns_list_count++;
3988
3989 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
3990 name = nvpair_name(nvp);
3991
3992 switch (nvpair_type(nvp)) {
3993 case DATA_TYPE_STRING:
3994 fnvlist_add_string(stats->zns_string, name,
3995 fnvpair_value_string(nvp));
3996 break;
3997 case DATA_TYPE_UINT64:
3998 fnvlist_add_uint64(stats->zns_uint64, name,
3999 fnvpair_value_uint64(nvp));
4000 break;
4001 case DATA_TYPE_BOOLEAN:
4002 fnvlist_add_boolean(stats->zns_boolean, name);
4003 break;
4004 case DATA_TYPE_NVLIST:
4005 if (nvpair_value_nvlist(nvp, &list) == 0)
4006 collect_nvlist_stats(list, stats);
4007 break;
4008 case DATA_TYPE_NVLIST_ARRAY:
4009 if (nvpair_value_nvlist_array(nvp, &array, &items) != 0)
4010 break;
4011
4012 for (i = 0; i < items; i++) {
4013 collect_nvlist_stats(array[i], stats);
4014
4015 /* collect stats on leaf vdev */
4016 if (strcmp(name, "children") == 0) {
4017 size_t size;
4018
4019 (void) nvlist_size(array[i], &size,
4020 NV_ENCODE_XDR);
4021 stats->zns_leaf_total += size;
4022 if (size > stats->zns_leaf_largest)
4023 stats->zns_leaf_largest = size;
4024 stats->zns_leaf_count++;
4025 }
4026 }
4027 break;
4028 default:
4029 (void) printf("skip type %d!\n", (int)nvpair_type(nvp));
4030 }
4031 }
4032 }
4033
4034 static void
4035 dump_nvlist_stats(nvlist_t *nvl, size_t cap)
4036 {
4037 zdb_nvl_stats_t stats = { 0 };
4038 size_t size, sum = 0, total;
4039 size_t noise;
4040
4041 /* requires nvlist with non-unique names for stat collection */
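/*
 * Passing 0 rather than NV_UNIQUE_NAME below keeps repeated keys as
 * separate pairs, so the per-type pair counts printed later reflect
 * every occurrence.
 */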
4042 VERIFY0(nvlist_alloc(&stats.zns_string, 0, 0));
4043 VERIFY0(nvlist_alloc(&stats.zns_uint64, 0, 0));
4044 VERIFY0(nvlist_alloc(&stats.zns_boolean, 0, 0));
4045 VERIFY0(nvlist_size(stats.zns_boolean, &noise, NV_ENCODE_XDR));
4046
4047 (void) printf("\n\nZFS Label NVList Config Stats:\n");
4048
4049 VERIFY0(nvlist_size(nvl, &total, NV_ENCODE_XDR));
4050 (void) printf(" %d bytes used, %d bytes free (using %4.1f%%)\n\n",
4051 (int)total, (int)(cap - total), 100.0 * total / cap);
4052
4053 collect_nvlist_stats(nvl, &stats);
4054
4055 VERIFY0(nvlist_size(stats.zns_uint64, &size, NV_ENCODE_XDR));
4056 size -= noise;
4057 sum += size;
4058 (void) printf("%12s %4d %6d bytes (%5.2f%%)\n", "integers:",
4059 (int)fnvlist_num_pairs(stats.zns_uint64),
4060 (int)size, 100.0 * size / total);
4061
4062 VERIFY0(nvlist_size(stats.zns_string, &size, NV_ENCODE_XDR));
4063 size -= noise;
4064 sum += size;
4065 (void) printf("%12s %4d %6d bytes (%5.2f%%)\n", "strings:",
4066 (int)fnvlist_num_pairs(stats.zns_string),
4067 (int)size, 100.0 * size / total);
4068
4069 VERIFY0(nvlist_size(stats.zns_boolean, &size, NV_ENCODE_XDR));
4070 size -= noise;
4071 sum += size;
4072 (void) printf("%12s %4d %6d bytes (%5.2f%%)\n", "booleans:",
4073 (int)fnvlist_num_pairs(stats.zns_boolean),
4074 (int)size, 100.0 * size / total);
4075
4076 size = total - sum; /* treat remainder as nvlist overhead */
4077 (void) printf("%12s %4d %6d bytes (%5.2f%%)\n\n", "nvlists:",
4078 stats.zns_list_count, (int)size, 100.0 * size / total);
4079
4080 if (stats.zns_leaf_count > 0) {
4081 size_t average = stats.zns_leaf_total / stats.zns_leaf_count;
4082
4083 (void) printf("%12s %4d %6d bytes average\n", "leaf vdevs:",
4084 stats.zns_leaf_count, (int)average);
4085 (void) printf("%24d bytes largest\n",
4086 (int)stats.zns_leaf_largest);
4087
4088 if (dump_opt['l'] >= 3 && average > 0)
4089 (void) printf(" space for %d additional leaf vdevs\n",
4090 (int)((cap - total) / average));
4091 }
4092 (void) printf("\n");
4093
4094 nvlist_free(stats.zns_string);
4095 nvlist_free(stats.zns_uint64);
4096 nvlist_free(stats.zns_boolean);
4097 }
4098
4099 typedef struct cksum_record {
4100 zio_cksum_t cksum;
4101 boolean_t labels[VDEV_LABELS];
4102 avl_node_t link;
4103 } cksum_record_t;
4104
4105 static int
4106 cksum_record_compare(const void *x1, const void *x2)
4107 {
4108 const cksum_record_t *l = (cksum_record_t *)x1;
4109 const cksum_record_t *r = (cksum_record_t *)x2;
4110 int arraysize = ARRAY_SIZE(l->cksum.zc_word);
4111 int difference;
4112
4113 for (int i = 0; i < arraysize; i++) {
4114 difference = TREE_CMP(l->cksum.zc_word[i], r->cksum.zc_word[i]);
4115 if (difference)
4116 break;
4117 }
4118
4119 return (difference);
4120 }
4121
4122 static cksum_record_t *
4123 cksum_record_alloc(zio_cksum_t *cksum, int l)
4124 {
4125 cksum_record_t *rec;
4126
4127 rec = umem_zalloc(sizeof (*rec), UMEM_NOFAIL);
4128 rec->cksum = *cksum;
4129 rec->labels[l] = B_TRUE;
4130
4131 return (rec);
4132 }
4133
4134 static cksum_record_t *
4135 cksum_record_lookup(avl_tree_t *tree, zio_cksum_t *cksum)
4136 {
4137 cksum_record_t lookup = { .cksum = *cksum };
4138 avl_index_t where;
4139
4140 return (avl_find(tree, &lookup, &where));
4141 }
4142
4143 static cksum_record_t *
4144 cksum_record_insert(avl_tree_t *tree, zio_cksum_t *cksum, int l)
4145 {
4146 cksum_record_t *rec;
4147
4148 rec = cksum_record_lookup(tree, cksum);
4149 if (rec) {
4150 rec->labels[l] = B_TRUE;
4151 } else {
4152 rec = cksum_record_alloc(cksum, l);
4153 avl_add(tree, rec);
4154 }
4155
4156 return (rec);
4157 }
4158
4159 static int
4160 first_label(cksum_record_t *rec)
4161 {
4162 for (int i = 0; i < VDEV_LABELS; i++)
4163 if (rec->labels[i])
4164 return (i);
4165
4166 return (-1);
4167 }
4168
4169 static void
4170 print_label_numbers(char *prefix, cksum_record_t *rec)
4171 {
4172 printf("%s", prefix);
4173 for (int i = 0; i < VDEV_LABELS; i++)
4174 if (rec->labels[i] == B_TRUE)
4175 printf("%d ", i);
4176 printf("\n");
4177 }
4178
4179 #define MAX_UBERBLOCK_COUNT (VDEV_UBERBLOCK_RING >> UBERBLOCK_SHIFT)
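/*
 * A worked figure, assuming the stock sys/vdev_impl.h constants
 * (VDEV_UBERBLOCK_RING = 128K, UBERBLOCK_SHIFT = 10): the ring holds
 * at most 128K >> 10 = 128 uberblock slots; larger ashifts yield
 * fewer, larger slots.
 */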
4180
4181 typedef struct zdb_label {
4182 vdev_label_t label;
4183 nvlist_t *config_nv;
4184 cksum_record_t *config;
4185 cksum_record_t *uberblocks[MAX_UBERBLOCK_COUNT];
4186 boolean_t header_printed;
4187 boolean_t read_failed;
4188 } zdb_label_t;
4189
4190 static void
4191 print_label_header(zdb_label_t *label, int l)
4192 {
4194 if (dump_opt['q'])
4195 return;
4196
4197 if (label->header_printed == B_TRUE)
4198 return;
4199
4200 (void) printf("------------------------------------\n");
4201 (void) printf("LABEL %d\n", l);
4202 (void) printf("------------------------------------\n");
4203
4204 label->header_printed = B_TRUE;
4205 }
4206
4207 static void
4208 print_l2arc_header(void)
4209 {
4210 (void) printf("------------------------------------\n");
4211 (void) printf("L2ARC device header\n");
4212 (void) printf("------------------------------------\n");
4213 }
4214
4215 static void
4216 print_l2arc_log_blocks(void)
4217 {
4218 (void) printf("------------------------------------\n");
4219 (void) printf("L2ARC device log blocks\n");
4220 (void) printf("------------------------------------\n");
4221 }
4222
4223 static void
4224 dump_l2arc_log_entries(uint64_t log_entries,
4225 l2arc_log_ent_phys_t *le, uint64_t i)
4226 {
4227 for (int j = 0; j < log_entries; j++) {
4228 dva_t dva = le[j].le_dva;
4229 (void) printf("lb[%4llu]\tle[%4d]\tDVA asize: %llu, "
4230 "vdev: %llu, offset: %llu\n",
4231 (u_longlong_t)i, j + 1,
4232 (u_longlong_t)DVA_GET_ASIZE(&dva),
4233 (u_longlong_t)DVA_GET_VDEV(&dva),
4234 (u_longlong_t)DVA_GET_OFFSET(&dva));
4235 (void) printf("|\t\t\t\tbirth: %llu\n",
4236 (u_longlong_t)le[j].le_birth);
4237 (void) printf("|\t\t\t\tlsize: %llu\n",
4238 (u_longlong_t)L2BLK_GET_LSIZE((&le[j])->le_prop));
4239 (void) printf("|\t\t\t\tpsize: %llu\n",
4240 (u_longlong_t)L2BLK_GET_PSIZE((&le[j])->le_prop));
4241 (void) printf("|\t\t\t\tcompr: %llu\n",
4242 (u_longlong_t)L2BLK_GET_COMPRESS((&le[j])->le_prop));
4243 (void) printf("|\t\t\t\tcomplevel: %llu\n",
4244 (u_longlong_t)(&le[j])->le_complevel);
4245 (void) printf("|\t\t\t\ttype: %llu\n",
4246 (u_longlong_t)L2BLK_GET_TYPE((&le[j])->le_prop));
4247 (void) printf("|\t\t\t\tprotected: %llu\n",
4248 (u_longlong_t)L2BLK_GET_PROTECTED((&le[j])->le_prop));
4249 (void) printf("|\t\t\t\tprefetch: %llu\n",
4250 (u_longlong_t)L2BLK_GET_PREFETCH((&le[j])->le_prop));
4251 (void) printf("|\t\t\t\taddress: %llu\n",
4252 (u_longlong_t)le[j].le_daddr);
4253 (void) printf("|\t\t\t\tARC state: %llu\n",
4254 (u_longlong_t)L2BLK_GET_STATE((&le[j])->le_prop));
4255 (void) printf("|\n");
4256 }
4257 (void) printf("\n");
4258 }
4259
4260 static void
4261 dump_l2arc_log_blkptr(l2arc_log_blkptr_t lbps)
4262 {
4263 (void) printf("|\t\tdaddr: %llu\n", (u_longlong_t)lbps.lbp_daddr);
4264 (void) printf("|\t\tpayload_asize: %llu\n",
4265 (u_longlong_t)lbps.lbp_payload_asize);
4266 (void) printf("|\t\tpayload_start: %llu\n",
4267 (u_longlong_t)lbps.lbp_payload_start);
4268 (void) printf("|\t\tlsize: %llu\n",
4269 (u_longlong_t)L2BLK_GET_LSIZE((&lbps)->lbp_prop));
4270 (void) printf("|\t\tasize: %llu\n",
4271 (u_longlong_t)L2BLK_GET_PSIZE((&lbps)->lbp_prop));
4272 (void) printf("|\t\tcompralgo: %llu\n",
4273 (u_longlong_t)L2BLK_GET_COMPRESS((&lbps)->lbp_prop));
4274 (void) printf("|\t\tcksumalgo: %llu\n",
4275 (u_longlong_t)L2BLK_GET_CHECKSUM((&lbps)->lbp_prop));
4276 (void) printf("|\n\n");
4277 }
4278
4279 static void
4280 dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr,
4281 l2arc_dev_hdr_phys_t *rebuild)
4282 {
4283 l2arc_log_blk_phys_t this_lb;
4284 uint64_t asize;
4285 l2arc_log_blkptr_t lbps[2];
4286 abd_t *abd;
4287 zio_cksum_t cksum;
4288 int failed = 0;
4289 l2arc_dev_t dev;
4290
4291 if (!dump_opt['q'])
4292 print_l2arc_log_blocks();
4293 bcopy(l2dhdr.dh_start_lbps, lbps, sizeof (lbps));
4294
4295 dev.l2ad_evict = l2dhdr.dh_evict;
4296 dev.l2ad_start = l2dhdr.dh_start;
4297 dev.l2ad_end = l2dhdr.dh_end;
4298
4299 if (l2dhdr.dh_start_lbps[0].lbp_daddr == 0) {
4300 /* no log blocks to read */
4301 if (!dump_opt['q']) {
4302 (void) printf("No log blocks to read\n");
4303 (void) printf("\n");
4304 }
4305 return;
4306 } else {
4307 dev.l2ad_hand = lbps[0].lbp_daddr +
4308 L2BLK_GET_PSIZE((&lbps[0])->lbp_prop);
4309 }
4310
4311 dev.l2ad_first = !!(l2dhdr.dh_flags & L2ARC_DEV_HDR_EVICT_FIRST);
4312
4313 for (;;) {
4314 if (!l2arc_log_blkptr_valid(&dev, &lbps[0]))
4315 break;
4316
4317 /* L2BLK_GET_PSIZE returns aligned size for log blocks */
4318 asize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop);
4319 if (pread64(fd, &this_lb, asize, lbps[0].lbp_daddr) != asize) {
4320 if (!dump_opt['q']) {
4321 (void) printf("Error while reading next log "
4322 "block\n\n");
4323 }
4324 break;
4325 }
4326
4327 fletcher_4_native_varsize(&this_lb, asize, &cksum);
4328 if (!ZIO_CHECKSUM_EQUAL(cksum, lbps[0].lbp_cksum)) {
4329 failed++;
4330 if (!dump_opt['q']) {
4331 (void) printf("Invalid cksum\n");
4332 dump_l2arc_log_blkptr(lbps[0]);
4333 }
4334 break;
4335 }
4336
4337 switch (L2BLK_GET_COMPRESS((&lbps[0])->lbp_prop)) {
4338 case ZIO_COMPRESS_OFF:
4339 break;
4340 default:
4341 abd = abd_alloc_for_io(asize, B_TRUE);
4342 abd_copy_from_buf_off(abd, &this_lb, 0, asize);
4343 zio_decompress_data(L2BLK_GET_COMPRESS(
4344 (&lbps[0])->lbp_prop), abd, &this_lb,
4345 asize, sizeof (this_lb), NULL);
4346 abd_free(abd);
4347 break;
4348 }
4349
4350 if (this_lb.lb_magic == BSWAP_64(L2ARC_LOG_BLK_MAGIC))
4351 byteswap_uint64_array(&this_lb, sizeof (this_lb));
4352 if (this_lb.lb_magic != L2ARC_LOG_BLK_MAGIC) {
4353 if (!dump_opt['q'])
4354 (void) printf("Invalid log block magic\n\n");
4355 break;
4356 }
4357
4358 rebuild->dh_lb_count++;
4359 rebuild->dh_lb_asize += asize;
4360 if (dump_opt['l'] > 1 && !dump_opt['q']) {
4361 (void) printf("lb[%4llu]\tmagic: %llu\n",
4362 (u_longlong_t)rebuild->dh_lb_count,
4363 (u_longlong_t)this_lb.lb_magic);
4364 dump_l2arc_log_blkptr(lbps[0]);
4365 }
4366
4367 if (dump_opt['l'] > 2 && !dump_opt['q'])
4368 dump_l2arc_log_entries(l2dhdr.dh_log_entries,
4369 this_lb.lb_entries,
4370 rebuild->dh_lb_count);
4371
4372 if (l2arc_range_check_overlap(lbps[1].lbp_payload_start,
4373 lbps[0].lbp_payload_start, dev.l2ad_evict) &&
4374 !dev.l2ad_first)
4375 break;
4376
4377 lbps[0] = lbps[1];
4378 lbps[1] = this_lb.lb_prev_lbp;
4379 }
4380
4381 if (!dump_opt['q']) {
4382 (void) printf("log_blk_count:\t %llu with valid cksum\n",
4383 (u_longlong_t)rebuild->dh_lb_count);
4384 (void) printf("\t\t %d with invalid cksum\n", failed);
4385 (void) printf("log_blk_asize:\t %llu\n\n",
4386 (u_longlong_t)rebuild->dh_lb_asize);
4387 }
4388 }
4389
4390 static int
4391 dump_l2arc_header(int fd)
4392 {
4393 l2arc_dev_hdr_phys_t l2dhdr, rebuild;
4394 int error = B_FALSE;
4395
4396 bzero(&l2dhdr, sizeof (l2dhdr));
4397 bzero(&rebuild, sizeof (rebuild));
4398
4399 if (pread64(fd, &l2dhdr, sizeof (l2dhdr),
4400 VDEV_LABEL_START_SIZE) != sizeof (l2dhdr)) {
4401 error = B_TRUE;
4402 } else {
4403 if (l2dhdr.dh_magic == BSWAP_64(L2ARC_DEV_HDR_MAGIC))
4404 byteswap_uint64_array(&l2dhdr, sizeof (l2dhdr));
4405
4406 if (l2dhdr.dh_magic != L2ARC_DEV_HDR_MAGIC)
4407 error = B_TRUE;
4408 }
4409
4410 if (error) {
4411 (void) printf("L2ARC device header not found\n\n");
4412 /* Do not return an error here for backward compatibility */
4413 return (0);
4414 } else if (!dump_opt['q']) {
4415 print_l2arc_header();
4416
4417 (void) printf(" magic: %llu\n",
4418 (u_longlong_t)l2dhdr.dh_magic);
4419 (void) printf(" version: %llu\n",
4420 (u_longlong_t)l2dhdr.dh_version);
4421 (void) printf(" pool_guid: %llu\n",
4422 (u_longlong_t)l2dhdr.dh_spa_guid);
4423 (void) printf(" flags: %llu\n",
4424 (u_longlong_t)l2dhdr.dh_flags);
4425 (void) printf(" start_lbps[0]: %llu\n",
4426 (u_longlong_t)
4427 l2dhdr.dh_start_lbps[0].lbp_daddr);
4428 (void) printf(" start_lbps[1]: %llu\n",
4429 (u_longlong_t)
4430 l2dhdr.dh_start_lbps[1].lbp_daddr);
4431 (void) printf(" log_blk_ent: %llu\n",
4432 (u_longlong_t)l2dhdr.dh_log_entries);
4433 (void) printf(" start: %llu\n",
4434 (u_longlong_t)l2dhdr.dh_start);
4435 (void) printf(" end: %llu\n",
4436 (u_longlong_t)l2dhdr.dh_end);
4437 (void) printf(" evict: %llu\n",
4438 (u_longlong_t)l2dhdr.dh_evict);
4439 (void) printf(" lb_asize_refcount: %llu\n",
4440 (u_longlong_t)l2dhdr.dh_lb_asize);
4441 (void) printf(" lb_count_refcount: %llu\n",
4442 (u_longlong_t)l2dhdr.dh_lb_count);
4443 (void) printf(" trim_action_time: %llu\n",
4444 (u_longlong_t)l2dhdr.dh_trim_action_time);
4445 (void) printf(" trim_state: %llu\n\n",
4446 (u_longlong_t)l2dhdr.dh_trim_state);
4447 }
4448
4449 dump_l2arc_log_blocks(fd, l2dhdr, &rebuild);
4450 /*
4451 * The total aligned size of log blocks and the number of log blocks
4452 * reported in the header of the device may be less than what zdb
4453 * reports via dump_l2arc_log_blocks(), which emulates l2arc_rebuild().
4454 * This happens because dump_l2arc_log_blocks() lacks the memory
4455 * pressure valve that l2arc_rebuild() has. Thus, on a system with low
4456 * memory, l2arc_rebuild() will exit prematurely and dh_lb_asize and
4457 * dh_lb_count will be lower to begin with than what exists on the
4458 * device. This is normal and zdb should not exit with an error. The
4459 * opposite case should never happen, though: the values reported in
4460 * the header should never be higher than what dump_l2arc_log_blocks()
4461 * and l2arc_rebuild() report. If they are, there is a leak in the
4462 * accounting of log blocks.
4463 */
4464 if (l2dhdr.dh_lb_asize > rebuild.dh_lb_asize ||
4465 l2dhdr.dh_lb_count > rebuild.dh_lb_count)
4466 return (1);
4467
4468 return (0);
4469 }
4470
4471 static void
4472 dump_config_from_label(zdb_label_t *label, size_t buflen, int l)
4473 {
4474 if (dump_opt['q'])
4475 return;
4476
4477 if ((dump_opt['l'] < 3) && (first_label(label->config) != l))
4478 return;
4479
4480 print_label_header(label, l);
4481 dump_nvlist(label->config_nv, 4);
4482 print_label_numbers(" labels = ", label->config);
4483
4484 if (dump_opt['l'] >= 2)
4485 dump_nvlist_stats(label->config_nv, buflen);
4486 }
4487
4488 #define ZDB_MAX_UB_HEADER_SIZE 32
4489
4490 static void
4491 dump_label_uberblocks(zdb_label_t *label, uint64_t ashift, int label_num)
4492 {
4494 vdev_t vd;
4495 char header[ZDB_MAX_UB_HEADER_SIZE];
4496
4497 vd.vdev_ashift = ashift;
4498 vd.vdev_top = &vd;
4499
4500 for (int i = 0; i < VDEV_UBERBLOCK_COUNT(&vd); i++) {
4501 uint64_t uoff = VDEV_UBERBLOCK_OFFSET(&vd, i);
4502 uberblock_t *ub = (void *)((char *)&label->label + uoff);
4503 cksum_record_t *rec = label->uberblocks[i];
4504
4505 if (rec == NULL) {
4506 if (dump_opt['u'] >= 2) {
4507 print_label_header(label, label_num);
4508 (void) printf(" Uberblock[%d] invalid\n", i);
4509 }
4510 continue;
4511 }
4512
4513 if ((dump_opt['u'] < 3) && (first_label(rec) != label_num))
4514 continue;
4515
4516 if ((dump_opt['u'] < 4) &&
4517 (ub->ub_mmp_magic == MMP_MAGIC) && ub->ub_mmp_delay &&
4518 (i >= VDEV_UBERBLOCK_COUNT(&vd) - MMP_BLOCKS_PER_LABEL))
4519 continue;
4520
4521 print_label_header(label, label_num);
4522 (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
4523 " Uberblock[%d]\n", i);
4524 dump_uberblock(ub, header, "");
4525 print_label_numbers(" labels = ", rec);
4526 }
4527 }
4528
4529 static char curpath[PATH_MAX];
4530
4531 /*
4532 * Iterate through the path components, recursively passing
4533 * current one's obj and remaining path until we find the obj
4534 * for the last one.
4535 */
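/*
 * For example, resolving "usr/bin/ls" first looks up "usr" in the
 * directory ZAP identified by obj, then recurses with the remaining
 * "bin/ls", and finally dumps (or returns via retobj) the object that
 * "ls" maps to.
 */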
4536 static int
4537 dump_path_impl(objset_t *os, uint64_t obj, char *name, uint64_t *retobj)
4538 {
4539 int err;
4540 boolean_t header = B_TRUE;
4541 uint64_t child_obj;
4542 char *s;
4543 dmu_buf_t *db;
4544 dmu_object_info_t doi;
4545
4546 if ((s = strchr(name, '/')) != NULL)
4547 *s = '\0';
4548 err = zap_lookup(os, obj, name, 8, 1, &child_obj);
4549
4550 (void) strlcat(curpath, name, sizeof (curpath));
4551
4552 if (err != 0) {
4553 (void) fprintf(stderr, "failed to lookup %s: %s\n",
4554 curpath, strerror(err));
4555 return (err);
4556 }
4557
4558 child_obj = ZFS_DIRENT_OBJ(child_obj);
4559 err = sa_buf_hold(os, child_obj, FTAG, &db);
4560 if (err != 0) {
4561 (void) fprintf(stderr,
4562 "failed to get SA dbuf for obj %llu: %s\n",
4563 (u_longlong_t)child_obj, strerror(err));
4564 return (EINVAL);
4565 }
4566 dmu_object_info_from_db(db, &doi);
4567 sa_buf_rele(db, FTAG);
4568
4569 if (doi.doi_bonus_type != DMU_OT_SA &&
4570 doi.doi_bonus_type != DMU_OT_ZNODE) {
4571 (void) fprintf(stderr, "invalid bonus type %d for obj %llu\n",
4572 doi.doi_bonus_type, (u_longlong_t)child_obj);
4573 return (EINVAL);
4574 }
4575
4576 if (dump_opt['v'] > 6) {
4577 (void) printf("obj=%llu %s type=%d bonustype=%d\n",
4578 (u_longlong_t)child_obj, curpath, doi.doi_type,
4579 doi.doi_bonus_type);
4580 }
4581
4582 (void) strlcat(curpath, "/", sizeof (curpath));
4583
4584 switch (doi.doi_type) {
4585 case DMU_OT_DIRECTORY_CONTENTS:
4586 if (s != NULL && *(s + 1) != '\0')
4587 return (dump_path_impl(os, child_obj, s + 1, retobj));
4588 /*FALLTHROUGH*/
4589 case DMU_OT_PLAIN_FILE_CONTENTS:
4590 if (retobj != NULL) {
4591 *retobj = child_obj;
4592 } else {
4593 dump_object(os, child_obj, dump_opt['v'], &header,
4594 NULL, 0);
4595 }
4596 return (0);
4597 default:
4598 (void) fprintf(stderr, "object %llu has non-file/directory "
4599 "type %d\n", (u_longlong_t)obj, doi.doi_type);
4600 break;
4601 }
4602
4603 return (EINVAL);
4604 }
4605
4606 /*
4607 * Dump the blocks for the object specified by path inside the dataset.
4608 */
4609 static int
4610 dump_path(char *ds, char *path, uint64_t *retobj)
4611 {
4612 int err;
4613 objset_t *os;
4614 uint64_t root_obj;
4615
4616 err = open_objset(ds, FTAG, &os);
4617 if (err != 0)
4618 return (err);
4619
4620 err = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &root_obj);
4621 if (err != 0) {
4622 (void) fprintf(stderr, "can't lookup root znode: %s\n",
4623 strerror(err));
4624 close_objset(os, FTAG);
4625 return (EINVAL);
4626 }
4627
4628 (void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds);
4629
4630 err = dump_path_impl(os, root_obj, path, retobj);
4631
4632 close_objset(os, FTAG);
4633 return (err);
4634 }
4635
4636 static int
4637 zdb_copy_object(objset_t *os, uint64_t srcobj, char *destfile)
4638 {
4639 int err = 0;
4640 uint64_t size, readsize, oursize, offset;
4641 ssize_t writesize;
4642 sa_handle_t *hdl;
4643
4644 (void) printf("Copying object %" PRIu64 " to file %s\n", srcobj,
4645 destfile);
4646
4647 VERIFY3P(os, ==, sa_os);
4648 if ((err = sa_handle_get(os, srcobj, NULL, SA_HDL_PRIVATE, &hdl))) {
4649 (void) printf("Failed to get handle for SA znode\n");
4650 return (err);
4651 }
4652 if ((err = sa_lookup(hdl, sa_attr_table[ZPL_SIZE], &size, 8))) {
4653 (void) sa_handle_destroy(hdl);
4654 return (err);
4655 }
4656 (void) sa_handle_destroy(hdl);
4657
4658 (void) printf("Object %" PRIu64 " is %" PRIu64 " bytes\n", srcobj,
4659 size);
4660 if (size == 0) {
4661 return (EINVAL);
4662 }
4663
4664 int fd = open(destfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
if (fd == -1)
return (errno);
4665 /*
4666 * We cap the size at 1 mebibyte here to prevent
4667 * allocation failures and nigh-infinite printing if the
4668 * object is extremely large.
4669 */
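/*
 * A 10 MiB object, for instance, is therefore copied through ten
 * dmu_read() calls of 1 MiB each, reusing this single buffer.
 */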
4670 oursize = MIN(size, 1 << 20);
4671 offset = 0;
4672 char *buf = kmem_alloc(oursize, KM_NOSLEEP);
4673 if (buf == NULL) {
(void) close(fd);
4674 return (ENOMEM);
4675 }
4676
4677 while (offset < size) {
4678 readsize = MIN(size - offset, 1 << 20);
4679 err = dmu_read(os, srcobj, offset, readsize, buf, 0);
4680 if (err != 0) {
4681 (void) printf("got error %u from dmu_read\n", err);
4682 kmem_free(buf, oursize);
4683 return (err);
4684 }
4685 if (dump_opt['v'] > 3) {
4686 (void) printf("Read offset=%" PRIu64 " size=%" PRIu64
4687 " error=%d\n", offset, readsize, err);
4688 }
4689
4690 writesize = write(fd, buf, readsize);
4691 if (writesize < 0) {
4692 err = errno;
4693 break;
4694 } else if (writesize != readsize) {
4695 /* Incomplete write */
4696 (void) fprintf(stderr, "Short write, only wrote %llu of"
4697 " %" PRIu64 " bytes, exiting...\n",
4698 (u_longlong_t)writesize, readsize);
4699 break;
4700 }
4701
4702 offset += readsize;
4703 }
4704
4705 (void) close(fd);
4706
4707 if (buf != NULL)
4708 kmem_free(buf, oursize);
4709
4710 return (err);
4711 }
4712
4713 static int
4714 dump_label(const char *dev)
4715 {
4716 char path[MAXPATHLEN];
4717 zdb_label_t labels[VDEV_LABELS];
4718 uint64_t psize, ashift, l2cache;
4719 struct stat64 statbuf;
4720 boolean_t config_found = B_FALSE;
4721 boolean_t error = B_FALSE;
4722 boolean_t read_l2arc_header = B_FALSE;
4723 avl_tree_t config_tree;
4724 avl_tree_t uberblock_tree;
4725 void *node, *cookie;
4726 int fd;
4727
4728 bzero(labels, sizeof (labels));
4729
4730 /*
4731 * Check whether we were given an absolute path and use it as-is.
4732 * Otherwise, if the provided vdev name doesn't point to a file,
4733 * try prepending expected disk paths and partition numbers.
4734 */
4735 (void) strlcpy(path, dev, sizeof (path));
4736 if (dev[0] != '/' && stat64(path, &statbuf) != 0) {
4737 int error;
4738
4739 error = zfs_resolve_shortname(dev, path, MAXPATHLEN);
4740 if (error == 0 && zfs_dev_is_whole_disk(path)) {
4741 if (zfs_append_partition(path, MAXPATHLEN) == -1)
4742 error = ENOENT;
4743 }
4744
4745 if (error || (stat64(path, &statbuf) != 0)) {
4746 (void) printf("failed to find device %s, try "
4747 "specifying absolute path instead\n", dev);
4748 return (1);
4749 }
4750 }
4751
4752 if ((fd = open64(path, O_RDONLY)) < 0) {
4753 (void) printf("cannot open '%s': %s\n", path, strerror(errno));
4754 exit(1);
4755 }
4756
4757 if (fstat64_blk(fd, &statbuf) != 0) {
4758 (void) printf("failed to stat '%s': %s\n", path,
4759 strerror(errno));
4760 (void) close(fd);
4761 exit(1);
4762 }
4763
4764 if (S_ISBLK(statbuf.st_mode) && zfs_dev_flush(fd) != 0)
4765 (void) printf("failed to invalidate cache '%s' : %s\n", path,
4766 strerror(errno));
4767
4768 avl_create(&config_tree, cksum_record_compare,
4769 sizeof (cksum_record_t), offsetof(cksum_record_t, link));
4770 avl_create(&uberblock_tree, cksum_record_compare,
4771 sizeof (cksum_record_t), offsetof(cksum_record_t, link));
4772
4773 psize = statbuf.st_size;
4774 psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
4775 ashift = SPA_MINBLOCKSHIFT;
4776
4777 /*
4778 * 1. Read the label from disk
4779 * 2. Unpack the configuration and insert in config tree.
4780 * 3. Traverse all uberblocks and insert in uberblock tree.
4781 */
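/*
 * Labels 0 and 1 sit at the front of the device and labels 2 and 3 at
 * the end; vdev_label_offset() below maps each index to the proper
 * byte offset within the aligned device size.
 */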
4782 for (int l = 0; l < VDEV_LABELS; l++) {
4783 zdb_label_t *label = &labels[l];
4784 char *buf = label->label.vl_vdev_phys.vp_nvlist;
4785 size_t buflen = sizeof (label->label.vl_vdev_phys.vp_nvlist);
4786 nvlist_t *config;
4787 cksum_record_t *rec;
4788 zio_cksum_t cksum;
4789 vdev_t vd;
4790
4791 if (pread64(fd, &label->label, sizeof (label->label),
4792 vdev_label_offset(psize, l, 0)) != sizeof (label->label)) {
4793 if (!dump_opt['q'])
4794 (void) printf("failed to read label %d\n", l);
4795 label->read_failed = B_TRUE;
4796 error = B_TRUE;
4797 continue;
4798 }
4799
4800 label->read_failed = B_FALSE;
4801
4802 if (nvlist_unpack(buf, buflen, &config, 0) == 0) {
4803 nvlist_t *vdev_tree = NULL;
4804 size_t size;
4805
4806 if ((nvlist_lookup_nvlist(config,
4807 ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
4808 (nvlist_lookup_uint64(vdev_tree,
4809 ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
4810 ashift = SPA_MINBLOCKSHIFT;
4811
4812 if (nvlist_size(config, &size, NV_ENCODE_XDR) != 0)
4813 size = buflen;
4814
4815 /* If the device is a cache device, remember to dump the L2ARC header. */
4816 if (!read_l2arc_header) {
4817 if (nvlist_lookup_uint64(config,
4818 ZPOOL_CONFIG_POOL_STATE, &l2cache) == 0 &&
4819 l2cache == POOL_STATE_L2CACHE) {
4820 read_l2arc_header = B_TRUE;
4821 }
4822 }
4823
4824 fletcher_4_native_varsize(buf, size, &cksum);
4825 rec = cksum_record_insert(&config_tree, &cksum, l);
4826
4827 label->config = rec;
4828 label->config_nv = config;
4829 config_found = B_TRUE;
4830 } else {
4831 error = B_TRUE;
4832 }
4833
4834 vd.vdev_ashift = ashift;
4835 vd.vdev_top = &vd;
4836
4837 for (int i = 0; i < VDEV_UBERBLOCK_COUNT(&vd); i++) {
4838 uint64_t uoff = VDEV_UBERBLOCK_OFFSET(&vd, i);
4839 uberblock_t *ub = (void *)((char *)label + uoff);
4840
4841 if (uberblock_verify(ub))
4842 continue;
4843
4844 fletcher_4_native_varsize(ub, sizeof (*ub), &cksum);
4845 rec = cksum_record_insert(&uberblock_tree, &cksum, l);
4846
4847 label->uberblocks[i] = rec;
4848 }
4849 }
4850
4851 /*
4852 * Dump the label and uberblocks.
4853 */
4854 for (int l = 0; l < VDEV_LABELS; l++) {
4855 zdb_label_t *label = &labels[l];
4856 size_t buflen = sizeof (label->label.vl_vdev_phys.vp_nvlist);
4857
4858 if (label->read_failed == B_TRUE)
4859 continue;
4860
4861 if (label->config_nv) {
4862 dump_config_from_label(label, buflen, l);
4863 } else {
4864 if (!dump_opt['q'])
4865 (void) printf("failed to unpack label %d\n", l);
4866 }
4867
4868 if (dump_opt['u'])
4869 dump_label_uberblocks(label, ashift, l);
4870
4871 nvlist_free(label->config_nv);
4872 }
4873
4874 /*
4875 * Dump the L2ARC header, if existent.
4876 */
4877 if (read_l2arc_header)
4878 error |= dump_l2arc_header(fd);
4879
4880 cookie = NULL;
4881 while ((node = avl_destroy_nodes(&config_tree, &cookie)) != NULL)
4882 umem_free(node, sizeof (cksum_record_t));
4883
4884 cookie = NULL;
4885 while ((node = avl_destroy_nodes(&uberblock_tree, &cookie)) != NULL)
4886 umem_free(node, sizeof (cksum_record_t));
4887
4888 avl_destroy(&config_tree);
4889 avl_destroy(&uberblock_tree);
4890
4891 (void) close(fd);
4892
4893 return (config_found == B_FALSE ? 2 :
4894 (error == B_TRUE ? 1 : 0));
4895 }
4896
4897 static uint64_t dataset_feature_count[SPA_FEATURES];
4898 static uint64_t global_feature_count[SPA_FEATURES];
4899 static uint64_t remap_deadlist_count = 0;
4900
4901 /*ARGSUSED*/
4902 static int
4903 dump_one_objset(const char *dsname, void *arg)
4904 {
4905 int error;
4906 objset_t *os;
4907 spa_feature_t f;
4908
4909 error = open_objset(dsname, FTAG, &os);
4910 if (error != 0)
4911 return (0);
4912
4913 for (f = 0; f < SPA_FEATURES; f++) {
4914 if (!dsl_dataset_feature_is_active(dmu_objset_ds(os), f))
4915 continue;
4916 ASSERT(spa_feature_table[f].fi_flags &
4917 ZFEATURE_FLAG_PER_DATASET);
4918 dataset_feature_count[f]++;
4919 }
4920
4921 if (dsl_dataset_remap_deadlist_exists(dmu_objset_ds(os))) {
4922 remap_deadlist_count++;
4923 }
4924
4925 for (dsl_bookmark_node_t *dbn =
4926 avl_first(&dmu_objset_ds(os)->ds_bookmarks); dbn != NULL;
4927 dbn = AVL_NEXT(&dmu_objset_ds(os)->ds_bookmarks, dbn)) {
4928 mos_obj_refd(dbn->dbn_phys.zbm_redaction_obj);
4929 if (dbn->dbn_phys.zbm_redaction_obj != 0)
4930 global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS]++;
4931 if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)
4932 global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN]++;
4933 }
4934
4935 if (dsl_deadlist_is_open(&dmu_objset_ds(os)->ds_dir->dd_livelist) &&
4936 !dmu_objset_is_snapshot(os)) {
4937 global_feature_count[SPA_FEATURE_LIVELIST]++;
4938 }
4939
4940 dump_objset(os);
4941 close_objset(os, FTAG);
4942 fuid_table_destroy();
4943 return (0);
4944 }
4945
4946 /*
4947 * Block statistics.
4948 */
4949 #define PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2)
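/*
 * With the stock SPA_OLD_MAXBLOCKSIZE (128K) and SPA_MINBLOCKSIZE
 * (512) this is 258 slots: one for every 512-byte multiple from 0
 * through 128K, plus the overflow slot that zdb_count_block() uses
 * for anything larger.
 */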
4950 typedef struct zdb_blkstats {
4951 uint64_t zb_asize;
4952 uint64_t zb_lsize;
4953 uint64_t zb_psize;
4954 uint64_t zb_count;
4955 uint64_t zb_gangs;
4956 uint64_t zb_ditto_samevdev;
4957 uint64_t zb_ditto_same_ms;
4958 uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE];
4959 } zdb_blkstats_t;
4960
4961 /*
4962 * Extended object types to report deferred frees and dedup auto-ditto blocks.
4963 */
4964 #define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0)
4965 #define ZDB_OT_DITTO (DMU_OT_NUMTYPES + 1)
4966 #define ZDB_OT_OTHER (DMU_OT_NUMTYPES + 2)
4967 #define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 3)
4968
4969 static const char *zdb_ot_extname[] = {
4970 "deferred free",
4971 "dedup ditto",
4972 "other",
4973 "Total",
4974 };
4975
4976 #define ZB_TOTAL DN_MAX_LEVELS
4977 #define SPA_MAX_FOR_16M (SPA_MAXBLOCKSHIFT+1)
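/*
 * SPA_MAXBLOCKSHIFT is 24 (a 16M block), so SPA_MAX_FOR_16M is 25 and
 * the zcb_*_count/_len arrays below hold one slot per power-of-two
 * block size up to 2^24; dump_size_histograms() prints shifts 9 (512)
 * and up.
 */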
4978
4979 typedef struct zdb_cb {
4980 zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
4981 uint64_t zcb_removing_size;
4982 uint64_t zcb_checkpoint_size;
4983 uint64_t zcb_dedup_asize;
4984 uint64_t zcb_dedup_blocks;
4985 uint64_t zcb_psize_count[SPA_MAX_FOR_16M];
4986 uint64_t zcb_lsize_count[SPA_MAX_FOR_16M];
4987 uint64_t zcb_asize_count[SPA_MAX_FOR_16M];
4988 uint64_t zcb_psize_len[SPA_MAX_FOR_16M];
4989 uint64_t zcb_lsize_len[SPA_MAX_FOR_16M];
4990 uint64_t zcb_asize_len[SPA_MAX_FOR_16M];
4991 uint64_t zcb_psize_total;
4992 uint64_t zcb_lsize_total;
4993 uint64_t zcb_asize_total;
4994 uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
4995 uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
4996 [BPE_PAYLOAD_SIZE + 1];
4997 uint64_t zcb_start;
4998 hrtime_t zcb_lastprint;
4999 uint64_t zcb_totalasize;
5000 uint64_t zcb_errors[256];
5001 int zcb_readfails;
5002 int zcb_haderrors;
5003 spa_t *zcb_spa;
5004 uint32_t **zcb_vd_obsolete_counts;
5005 } zdb_cb_t;
5006
5007 /* test if two DVA offsets from same vdev are within the same metaslab */
5008 static boolean_t
5009 same_metaslab(spa_t *spa, uint64_t vdev, uint64_t off1, uint64_t off2)
5010 {
5011 vdev_t *vd = vdev_lookup_top(spa, vdev);
5012 uint64_t ms_shift = vd->vdev_ms_shift;
5013
5014 return ((off1 >> ms_shift) == (off2 >> ms_shift));
5015 }
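
/*
 * For example, on a vdev with 512M metaslabs (vdev_ms_shift == 29, an
 * assumed figure for the sketch), offsets 0x10000000 and 0x1fffffff
 * both shift down to metaslab 0 and compare equal, while 0x20000000
 * lands in metaslab 1.
 */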
5016
5017 /*
5018 * Used to simplify reporting of the histogram data.
5019 */
5020 typedef struct one_histo {
5021 char *name;
5022 uint64_t *count;
5023 uint64_t *len;
5024 uint64_t cumulative;
5025 } one_histo_t;
5026
5027 /*
5028 * The number of separate histograms processed for psize, lsize and asize.
5029 */
5030 #define NUM_HISTO 3
5031
5032 /*
5033 * This routine creates fixed-column output of three histograms which
5034 * show, for each block size from 512 up to 2^(SPA_MAX_FOR_16M - 1)
5035 * (i.e. 16M), the count, length, and cumulative length of the psize,
5036 * lsize, and asize blocks.
5037 *
5038 * All three types of blocks are listed on a single line.
5039 *
5040 * By default the table is printed in nicenum format (e.g. 123K), but
5041 * if the '-P' parameter is specified then the full raw (parseable)
5042 * number is printed instead.
5043 */
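/*
 * A schematic of the resulting table (values invented for the sketch;
 * real output depends on the pool):
 *
 * block   psize                 lsize                 asize
 *  size   Count  Size   Cum.    Count  Size   Cum.    Count  Size   Cum.
 *   512:  1.2K   618K   618K    1.1K   553K   553K    1.3K   655K   655K
 *    1K:   ...
 */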
5044 static void
5045 dump_size_histograms(zdb_cb_t *zcb)
5046 {
5047 /*
5048 * A temporary buffer that allows us to convert a number into
5049 * a string using zdb_nicenumber to allow either raw or human
5050 * readable numbers to be output.
5051 */
5052 char numbuf[32];
5053
5054 /*
5055 * Define titles which are used in the headers of the tables
5056 * printed by this routine.
5057 */
5058 const char blocksize_title1[] = "block";
5059 const char blocksize_title2[] = "size";
5060 const char count_title[] = "Count";
5061 const char length_title[] = "Size";
5062 const char cumulative_title[] = "Cum.";
5063
5064 /*
5065 * Setup the histogram arrays (psize, lsize, and asize).
5066 */
5067 one_histo_t parm_histo[NUM_HISTO];
5068
5069 parm_histo[0].name = "psize";
5070 parm_histo[0].count = zcb->zcb_psize_count;
5071 parm_histo[0].len = zcb->zcb_psize_len;
5072 parm_histo[0].cumulative = 0;
5073
5074 parm_histo[1].name = "lsize";
5075 parm_histo[1].count = zcb->zcb_lsize_count;
5076 parm_histo[1].len = zcb->zcb_lsize_len;
5077 parm_histo[1].cumulative = 0;
5078
5079 parm_histo[2].name = "asize";
5080 parm_histo[2].count = zcb->zcb_asize_count;
5081 parm_histo[2].len = zcb->zcb_asize_len;
5082 parm_histo[2].cumulative = 0;
5083
5085 (void) printf("\nBlock Size Histogram\n");
5086 /*
5087 * Print the first line titles
5088 */
5089 if (dump_opt['P'])
5090 (void) printf("\n%s\t", blocksize_title1);
5091 else
5092 (void) printf("\n%7s ", blocksize_title1);
5093
5094 for (int j = 0; j < NUM_HISTO; j++) {
5095 if (dump_opt['P']) {
5096 if (j < NUM_HISTO - 1) {
5097 (void) printf("%s\t\t\t", parm_histo[j].name);
5098 } else {
5099 /* Don't print trailing spaces */
5100 (void) printf(" %s", parm_histo[j].name);
5101 }
5102 } else {
5103 if (j < NUM_HISTO - 1) {
5104 /* Left aligned strings in the output */
5105 (void) printf("%-7s ",
5106 parm_histo[j].name);
5107 } else {
5108 /* Don't print trailing spaces */
5109 (void) printf("%s", parm_histo[j].name);
5110 }
5111 }
5112 }
5113 (void) printf("\n");
5114
5115 /*
5116 * Print the second line titles
5117 */
5118 if (dump_opt['P']) {
5119 (void) printf("%s\t", blocksize_title2);
5120 } else {
5121 (void) printf("%7s ", blocksize_title2);
5122 }
5123
5124 for (int i = 0; i < NUM_HISTO; i++) {
5125 if (dump_opt['P']) {
5126 (void) printf("%s\t%s\t%s\t",
5127 count_title, length_title, cumulative_title);
5128 } else {
5129 (void) printf("%7s%7s%7s",
5130 count_title, length_title, cumulative_title);
5131 }
5132 }
5133 (void) printf("\n");
5134
5135 /*
5136 * Print the rows
5137 */
5138 for (int i = SPA_MINBLOCKSHIFT; i < SPA_MAX_FOR_16M; i++) {
5139
5140 /*
5141 * Print the first column showing the blocksize
5142 */
5143 zdb_nicenum((1ULL << i), numbuf, sizeof (numbuf));
5144
5145 if (dump_opt['P']) {
5146 printf("%s", numbuf);
5147 } else {
5148 printf("%7s:", numbuf);
5149 }
5150
5151 /*
5152 * Print the remaining set of 3 columns per size:
5153 * for psize, lsize and asize
5154 */
5155 for (int j = 0; j < NUM_HISTO; j++) {
5156 parm_histo[j].cumulative += parm_histo[j].len[i];
5157
5158 zdb_nicenum(parm_histo[j].count[i],
5159 numbuf, sizeof (numbuf));
5160 if (dump_opt['P'])
5161 (void) printf("\t%s", numbuf);
5162 else
5163 (void) printf("%7s", numbuf);
5164
5165 zdb_nicenum(parm_histo[j].len[i],
5166 numbuf, sizeof (numbuf));
5167 if (dump_opt['P'])
5168 (void) printf("\t%s", numbuf);
5169 else
5170 (void) printf("%7s", numbuf);
5171
5172 zdb_nicenum(parm_histo[j].cumulative,
5173 numbuf, sizeof (numbuf));
5174 if (dump_opt['P'])
5175 (void) printf("\t%s", numbuf);
5176 else
5177 (void) printf("%7s", numbuf);
5178 }
5179 (void) printf("\n");
5180 }
5181 }
5182
5183 static void
5184 zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
5185 dmu_object_type_t type)
5186 {
5187 uint64_t refcnt = 0;
5188 int i;
5189
5190 ASSERT(type < ZDB_OT_TOTAL);
5191
5192 if (zilog && zil_bp_tree_add(zilog, bp) != 0)
5193 return;
5194
5195 spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER);
5196
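/*
 * Each block is accounted four times, once per cell of zcb_type[][]
 * that covers it: (level, type), (level, ZDB_OT_TOTAL),
 * (ZB_TOTAL, type), and the grand total (ZB_TOTAL, ZDB_OT_TOTAL).
 */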
5197 for (i = 0; i < 4; i++) {
5198 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
5199 int t = (i & 1) ? type : ZDB_OT_TOTAL;
5200 int equal;
5201 zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
5202
5203 zb->zb_asize += BP_GET_ASIZE(bp);
5204 zb->zb_lsize += BP_GET_LSIZE(bp);
5205 zb->zb_psize += BP_GET_PSIZE(bp);
5206 zb->zb_count++;
5207
5208 /*
5209 * The histogram is only big enough to record blocks up to
5210 * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last,
5211 * "other", bucket.
5212 */
5213 unsigned idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT;
5214 idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1);
5215 zb->zb_psize_histogram[idx]++;
5216
5217 zb->zb_gangs += BP_COUNT_GANG(bp);
5218
5219 switch (BP_GET_NDVAS(bp)) {
5220 case 2:
5221 if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
5222 DVA_GET_VDEV(&bp->blk_dva[1])) {
5223 zb->zb_ditto_samevdev++;
5224
5225 if (same_metaslab(zcb->zcb_spa,
5226 DVA_GET_VDEV(&bp->blk_dva[0]),
5227 DVA_GET_OFFSET(&bp->blk_dva[0]),
5228 DVA_GET_OFFSET(&bp->blk_dva[1])))
5229 zb->zb_ditto_same_ms++;
5230 }
5231 break;
5232 case 3:
5233 equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
5234 DVA_GET_VDEV(&bp->blk_dva[1])) +
5235 (DVA_GET_VDEV(&bp->blk_dva[0]) ==
5236 DVA_GET_VDEV(&bp->blk_dva[2])) +
5237 (DVA_GET_VDEV(&bp->blk_dva[1]) ==
5238 DVA_GET_VDEV(&bp->blk_dva[2]));
5239 if (equal != 0) {
5240 zb->zb_ditto_samevdev++;
5241
5242 if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
5243 DVA_GET_VDEV(&bp->blk_dva[1]) &&
5244 same_metaslab(zcb->zcb_spa,
5245 DVA_GET_VDEV(&bp->blk_dva[0]),
5246 DVA_GET_OFFSET(&bp->blk_dva[0]),
5247 DVA_GET_OFFSET(&bp->blk_dva[1])))
5248 zb->zb_ditto_same_ms++;
5249 else if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
5250 DVA_GET_VDEV(&bp->blk_dva[2]) &&
5251 same_metaslab(zcb->zcb_spa,
5252 DVA_GET_VDEV(&bp->blk_dva[0]),
5253 DVA_GET_OFFSET(&bp->blk_dva[0]),
5254 DVA_GET_OFFSET(&bp->blk_dva[2])))
5255 zb->zb_ditto_same_ms++;
5256 else if (DVA_GET_VDEV(&bp->blk_dva[1]) ==
5257 DVA_GET_VDEV(&bp->blk_dva[2]) &&
5258 same_metaslab(zcb->zcb_spa,
5259 DVA_GET_VDEV(&bp->blk_dva[1]),
5260 DVA_GET_OFFSET(&bp->blk_dva[1]),
5261 DVA_GET_OFFSET(&bp->blk_dva[2])))
5262 zb->zb_ditto_same_ms++;
5263 }
5264 break;
5265 }
5266 }
5267
5268 spa_config_exit(zcb->zcb_spa, SCL_CONFIG, FTAG);
5269
5270 if (BP_IS_EMBEDDED(bp)) {
5271 zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
5272 zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
5273 [BPE_GET_PSIZE(bp)]++;
5274 return;
5275 }
5276 /*
5277 * The binning histogram bins by powers of two up to
5278 * SPA_MAXBLOCKSIZE rather than creating bins for
5279 * every possible blocksize found in the pool.
5280 */
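/*
 * Worked example: highbit64() returns the 1-based index of the highest
 * set bit, so a 6K physical block gives highbit64(6144) - 1 == 12,
 * landing in the bin that covers [4K, 8K).
 */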
5281 int bin = highbit64(BP_GET_PSIZE(bp)) - 1;
5282
5283 zcb->zcb_psize_count[bin]++;
5284 zcb->zcb_psize_len[bin] += BP_GET_PSIZE(bp);
5285 zcb->zcb_psize_total += BP_GET_PSIZE(bp);
5286
5287 bin = highbit64(BP_GET_LSIZE(bp)) - 1;
5288
5289 zcb->zcb_lsize_count[bin]++;
5290 zcb->zcb_lsize_len[bin] += BP_GET_LSIZE(bp);
5291 zcb->zcb_lsize_total += BP_GET_LSIZE(bp);
5292
5293 bin = highbit64(BP_GET_ASIZE(bp)) - 1;
5294
5295 zcb->zcb_asize_count[bin]++;
5296 zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp);
5297 zcb->zcb_asize_total += BP_GET_ASIZE(bp);
5298
5299 if (dump_opt['L'])
5300 return;
5301
5302 if (BP_GET_DEDUP(bp)) {
5303 ddt_t *ddt;
5304 ddt_entry_t *dde;
5305
5306 ddt = ddt_select(zcb->zcb_spa, bp);
5307 ddt_enter(ddt);
5308 dde = ddt_lookup(ddt, bp, B_FALSE);
5309
5310 if (dde == NULL) {
5311 refcnt = 0;
5312 } else {
5313 ddt_phys_t *ddp = ddt_phys_select(dde, bp);
5314 ddt_phys_decref(ddp);
5315 refcnt = ddp->ddp_refcnt;
5316 if (ddt_phys_total_refcnt(dde) == 0)
5317 ddt_remove(ddt, dde);
5318 }
5319 ddt_exit(ddt);
5320 }
5321
5322 VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
5323 refcnt ? 0 : spa_min_claim_txg(zcb->zcb_spa),
5324 bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
5325 }
5326
5327 static void
5328 zdb_blkptr_done(zio_t *zio)
5329 {
5330 spa_t *spa = zio->io_spa;
5331 blkptr_t *bp = zio->io_bp;
5332 int ioerr = zio->io_error;
5333 zdb_cb_t *zcb = zio->io_private;
5334 zbookmark_phys_t *zb = &zio->io_bookmark;
5335
5336 mutex_enter(&spa->spa_scrub_lock);
5337 spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp);
5338 cv_broadcast(&spa->spa_scrub_io_cv);
5339
5340 if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
5341 char blkbuf[BP_SPRINTF_LEN];
5342
5343 zcb->zcb_haderrors = 1;
5344 zcb->zcb_errors[ioerr]++;
5345
5346 if (dump_opt['b'] >= 2)
5347 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
5348 else
5349 blkbuf[0] = '\0';
5350
5351 (void) printf("zdb_blkptr_cb: "
5352 "Got error %d reading "
5353 "<%llu, %llu, %lld, %llx> %s -- skipping\n",
5354 ioerr,
5355 (u_longlong_t)zb->zb_objset,
5356 (u_longlong_t)zb->zb_object,
5357 (u_longlong_t)zb->zb_level,
5358 (u_longlong_t)zb->zb_blkid,
5359 blkbuf);
5360 }
5361 mutex_exit(&spa->spa_scrub_lock);
5362
5363 abd_free(zio->io_abd);
5364 }
5365
5366 static int
5367 zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
5368 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
5369 {
5370 zdb_cb_t *zcb = arg;
5371 dmu_object_type_t type;
5372 boolean_t is_metadata;
5373
5374 if (zb->zb_level == ZB_DNODE_LEVEL)
5375 return (0);
5376
5377 if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
5378 char blkbuf[BP_SPRINTF_LEN];
5379 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
5380 (void) printf("objset %llu object %llu "
5381 "level %lld offset 0x%llx %s\n",
5382 (u_longlong_t)zb->zb_objset,
5383 (u_longlong_t)zb->zb_object,
5384 (longlong_t)zb->zb_level,
5385 (u_longlong_t)blkid2offset(dnp, bp, zb),
5386 blkbuf);
5387 }
5388
5389 if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp))
5390 return (0);
5391
5392 type = BP_GET_TYPE(bp);
5393
5394 zdb_count_block(zcb, zilog, bp,
5395 (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type);
5396
5397 is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
5398
5399 if (!BP_IS_EMBEDDED(bp) &&
5400 (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
5401 size_t size = BP_GET_PSIZE(bp);
5402 abd_t *abd = abd_alloc(size, B_FALSE);
5403 int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
5404
5405 /* If it's an intent log block, failure is expected. */
5406 if (zb->zb_level == ZB_ZIL_LEVEL)
5407 flags |= ZIO_FLAG_SPECULATIVE;
5408
5409 mutex_enter(&spa->spa_scrub_lock);
5410 while (spa->spa_load_verify_bytes > max_inflight_bytes)
5411 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
5412 spa->spa_load_verify_bytes += size;
5413 mutex_exit(&spa->spa_scrub_lock);
5414
5415 zio_nowait(zio_read(NULL, spa, bp, abd, size,
5416 zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
5417 }
5418
5419 zcb->zcb_readfails = 0;
5420
5421 /* only call gethrtime() every 100 blocks */
5422 static int iters;
5423 if (++iters > 100)
5424 iters = 0;
5425 else
5426 return (0);
5427
5428 if (dump_opt['b'] < 5 && gethrtime() > zcb->zcb_lastprint + NANOSEC) {
5429 uint64_t now = gethrtime();
5430 char buf[10];
5431 uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize;
5432 int kb_per_sec =
5433 1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000));
5434 int sec_remaining =
5435 (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec;
5436
5437 /* make sure nicenum has enough space */
5438 CTASSERT(sizeof (buf) >= NN_NUMBUF_SZ);
5439
5440 zfs_nicebytes(bytes, buf, sizeof (buf));
5441 (void) fprintf(stderr,
5442 "\r%5s completed (%4dMB/s) "
5443 "estimated time remaining: %uhr %02umin %02usec ",
5444 buf, kb_per_sec / 1024,
5445 sec_remaining / 60 / 60,
5446 sec_remaining / 60 % 60,
5447 sec_remaining % 60);
5448
5449 zcb->zcb_lastprint = now;
5450 }
5451
5452 return (0);
5453 }
5454
5455 static void
5456 zdb_leak(void *arg, uint64_t start, uint64_t size)
5457 {
5458 vdev_t *vd = arg;
5459
5460 (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
5461 (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
5462 }
5463
5464 static metaslab_ops_t zdb_metaslab_ops = {
5465 NULL /* alloc */
5466 };
5467
5468 /* ARGSUSED */
5469 static int
5470 load_unflushed_svr_segs_cb(spa_t *spa, space_map_entry_t *sme,
5471 uint64_t txg, void *arg)
5472 {
5473 spa_vdev_removal_t *svr = arg;
5474
5475 uint64_t offset = sme->sme_offset;
5476 uint64_t size = sme->sme_run;
5477
5478 /* skip vdevs we don't care about */
5479 if (sme->sme_vdev != svr->svr_vdev_id)
5480 return (0);
5481
5482 vdev_t *vd = vdev_lookup_top(spa, sme->sme_vdev);
5483 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
5484 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE);
5485
5486 if (txg < metaslab_unflushed_txg(ms))
5487 return (0);
5488
5489 if (sme->sme_type == SM_ALLOC)
5490 range_tree_add(svr->svr_allocd_segs, offset, size);
5491 else
5492 range_tree_remove(svr->svr_allocd_segs, offset, size);
5493
5494 return (0);
5495 }
5496
5497 /* ARGSUSED */
5498 static void
5499 claim_segment_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset,
5500 uint64_t size, void *arg)
5501 {
5502 /*
5503 * This callback was called through a remap from
5504 * a device being removed. Therefore, the vdev that
5505 * this callback is applied to is a concrete
5506 * vdev.
5507 */
5508 ASSERT(vdev_is_concrete(vd));
5509
5510 VERIFY0(metaslab_claim_impl(vd, offset, size,
5511 spa_min_claim_txg(vd->vdev_spa)));
5512 }
5513
5514 static void
5515 claim_segment_cb(void *arg, uint64_t offset, uint64_t size)
5516 {
5517 vdev_t *vd = arg;
5518
5519 vdev_indirect_ops.vdev_op_remap(vd, offset, size,
5520 claim_segment_impl_cb, NULL);
5521 }
5522
5523 /*
5524 * After accounting for all allocated blocks that are directly referenced,
5525 * we might have missed a reference to a block from a partially complete
5526 * (and thus unused) indirect mapping object. We perform a secondary pass
5527 * through the metaslabs we have already mapped and claim the destination
5528 * blocks.
5529 */
5530 static void
5531 zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb)
5532 {
5533 if (dump_opt['L'])
5534 return;
5535
5536 if (spa->spa_vdev_removal == NULL)
5537 return;
5538
5539 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
5540
5541 spa_vdev_removal_t *svr = spa->spa_vdev_removal;
5542 vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id);
5543 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
5544
5545 ASSERT0(range_tree_space(svr->svr_allocd_segs));
5546
5547 range_tree_t *allocs = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
5548 for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) {
5549 metaslab_t *msp = vd->vdev_ms[msi];
5550
5551 ASSERT0(range_tree_space(allocs));
5552 if (msp->ms_sm != NULL)
5553 VERIFY0(space_map_load(msp->ms_sm, allocs, SM_ALLOC));
5554 range_tree_vacate(allocs, range_tree_add, svr->svr_allocd_segs);
5555 }
5556 range_tree_destroy(allocs);
5557
5558 iterate_through_spacemap_logs(spa, load_unflushed_svr_segs_cb, svr);
5559
5560 /*
5561 * Clear everything past what has been synced,
5562 * because we have not allocated mappings for
5563 * it yet.
5564 */
5565 range_tree_clear(svr->svr_allocd_segs,
5566 vdev_indirect_mapping_max_offset(vim),
5567 vd->vdev_asize - vdev_indirect_mapping_max_offset(vim));
5568
5569 zcb->zcb_removing_size += range_tree_space(svr->svr_allocd_segs);
5570 range_tree_vacate(svr->svr_allocd_segs, claim_segment_cb, vd);
5571
5572 spa_config_exit(spa, SCL_CONFIG, FTAG);
5573 }
5574
5575 /* ARGSUSED */
5576 static int
5577 increment_indirect_mapping_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
5578 dmu_tx_t *tx)
5579 {
5580 zdb_cb_t *zcb = arg;
5581 spa_t *spa = zcb->zcb_spa;
5582 vdev_t *vd;
5583 const dva_t *dva = &bp->blk_dva[0];
5584
5585 ASSERT(!bp_freed);
5586 ASSERT(!dump_opt['L']);
5587 ASSERT3U(BP_GET_NDVAS(bp), ==, 1);
5588
5589 spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
5590 vd = vdev_lookup_top(zcb->zcb_spa, DVA_GET_VDEV(dva));
5591 ASSERT3P(vd, !=, NULL);
5592 spa_config_exit(spa, SCL_VDEV, FTAG);
5593
5594 ASSERT(vd->vdev_indirect_config.vic_mapping_object != 0);
5595 ASSERT3P(zcb->zcb_vd_obsolete_counts[vd->vdev_id], !=, NULL);
5596
5597 vdev_indirect_mapping_increment_obsolete_count(
5598 vd->vdev_indirect_mapping,
5599 DVA_GET_OFFSET(dva), DVA_GET_ASIZE(dva),
5600 zcb->zcb_vd_obsolete_counts[vd->vdev_id]);
5601
5602 return (0);
5603 }
5604
5605 static uint32_t *
5606 zdb_load_obsolete_counts(vdev_t *vd)
5607 {
5608 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
5609 spa_t *spa = vd->vdev_spa;
5610 spa_condensing_indirect_phys_t *scip =
5611 &spa->spa_condensing_indirect_phys;
5612 uint64_t obsolete_sm_object;
5613 uint32_t *counts;
5614
5615 VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
5616 EQUIV(obsolete_sm_object != 0, vd->vdev_obsolete_sm != NULL);
5617 counts = vdev_indirect_mapping_load_obsolete_counts(vim);
5618 if (vd->vdev_obsolete_sm != NULL) {
5619 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
5620 vd->vdev_obsolete_sm);
5621 }
5622 if (scip->scip_vdev == vd->vdev_id &&
5623 scip->scip_prev_obsolete_sm_object != 0) {
5624 space_map_t *prev_obsolete_sm = NULL;
5625 VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset,
5626 scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0));
5627 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
5628 prev_obsolete_sm);
5629 space_map_close(prev_obsolete_sm);
5630 }
5631 return (counts);
5632 }
5633
5634 static void
5635 zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
5636 {
5637 ddt_bookmark_t ddb;
5638 ddt_entry_t dde;
5639 int error;
5640 int p;
5641
5642 ASSERT(!dump_opt['L']);
5643
5644 bzero(&ddb, sizeof (ddb));
5645 while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
5646 blkptr_t blk;
5647 ddt_phys_t *ddp = dde.dde_phys;
5648
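/*
 * ddt_walk() visits the DDT classes in order, so once we reach
 * the unique class (entries with a single reference) there is
 * no more dedup'd space to account for.
 */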
5649 if (ddb.ddb_class == DDT_CLASS_UNIQUE)
5650 return;
5651
5652 ASSERT(ddt_phys_total_refcnt(&dde) > 1);
5653
5654 for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
5655 if (ddp->ddp_phys_birth == 0)
5656 continue;
5657 ddt_bp_create(ddb.ddb_checksum,
5658 &dde.dde_key, ddp, &blk);
5659 if (p == DDT_PHYS_DITTO) {
5660 zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
5661 } else {
5662 zcb->zcb_dedup_asize +=
5663 BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
5664 zcb->zcb_dedup_blocks++;
5665 }
5666 }
5667 ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
5668 ddt_enter(ddt);
5669 VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
5670 ddt_exit(ddt);
5671 }
5672
5673 ASSERT(error == ENOENT);
5674 }
5675
5676 typedef struct checkpoint_sm_exclude_entry_arg {
5677 vdev_t *cseea_vd;
5678 uint64_t cseea_checkpoint_size;
5679 } checkpoint_sm_exclude_entry_arg_t;
5680
5681 static int
5682 checkpoint_sm_exclude_entry_cb(space_map_entry_t *sme, void *arg)
5683 {
5684 checkpoint_sm_exclude_entry_arg_t *cseea = arg;
5685 vdev_t *vd = cseea->cseea_vd;
5686 metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
5687 uint64_t end = sme->sme_offset + sme->sme_run;
5688
5689 ASSERT(sme->sme_type == SM_FREE);
5690
5691 /*
5692 * Since the vdev_checkpoint_sm exists in the vdev level
5693 * and the ms_sm space maps exist in the metaslab level,
5694 * an entry in the checkpoint space map could theoretically
5695 * cross the boundaries of the metaslab that it belongs to.
5696 *
5697 * In reality, because of the way that we populate and
5698 * manipulate the checkpoint's space maps currently,
5699 * there shouldn't be any entries that cross metaslabs.
5700 * Hence the assertion below.
5701 *
5702 * That said, there is no fundamental requirement that
5703 * the checkpoint's space map entries should not cross
5704 * metaslab boundaries. So if needed we could add code
5705 * that handles metaslab-crossing segments in the future.
5706 */
5707 VERIFY3U(sme->sme_offset, >=, ms->ms_start);
5708 VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
5709
5710 /*
5711 * By removing the entry from the allocated segments we
5712 * also verify that the entry is there to begin with.
5713 */
5714 mutex_enter(&ms->ms_lock);
5715 range_tree_remove(ms->ms_allocatable, sme->sme_offset, sme->sme_run);
5716 mutex_exit(&ms->ms_lock);
5717
5718 cseea->cseea_checkpoint_size += sme->sme_run;
5719 return (0);
5720 }
5721
5722 static void
5723 zdb_leak_init_vdev_exclude_checkpoint(vdev_t *vd, zdb_cb_t *zcb)
5724 {
5725 spa_t *spa = vd->vdev_spa;
5726 space_map_t *checkpoint_sm = NULL;
5727 uint64_t checkpoint_sm_obj;
5728
5729 /*
5730 * If there is no vdev_top_zap, we are in a pool whose
5731 * version predates the pool checkpoint feature.
5732 */
5733 if (vd->vdev_top_zap == 0)
5734 return;
5735
5736 /*
5737 * If there is no reference to the vdev_checkpoint_sm in
5738 * the vdev_top_zap, then one of the following scenarios
5739 * is true:
5740 *
5741 * 1] There is no checkpoint
5742 * 2] There is a checkpoint, but no checkpointed blocks
5743 * have been freed yet
5744 * 3] The current vdev is indirect
5745 *
5746 * In these cases we return immediately.
5747 */
5748 if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap,
5749 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
5750 return;
5751
5752 VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap,
5753 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, sizeof (uint64_t), 1,
5754 &checkpoint_sm_obj));
5755
5756 checkpoint_sm_exclude_entry_arg_t cseea;
5757 cseea.cseea_vd = vd;
5758 cseea.cseea_checkpoint_size = 0;
5759
5760 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
5761 checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
5762
5763 VERIFY0(space_map_iterate(checkpoint_sm,
5764 space_map_length(checkpoint_sm),
5765 checkpoint_sm_exclude_entry_cb, &cseea));
5766 space_map_close(checkpoint_sm);
5767
5768 zcb->zcb_checkpoint_size += cseea.cseea_checkpoint_size;
5769 }
5770
5771 static void
5772 zdb_leak_init_exclude_checkpoint(spa_t *spa, zdb_cb_t *zcb)
5773 {
5774 ASSERT(!dump_opt['L']);
5775
5776 vdev_t *rvd = spa->spa_root_vdev;
5777 for (uint64_t c = 0; c < rvd->vdev_children; c++) {
5778 ASSERT3U(c, ==, rvd->vdev_child[c]->vdev_id);
5779 zdb_leak_init_vdev_exclude_checkpoint(rvd->vdev_child[c], zcb);
5780 }
5781 }
5782
5783 static int
5784 count_unflushed_space_cb(spa_t *spa, space_map_entry_t *sme,
5785 uint64_t txg, void *arg)
5786 {
5787 int64_t *ualloc_space = arg;
5788
5789 uint64_t offset = sme->sme_offset;
5790 uint64_t vdev_id = sme->sme_vdev;
5791
5792 vdev_t *vd = vdev_lookup_top(spa, vdev_id);
5793 if (!vdev_is_concrete(vd))
5794 return (0);
5795
5796 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
5797 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE);
5798
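/*
 * Entries from log spacemaps older than the metaslab's unflushed
 * txg have already been folded into ms_sm and are accounted there.
 */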
5799 if (txg < metaslab_unflushed_txg(ms))
5800 return (0);
5801
5802 if (sme->sme_type == SM_ALLOC)
5803 *ualloc_space += sme->sme_run;
5804 else
5805 *ualloc_space -= sme->sme_run;
5806
5807 return (0);
5808 }
5809
5810 static int64_t
5811 get_unflushed_alloc_space(spa_t *spa)
5812 {
5813 if (dump_opt['L'])
5814 return (0);
5815
5816 int64_t ualloc_space = 0;
5817 iterate_through_spacemap_logs(spa, count_unflushed_space_cb,
5818 &ualloc_space);
5819 return (ualloc_space);
5820 }
5821
5822 static int
5823 load_unflushed_cb(spa_t *spa, space_map_entry_t *sme, uint64_t txg, void *arg)
5824 {
5825 maptype_t *uic_maptype = arg;
5826
5827 uint64_t offset = sme->sme_offset;
5828 uint64_t size = sme->sme_run;
5829 uint64_t vdev_id = sme->sme_vdev;
5830
5831 vdev_t *vd = vdev_lookup_top(spa, vdev_id);
5832
5833 /* skip indirect vdevs */
5834 if (!vdev_is_concrete(vd))
5835 return (0);
5836
5837 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
5838
5839 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE);
5840 ASSERT(*uic_maptype == SM_ALLOC || *uic_maptype == SM_FREE);
5841
5842 if (txg < metaslab_unflushed_txg(ms))
5843 return (0);
5844
5845 if (*uic_maptype == sme->sme_type)
5846 range_tree_add(ms->ms_allocatable, offset, size);
5847 else
5848 range_tree_remove(ms->ms_allocatable, offset, size);
5849
5850 return (0);
5851 }
5852
5853 static void
5854 load_unflushed_to_ms_allocatables(spa_t *spa, maptype_t maptype)
5855 {
5856 iterate_through_spacemap_logs(spa, load_unflushed_cb, &maptype);
5857 }
5858
5859 static void
5860 load_concrete_ms_allocatable_trees(spa_t *spa, maptype_t maptype)
5861 {
5862 vdev_t *rvd = spa->spa_root_vdev;
5863 for (uint64_t i = 0; i < rvd->vdev_children; i++) {
5864 vdev_t *vd = rvd->vdev_child[i];
5865
5866 ASSERT3U(i, ==, vd->vdev_id);
5867
5868 if (vd->vdev_ops == &vdev_indirect_ops)
5869 continue;
5870
5871 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
5872 metaslab_t *msp = vd->vdev_ms[m];
5873
5874 (void) fprintf(stderr,
5875 "\rloading concrete vdev %llu, "
5876 "metaslab %llu of %llu ...",
5877 (longlong_t)vd->vdev_id,
5878 (longlong_t)msp->ms_id,
5879 (longlong_t)vd->vdev_ms_count);
5880
5881 mutex_enter(&msp->ms_lock);
5882 range_tree_vacate(msp->ms_allocatable, NULL, NULL);
5883
5884 /*
5885 * We don't want to spend the CPU manipulating the
5886 * size-ordered tree, so clear the range_tree ops.
5887 */
5888 msp->ms_allocatable->rt_ops = NULL;
5889
5890 if (msp->ms_sm != NULL) {
5891 VERIFY0(space_map_load(msp->ms_sm,
5892 msp->ms_allocatable, maptype));
5893 }
5894 if (!msp->ms_loaded)
5895 msp->ms_loaded = B_TRUE;
5896 mutex_exit(&msp->ms_lock);
5897 }
5898 }
5899
5900 load_unflushed_to_ms_allocatables(spa, maptype);
5901 }
5902
5903 /*
5904 * vim_idxp is an in-out parameter which (for indirect vdevs) is the
5905 * index in vim_entries that has the first entry in this metaslab.
5906 * On return, it will be set to the first entry after this metaslab.
5907 */
5908 static void
5909 load_indirect_ms_allocatable_tree(vdev_t *vd, metaslab_t *msp,
5910 uint64_t *vim_idxp)
5911 {
5912 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
5913
5914 mutex_enter(&msp->ms_lock);
5915 range_tree_vacate(msp->ms_allocatable, NULL, NULL);
5916
5917 /*
5918 * We don't want to spend the CPU manipulating the
5919 * size-ordered tree, so clear the range_tree ops.
5920 */
5921 msp->ms_allocatable->rt_ops = NULL;
5922
5923 for (; *vim_idxp < vdev_indirect_mapping_num_entries(vim);
5924 (*vim_idxp)++) {
5925 vdev_indirect_mapping_entry_phys_t *vimep =
5926 &vim->vim_entries[*vim_idxp];
5927 uint64_t ent_offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
5928 uint64_t ent_len = DVA_GET_ASIZE(&vimep->vimep_dst);
5929 ASSERT3U(ent_offset, >=, msp->ms_start);
5930 if (ent_offset >= msp->ms_start + msp->ms_size)
5931 break;
5932
5933 /*
5934 * Mappings do not cross metaslab boundaries,
5935 * because we create them by walking the metaslabs.
5936 */
5937 ASSERT3U(ent_offset + ent_len, <=,
5938 msp->ms_start + msp->ms_size);
5939 range_tree_add(msp->ms_allocatable, ent_offset, ent_len);
5940 }
5941
5942 if (!msp->ms_loaded)
5943 msp->ms_loaded = B_TRUE;
5944 mutex_exit(&msp->ms_lock);
5945 }
5946
5947 static void
5948 zdb_leak_init_prepare_indirect_vdevs(spa_t *spa, zdb_cb_t *zcb)
5949 {
5950 ASSERT(!dump_opt['L']);
5951
5952 vdev_t *rvd = spa->spa_root_vdev;
5953 for (uint64_t c = 0; c < rvd->vdev_children; c++) {
5954 vdev_t *vd = rvd->vdev_child[c];
5955
5956 ASSERT3U(c, ==, vd->vdev_id);
5957
5958 if (vd->vdev_ops != &vdev_indirect_ops)
5959 continue;
5960
5961 /*
5962 * Note: we don't check for mapping leaks on
5963 * removing vdevs because their ms_allocatable trees
5964 * are used to look for leaks in allocated space.
5965 */
5966 zcb->zcb_vd_obsolete_counts[c] = zdb_load_obsolete_counts(vd);
5967
5968 /*
5969 * Normally, indirect vdevs don't have any
5970 * metaslabs. We want to set them up for
5971 * zio_claim().
5972 */
5973 vdev_metaslab_group_create(vd);
5974 VERIFY0(vdev_metaslab_init(vd, 0));
5975
5976 vdev_indirect_mapping_t *vim __maybe_unused =
5977 vd->vdev_indirect_mapping;
5978 uint64_t vim_idx = 0;
5979 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
5980
5981 (void) fprintf(stderr,
5982 "\rloading indirect vdev %llu, "
5983 "metaslab %llu of %llu ...",
5984 (longlong_t)vd->vdev_id,
5985 (longlong_t)vd->vdev_ms[m]->ms_id,
5986 (longlong_t)vd->vdev_ms_count);
5987
5988 load_indirect_ms_allocatable_tree(vd, vd->vdev_ms[m],
5989 &vim_idx);
5990 }
5991 ASSERT3U(vim_idx, ==, vdev_indirect_mapping_num_entries(vim));
5992 }
5993 }
5994
5995 static void
5996 zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
5997 {
5998 zcb->zcb_spa = spa;
5999
6000 if (dump_opt['L'])
6001 return;
6002
6003 dsl_pool_t *dp = spa->spa_dsl_pool;
6004 vdev_t *rvd = spa->spa_root_vdev;
6005
6006 /*
6007 * We are going to be changing the meaning of the metaslab's
6008 * ms_allocatable. Ensure that the allocator doesn't try to
6009 * use the tree.
6010 */
6011 spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
6012 spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
6013 spa->spa_embedded_log_class->mc_ops = &zdb_metaslab_ops;
6014
6015 zcb->zcb_vd_obsolete_counts =
6016 umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
6017 UMEM_NOFAIL);
6018
6019 /*
6020 * For leak detection, we overload the ms_allocatable trees
6021 * to contain allocated segments instead of free segments.
6022 * As a result, we can't use the normal metaslab_load/unload
6023 * interfaces.
6024 */
6025 zdb_leak_init_prepare_indirect_vdevs(spa, zcb);
6026 load_concrete_ms_allocatable_trees(spa, SM_ALLOC);
6027
6028 /*
6029 * In load_concrete_ms_allocatable_trees() we loaded all the
6030 * allocated entries from the ms_sm to the ms_allocatable for
6031 * each metaslab. If the pool has a checkpoint or is in the
6032 * middle of discarding a checkpoint, some of these blocks
6033 * may have been freed but their ms_sm may not have been
6034 * updated because they are referenced by the checkpoint. In
6035 * order to avoid false-positives during leak-detection, we
6036 * go through the vdev's checkpoint space map and exclude all
6037 * its entries from their relevant ms_allocatable.
6038 *
6039 * We also aggregate the space held by the checkpoint and add
6040 * it to zcb_checkpoint_size.
6041 *
6042 * Note that at this point we are also verifying that all the
6043 * entries on the checkpoint_sm are marked as allocated in
6044 * the ms_sm of their relevant metaslab.
6045 * [see comment in checkpoint_sm_exclude_entry_cb()]
6046 */
6047 zdb_leak_init_exclude_checkpoint(spa, zcb);
6048 ASSERT3U(zcb->zcb_checkpoint_size, ==, spa_get_checkpoint_space(spa));
6049
6050 /* for cleaner progress output */
6051 (void) fprintf(stderr, "\n");
6052
6053 if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
6054 ASSERT(spa_feature_is_enabled(spa,
6055 SPA_FEATURE_DEVICE_REMOVAL));
6056 (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
6057 increment_indirect_mapping_cb, zcb, NULL);
6058 }
6059
6060 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
6061 zdb_ddt_leak_init(spa, zcb);
6062 spa_config_exit(spa, SCL_CONFIG, FTAG);
6063 }
6064
6065 static boolean_t
6066 zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
6067 {
6068 boolean_t leaks = B_FALSE;
6069 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
6070 uint64_t total_leaked = 0;
6071 boolean_t are_precise = B_FALSE;
6072
6073 ASSERT(vim != NULL);
6074
6075 for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
6076 vdev_indirect_mapping_entry_phys_t *vimep =
6077 &vim->vim_entries[i];
6078 uint64_t obsolete_bytes = 0;
6079 uint64_t offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
6080 metaslab_t *msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
6081
6082 /*
6083 * This is not very efficient but it's easy to
6084 * verify correctness.
6085 */
6086 for (uint64_t inner_offset = 0;
6087 inner_offset < DVA_GET_ASIZE(&vimep->vimep_dst);
6088 inner_offset += 1 << vd->vdev_ashift) {
6089 if (range_tree_contains(msp->ms_allocatable,
6090 offset + inner_offset, 1 << vd->vdev_ashift)) {
6091 obsolete_bytes += 1 << vd->vdev_ashift;
6092 }
6093 }
6094
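/*
 * obsolete_bytes is the portion of this mapping entry that the
 * traversal left unreferenced; compare it against the obsolete
 * count recorded for the entry.
 */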
6095 int64_t bytes_leaked = obsolete_bytes -
6096 zcb->zcb_vd_obsolete_counts[vd->vdev_id][i];
6097 ASSERT3U(DVA_GET_ASIZE(&vimep->vimep_dst), >=,
6098 zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]);
6099
6100 VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise));
6101 if (bytes_leaked != 0 && (are_precise || dump_opt['d'] >= 5)) {
6102 (void) printf("obsolete indirect mapping count "
6103 "mismatch on %llu:%llx:%llx : %llx bytes leaked\n",
6104 (u_longlong_t)vd->vdev_id,
6105 (u_longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
6106 (u_longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
6107 (u_longlong_t)bytes_leaked);
6108 }
6109 total_leaked += ABS(bytes_leaked);
6110 }
6111
6112 VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise));
6113 if (!are_precise && total_leaked > 0) {
6114 int pct_leaked = total_leaked * 100 /
6115 vdev_indirect_mapping_bytes_mapped(vim);
6116 (void) printf("cannot verify obsolete indirect mapping "
6117 "counts of vdev %llu because precise feature was not "
6118 "enabled when it was removed: %d%% (%llx bytes) of mapping"
6119 "unreferenced\n",
6120 (u_longlong_t)vd->vdev_id, pct_leaked,
6121 (u_longlong_t)total_leaked);
6122 } else if (total_leaked > 0) {
6123 (void) printf("obsolete indirect mapping count mismatch "
6124 "for vdev %llu -- %llx total bytes mismatched\n",
6125 (u_longlong_t)vd->vdev_id,
6126 (u_longlong_t)total_leaked);
6127 leaks |= B_TRUE;
6128 }
6129
6130 vdev_indirect_mapping_free_obsolete_counts(vim,
6131 zcb->zcb_vd_obsolete_counts[vd->vdev_id]);
6132 zcb->zcb_vd_obsolete_counts[vd->vdev_id] = NULL;
6133
6134 return (leaks);
6135 }
6136
6137 static boolean_t
6138 zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
6139 {
6140 if (dump_opt['L'])
6141 return (B_FALSE);
6142
6143 boolean_t leaks = B_FALSE;
6144 vdev_t *rvd = spa->spa_root_vdev;
6145 for (unsigned c = 0; c < rvd->vdev_children; c++) {
6146 vdev_t *vd = rvd->vdev_child[c];
6147
6148 if (zcb->zcb_vd_obsolete_counts[c] != NULL) {
6149 leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
6150 }
6151
6152 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
6153 metaslab_t *msp = vd->vdev_ms[m];
6154 ASSERT3P(msp->ms_group, ==, (msp->ms_group->mg_class ==
6155 spa_embedded_log_class(spa)) ?
6156 vd->vdev_log_mg : vd->vdev_mg);
6157
6158 /*
6159 * ms_allocatable has been overloaded
6160 * to contain allocated segments. Now that
6161 * we finished traversing all blocks, any
6162 * block that remains in the ms_allocatable
6163 * represents an allocated block that we
6164 * did not claim during the traversal.
6165 * Claimed blocks would have been removed
6166 * from the ms_allocatable. For indirect
6167 * vdevs, space remaining in the tree
6168 * represents parts of the mapping that are
6169 * not referenced, which is not a bug.
6170 */
6171 if (vd->vdev_ops == &vdev_indirect_ops) {
6172 range_tree_vacate(msp->ms_allocatable,
6173 NULL, NULL);
6174 } else {
6175 range_tree_vacate(msp->ms_allocatable,
6176 zdb_leak, vd);
6177 }
6178 if (msp->ms_loaded) {
6179 msp->ms_loaded = B_FALSE;
6180 }
6181 }
6182 }
6183
6184 umem_free(zcb->zcb_vd_obsolete_counts,
6185 rvd->vdev_children * sizeof (uint32_t *));
6186 zcb->zcb_vd_obsolete_counts = NULL;
6187
6188 return (leaks);
6189 }
6190
6191 /* ARGSUSED */
6192 static int
6193 count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
6194 {
6195 zdb_cb_t *zcb = arg;
6196
6197 if (dump_opt['b'] >= 5) {
6198 char blkbuf[BP_SPRINTF_LEN];
6199 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
6200 (void) printf("[%s] %s\n",
6201 "deferred free", blkbuf);
6202 }
6203 zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
6204 return (0);
6205 }
6206
6207 /*
6208 * Iterate over livelists which have been destroyed by the user but
6209 * are still present in the MOS, waiting to be freed
6210 */
6211 static void
6212 iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg)
6213 {
6214 objset_t *mos = spa->spa_meta_objset;
6215 uint64_t zap_obj;
6216 int err = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT,
6217 DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &zap_obj);
6218 if (err == ENOENT)
6219 return;
6220 ASSERT0(err);
6221
6222 zap_cursor_t zc;
6223 zap_attribute_t attr;
6224 dsl_deadlist_t ll;
6225 /* NULL out dl_os prior to dsl_deadlist_open in case it's garbage */
6226 ll.dl_os = NULL;
6227 for (zap_cursor_init(&zc, mos, zap_obj);
6228 zap_cursor_retrieve(&zc, &attr) == 0;
6229 (void) zap_cursor_advance(&zc)) {
6230 dsl_deadlist_open(&ll, mos, attr.za_first_integer);
6231 func(&ll, arg);
6232 dsl_deadlist_close(&ll);
6233 }
6234 zap_cursor_fini(&zc);
6235 }
6236
6237 static int
6238 bpobj_count_block_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
6239 dmu_tx_t *tx)
6240 {
6241 ASSERT(!bp_freed);
6242 return (count_block_cb(arg, bp, tx));
6243 }
6244
6245 static int
6246 livelist_entry_count_blocks_cb(void *args, dsl_deadlist_entry_t *dle)
6247 {
6248 zdb_cb_t *zbc = args;
6249 bplist_t blks;
6250 bplist_create(&blks);
6251 /* determine which blocks have been alloc'd but not freed */
6252 VERIFY0(dsl_process_sub_livelist(&dle->dle_bpobj, &blks, NULL, NULL));
6253 /* count those blocks */
6254 (void) bplist_iterate(&blks, count_block_cb, zbc, NULL);
6255 bplist_destroy(&blks);
6256 return (0);
6257 }
6258
6259 static void
6260 livelist_count_blocks(dsl_deadlist_t *ll, void *arg)
6261 {
6262 dsl_deadlist_iterate(ll, livelist_entry_count_blocks_cb, arg);
6263 }
6264
6265 /*
6266 * Count the blocks in the livelists that have been destroyed by the user
6267 * but haven't yet been freed.
6268 */
6269 static void
6270 deleted_livelists_count_blocks(spa_t *spa, zdb_cb_t *zbc)
6271 {
6272 iterate_deleted_livelists(spa, livelist_count_blocks, zbc);
6273 }
6274
6275 static void
6276 dump_livelist_cb(dsl_deadlist_t *ll, void *arg)
6277 {
6278 ASSERT3P(arg, ==, NULL);
6279 global_feature_count[SPA_FEATURE_LIVELIST]++;
6280 dump_blkptr_list(ll, "Deleted Livelist");
6281 dsl_deadlist_iterate(ll, sublivelist_verify_lightweight, NULL);
6282 }
6283
6284 /*
6285 * Print out, register object references to, and increment feature counts for
6286 * livelists that have been destroyed by the user but haven't yet been freed.
6287 */
6288 static void
6289 deleted_livelists_dump_mos(spa_t *spa)
6290 {
6291 uint64_t zap_obj;
6292 objset_t *mos = spa->spa_meta_objset;
6293 int err = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT,
6294 DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &zap_obj);
6295 if (err == ENOENT)
6296 return;
6297 mos_obj_refd(zap_obj);
6298 iterate_deleted_livelists(spa, dump_livelist_cb, NULL);
6299 }
6300
6301 static int
6302 dump_block_stats(spa_t *spa)
6303 {
6304 zdb_cb_t zcb;
6305 zdb_blkstats_t *zb, *tzb;
6306 uint64_t norm_alloc, norm_space, total_alloc, total_found;
6307 int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
6308 TRAVERSE_NO_DECRYPT | TRAVERSE_HARD;
6309 boolean_t leaks = B_FALSE;
6310 int e, c, err;
6311 bp_embedded_type_t i;
6312
6314 (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
6315 (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
6316 (dump_opt['c'] == 1) ? "metadata " : "",
6317 dump_opt['c'] ? "checksums " : "",
6318 (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
6319 !dump_opt['L'] ? "nothing leaked " : "");
6320
6321 /*
6322 * When leak detection is enabled we load all space maps as SM_ALLOC
6323 * maps, then traverse the pool claiming each block we discover. If
6324 * the pool is perfectly consistent, the segment trees will be empty
6325 * when we're done. Anything left over is a leak; any block we can't
6326 * claim (because it's not part of any space map) is a double
6327 * allocation, reference to a freed block, or an unclaimed log block.
6328 *
6329 * When leak detection is disabled (-L option) we still traverse the
6330 * pool claiming each block we discover, but we skip opening any space
6331 * maps.
6332 */
6333 bzero(&zcb, sizeof (zdb_cb_t));
6334 zdb_leak_init(spa, &zcb);
6335
6336 /*
6337 * If there's a deferred-free bplist, process that first.
6338 */
6339 (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
6340 bpobj_count_block_cb, &zcb, NULL);
6341
6342 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
6343 (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
6344 bpobj_count_block_cb, &zcb, NULL);
6345 }
6346
6347 zdb_claim_removing(spa, &zcb);
6348
6349 if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
6350 VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
6351 spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
6352 &zcb, NULL));
6353 }
6354
6355 deleted_livelists_count_blocks(spa, &zcb);
6356
6357 if (dump_opt['c'] > 1)
6358 flags |= TRAVERSE_PREFETCH_DATA;
6359
6360 zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
6361 zcb.zcb_totalasize += metaslab_class_get_alloc(spa_special_class(spa));
6362 zcb.zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa));
6363 zcb.zcb_totalasize +=
6364 metaslab_class_get_alloc(spa_embedded_log_class(spa));
6365 zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
6366 err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
6367
6368 /*
6369 * If we've traversed the data blocks then we need to wait for those
6370 * I/Os to complete. We leverage "The Godfather" zio to wait on
6371 * all async I/Os to complete.
6372 */
6373 if (dump_opt['c']) {
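/*
 * zio_wait() destroys the zio it waits on, so install a fresh
 * root zio for the pool teardown path afterwards.
 */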
6374 for (c = 0; c < max_ncpus; c++) {
6375 (void) zio_wait(spa->spa_async_zio_root[c]);
6376 spa->spa_async_zio_root[c] = zio_root(spa, NULL, NULL,
6377 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
6378 ZIO_FLAG_GODFATHER);
6379 }
6380 }
6381 ASSERT0(spa->spa_load_verify_bytes);
6382
6383 /*
6384 * Done after zio_wait() since zcb_haderrors is modified in
6385 * zdb_blkptr_done()
6386 */
6387 zcb.zcb_haderrors |= err;
6388
6389 if (zcb.zcb_haderrors) {
6390 (void) printf("\nError counts:\n\n");
6391 (void) printf("\t%5s %s\n", "errno", "count");
6392 for (e = 0; e < 256; e++) {
6393 if (zcb.zcb_errors[e] != 0) {
6394 (void) printf("\t%5d %llu\n",
6395 e, (u_longlong_t)zcb.zcb_errors[e]);
6396 }
6397 }
6398 }
6399
6400 /*
6401 * Report any leaked segments.
6402 */
6403 leaks |= zdb_leak_fini(spa, &zcb);
6404
6405 tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
6406
6407 norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
6408 norm_space = metaslab_class_get_space(spa_normal_class(spa));
6409
6410 total_alloc = norm_alloc +
6411 metaslab_class_get_alloc(spa_log_class(spa)) +
6412 metaslab_class_get_alloc(spa_embedded_log_class(spa)) +
6413 metaslab_class_get_alloc(spa_special_class(spa)) +
6414 metaslab_class_get_alloc(spa_dedup_class(spa)) +
6415 get_unflushed_alloc_space(spa);
6416 total_found = tzb->zb_asize - zcb.zcb_dedup_asize +
6417 zcb.zcb_removing_size + zcb.zcb_checkpoint_size;
6418
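/*
 * total_found backs out the extra copies of dedup'd blocks (each
 * reference was counted during traversal) and adds back allocated
 * space the traversal cannot reach: segments still mapped on a
 * removing vdev and blocks held only by the checkpoint.
 */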
6419 if (total_found == total_alloc && !dump_opt['L']) {
6420 (void) printf("\n\tNo leaks (block sum matches space"
6421 " maps exactly)\n");
6422 } else if (!dump_opt['L']) {
6423 (void) printf("block traversal size %llu != alloc %llu "
6424 "(%s %lld)\n",
6425 (u_longlong_t)total_found,
6426 (u_longlong_t)total_alloc,
6427 "leaked",
6428 (longlong_t)(total_alloc - total_found));
6429 leaks = B_TRUE;
6430 }
6431
6432 if (tzb->zb_count == 0)
6433 return (2);
6434
6435 (void) printf("\n");
6436 (void) printf("\t%-16s %14llu\n", "bp count:",
6437 (u_longlong_t)tzb->zb_count);
6438 (void) printf("\t%-16s %14llu\n", "ganged count:",
6439 (longlong_t)tzb->zb_gangs);
6440 (void) printf("\t%-16s %14llu avg: %6llu\n", "bp logical:",
6441 (u_longlong_t)tzb->zb_lsize,
6442 (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
6443 (void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n",
6444 "bp physical:", (u_longlong_t)tzb->zb_psize,
6445 (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
6446 (double)tzb->zb_lsize / tzb->zb_psize);
6447 (void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n",
6448 "bp allocated:", (u_longlong_t)tzb->zb_asize,
6449 (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
6450 (double)tzb->zb_lsize / tzb->zb_asize);
6451 (void) printf("\t%-16s %14llu ref>1: %6llu deduplication: %6.2f\n",
6452 "bp deduped:", (u_longlong_t)zcb.zcb_dedup_asize,
6453 (u_longlong_t)zcb.zcb_dedup_blocks,
6454 (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
6455 (void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:",
6456 (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
6457
6458 if (spa_special_class(spa)->mc_allocator[0].mca_rotor != NULL) {
6459 uint64_t alloc = metaslab_class_get_alloc(
6460 spa_special_class(spa));
6461 uint64_t space = metaslab_class_get_space(
6462 spa_special_class(spa));
6463
6464 (void) printf("\t%-16s %14llu used: %5.2f%%\n",
6465 "Special class", (u_longlong_t)alloc,
6466 100.0 * alloc / space);
6467 }
6468
6469 if (spa_dedup_class(spa)->mc_allocator[0].mca_rotor != NULL) {
6470 uint64_t alloc = metaslab_class_get_alloc(
6471 spa_dedup_class(spa));
6472 uint64_t space = metaslab_class_get_space(
6473 spa_dedup_class(spa));
6474
6475 (void) printf("\t%-16s %14llu used: %5.2f%%\n",
6476 "Dedup class", (u_longlong_t)alloc,
6477 100.0 * alloc / space);
6478 }
6479
6480 if (spa_embedded_log_class(spa)->mc_allocator[0].mca_rotor != NULL) {
6481 uint64_t alloc = metaslab_class_get_alloc(
6482 spa_embedded_log_class(spa));
6483 uint64_t space = metaslab_class_get_space(
6484 spa_embedded_log_class(spa));
6485
6486 (void) printf("\t%-16s %14llu used: %5.2f%%\n",
6487 "Embedded log class", (u_longlong_t)alloc,
6488 100.0 * alloc / space);
6489 }
6490
6491 for (i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
6492 if (zcb.zcb_embedded_blocks[i] == 0)
6493 continue;
6494 (void) printf("\n");
6495 (void) printf("\tadditional, non-pointer bps of type %u: "
6496 "%10llu\n",
6497 i, (u_longlong_t)zcb.zcb_embedded_blocks[i]);
6498
6499 if (dump_opt['b'] >= 3) {
6500 (void) printf("\t number of (compressed) bytes: "
6501 "number of bps\n");
6502 dump_histogram(zcb.zcb_embedded_histogram[i],
6503 sizeof (zcb.zcb_embedded_histogram[i]) /
6504 sizeof (zcb.zcb_embedded_histogram[i][0]), 0);
6505 }
6506 }
6507
6508 if (tzb->zb_ditto_samevdev != 0) {
6509 (void) printf("\tDittoed blocks on same vdev: %llu\n",
6510 (longlong_t)tzb->zb_ditto_samevdev);
6511 }
6512 if (tzb->zb_ditto_same_ms != 0) {
6513 (void) printf("\tDittoed blocks in same metaslab: %llu\n",
6514 (longlong_t)tzb->zb_ditto_same_ms);
6515 }
6516
6517 for (uint64_t v = 0; v < spa->spa_root_vdev->vdev_children; v++) {
6518 vdev_t *vd = spa->spa_root_vdev->vdev_child[v];
6519 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
6520
6521 if (vim == NULL) {
6522 continue;
6523 }
6524
6525 char mem[32];
6526 zdb_nicenum(vdev_indirect_mapping_size(vim), mem,
6527 sizeof (mem));
6528
6529 (void) printf("\tindirect vdev id %llu has %llu segments "
6530 "(%s in memory)\n",
6531 (longlong_t)vd->vdev_id,
6532 (longlong_t)vdev_indirect_mapping_num_entries(vim), mem);
6533 }
6534
6535 if (dump_opt['b'] >= 2) {
6536 int l, t, level;
6537 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
6538 "\t avg\t comp\t%%Total\tType\n");
6539
6540 for (t = 0; t <= ZDB_OT_TOTAL; t++) {
6541 char csize[32], lsize[32], psize[32], asize[32];
6542 char avg[32], gang[32];
6543 const char *typename;
6544
6545 /* make sure nicenum has enough space */
6546 CTASSERT(sizeof (csize) >= NN_NUMBUF_SZ);
6547 CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ);
6548 CTASSERT(sizeof (psize) >= NN_NUMBUF_SZ);
6549 CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ);
6550 CTASSERT(sizeof (avg) >= NN_NUMBUF_SZ);
6551 CTASSERT(sizeof (gang) >= NN_NUMBUF_SZ);
6552
6553 if (t < DMU_OT_NUMTYPES)
6554 typename = dmu_ot[t].ot_name;
6555 else
6556 typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
6557
6558 if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
6559 (void) printf("%6s\t%5s\t%5s\t%5s"
6560 "\t%5s\t%5s\t%6s\t%s\n",
6561 "-",
6562 "-",
6563 "-",
6564 "-",
6565 "-",
6566 "-",
6567 "-",
6568 typename);
6569 continue;
6570 }
6571
6572 for (l = ZB_TOTAL - 1; l >= -1; l--) {
6573 level = (l == -1 ? ZB_TOTAL : l);
6574 zb = &zcb.zcb_type[level][t];
6575
6576 if (zb->zb_asize == 0)
6577 continue;
6578
6579 if (dump_opt['b'] < 3 && level != ZB_TOTAL)
6580 continue;
6581
6582 if (level == 0 && zb->zb_asize ==
6583 zcb.zcb_type[ZB_TOTAL][t].zb_asize)
6584 continue;
6585
6586 zdb_nicenum(zb->zb_count, csize,
6587 sizeof (csize));
6588 zdb_nicenum(zb->zb_lsize, lsize,
6589 sizeof (lsize));
6590 zdb_nicenum(zb->zb_psize, psize,
6591 sizeof (psize));
6592 zdb_nicenum(zb->zb_asize, asize,
6593 sizeof (asize));
6594 zdb_nicenum(zb->zb_asize / zb->zb_count, avg,
6595 sizeof (avg));
6596 zdb_nicenum(zb->zb_gangs, gang, sizeof (gang));
6597
6598 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
6599 "\t%5.2f\t%6.2f\t",
6600 csize, lsize, psize, asize, avg,
6601 (double)zb->zb_lsize / zb->zb_psize,
6602 100.0 * zb->zb_asize / tzb->zb_asize);
6603
6604 if (level == ZB_TOTAL)
6605 (void) printf("%s\n", typename);
6606 else
6607 (void) printf(" L%d %s\n",
6608 level, typename);
6609
6610 if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) {
6611 (void) printf("\t number of ganged "
6612 "blocks: %s\n", gang);
6613 }
6614
6615 if (dump_opt['b'] >= 4) {
6616 (void) printf("psize "
6617 "(in 512-byte sectors): "
6618 "number of blocks\n");
6619 dump_histogram(zb->zb_psize_histogram,
6620 PSIZE_HISTO_SIZE, 0);
6621 }
6622 }
6623 }
6624
6625 /* Output a table summarizing block sizes in the pool */
6626 if (dump_opt['b'] >= 2) {
6627 dump_size_histograms(&zcb);
6628 }
6629 }
6630
6631 (void) printf("\n");
6632
6633 if (leaks)
6634 return (2);
6635
6636 if (zcb.zcb_haderrors)
6637 return (3);
6638
6639 return (0);
6640 }
6641
6642 typedef struct zdb_ddt_entry {
6643 ddt_key_t zdde_key;
6644 uint64_t zdde_ref_blocks;
6645 uint64_t zdde_ref_lsize;
6646 uint64_t zdde_ref_psize;
6647 uint64_t zdde_ref_dsize;
6648 avl_node_t zdde_node;
6649 } zdb_ddt_entry_t;
6650
6651 /* ARGSUSED */
6652 static int
6653 zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
6654 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
6655 {
6656 avl_tree_t *t = arg;
6657 avl_index_t where;
6658 zdb_ddt_entry_t *zdde, zdde_search;
6659
6660 if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) ||
6661 BP_IS_EMBEDDED(bp))
6662 return (0);
6663
6664 if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
6665 (void) printf("traversing objset %llu, %llu objects, "
6666 "%lu blocks so far\n",
6667 (u_longlong_t)zb->zb_objset,
6668 (u_longlong_t)BP_GET_FILL(bp),
6669 avl_numnodes(t));
6670 }
6671
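/*
 * Only level-0, checksummed data blocks are dedup candidates;
 * the simulation skips everything else.
 */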
6672 if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
6673 BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
6674 return (0);
6675
6676 ddt_key_fill(&zdde_search.zdde_key, bp);
6677
6678 zdde = avl_find(t, &zdde_search, &where);
6679
6680 if (zdde == NULL) {
6681 zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
6682 zdde->zdde_key = zdde_search.zdde_key;
6683 avl_insert(t, zdde, where);
6684 }
6685
6686 zdde->zdde_ref_blocks += 1;
6687 zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
6688 zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
6689 zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
6690
6691 return (0);
6692 }
6693
6694 static void
6695 dump_simulated_ddt(spa_t *spa)
6696 {
6697 avl_tree_t t;
6698 void *cookie = NULL;
6699 zdb_ddt_entry_t *zdde;
6700 ddt_histogram_t ddh_total;
6701 ddt_stat_t dds_total;
6702
6703 bzero(&ddh_total, sizeof (ddh_total));
6704 bzero(&dds_total, sizeof (dds_total));
6705 avl_create(&t, ddt_entry_compare,
6706 sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
6707
6708 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
6709
6710 (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
6711 TRAVERSE_NO_DECRYPT, zdb_ddt_add_cb, &t);
6712
6713 spa_config_exit(spa, SCL_CONFIG, FTAG);
6714
6715 while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
6716 ddt_stat_t dds;
6717 uint64_t refcnt = zdde->zdde_ref_blocks;
6718 ASSERT(refcnt != 0);
6719
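/*
 * dds_* carry the per-unique-block values (totals divided by the
 * reference count); dds_ref_* carry the referenced totals.
 */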
6720 dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
6721 dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
6722 dds.dds_psize = zdde->zdde_ref_psize / refcnt;
6723 dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
6724
6725 dds.dds_ref_blocks = zdde->zdde_ref_blocks;
6726 dds.dds_ref_lsize = zdde->zdde_ref_lsize;
6727 dds.dds_ref_psize = zdde->zdde_ref_psize;
6728 dds.dds_ref_dsize = zdde->zdde_ref_dsize;
6729
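/*
 * Bucket by floor(log2(refcnt)): histogram row r aggregates
 * entries whose reference count lies in [2^r, 2^(r+1)), e.g.
 * refcounts 4 through 7 all land in row 2.
 */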
6730 ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1],
6731 &dds, 0);
6732
6733 umem_free(zdde, sizeof (*zdde));
6734 }
6735
6736 avl_destroy(&t);
6737
6738 ddt_histogram_stat(&dds_total, &ddh_total);
6739
6740 (void) printf("Simulated DDT histogram:\n");
6741
6742 zpool_dump_ddt(&dds_total, &ddh_total);
6743
6744 dump_dedup_ratio(&dds_total);
6745 }
6746
6747 static int
6748 verify_device_removal_feature_counts(spa_t *spa)
6749 {
6750 uint64_t dr_feature_refcount = 0;
6751 uint64_t oc_feature_refcount = 0;
6752 uint64_t indirect_vdev_count = 0;
6753 uint64_t precise_vdev_count = 0;
6754 uint64_t obsolete_counts_object_count = 0;
6755 uint64_t obsolete_sm_count = 0;
6756 uint64_t obsolete_counts_count = 0;
6757 uint64_t scip_count = 0;
6758 uint64_t obsolete_bpobj_count = 0;
6759 int ret = 0;
6760
6761 spa_condensing_indirect_phys_t *scip =
6762 &spa->spa_condensing_indirect_phys;
6763 if (scip->scip_next_mapping_object != 0) {
6764 vdev_t *vd = spa->spa_root_vdev->vdev_child[scip->scip_vdev];
6765 ASSERT(scip->scip_prev_obsolete_sm_object != 0);
6766 ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops);
6767
6768 (void) printf("Condensing indirect vdev %llu: new mapping "
6769 "object %llu, prev obsolete sm %llu\n",
6770 (u_longlong_t)scip->scip_vdev,
6771 (u_longlong_t)scip->scip_next_mapping_object,
6772 (u_longlong_t)scip->scip_prev_obsolete_sm_object);
6773 if (scip->scip_prev_obsolete_sm_object != 0) {
6774 space_map_t *prev_obsolete_sm = NULL;
6775 VERIFY0(space_map_open(&prev_obsolete_sm,
6776 spa->spa_meta_objset,
6777 scip->scip_prev_obsolete_sm_object,
6778 0, vd->vdev_asize, 0));
6779 dump_spacemap(spa->spa_meta_objset, prev_obsolete_sm);
6780 (void) printf("\n");
6781 space_map_close(prev_obsolete_sm);
6782 }
6783
6784 scip_count += 2;
6785 }
6786
6787 for (uint64_t i = 0; i < spa->spa_root_vdev->vdev_children; i++) {
6788 vdev_t *vd = spa->spa_root_vdev->vdev_child[i];
6789 vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
6790
6791 if (vic->vic_mapping_object != 0) {
6792 ASSERT(vd->vdev_ops == &vdev_indirect_ops ||
6793 vd->vdev_removing);
6794 indirect_vdev_count++;
6795
6796 if (vd->vdev_indirect_mapping->vim_havecounts) {
6797 obsolete_counts_count++;
6798 }
6799 }
6800
6801 boolean_t are_precise;
6802 VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise));
6803 if (are_precise) {
6804 ASSERT(vic->vic_mapping_object != 0);
6805 precise_vdev_count++;
6806 }
6807
6808 uint64_t obsolete_sm_object;
6809 VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
6810 if (obsolete_sm_object != 0) {
6811 ASSERT(vic->vic_mapping_object != 0);
6812 obsolete_sm_count++;
6813 }
6814 }
6815
6816 (void) feature_get_refcount(spa,
6817 &spa_feature_table[SPA_FEATURE_DEVICE_REMOVAL],
6818 &dr_feature_refcount);
6819 (void) feature_get_refcount(spa,
6820 &spa_feature_table[SPA_FEATURE_OBSOLETE_COUNTS],
6821 &oc_feature_refcount);
6822
6823 if (dr_feature_refcount != indirect_vdev_count) {
6824 ret = 1;
6825 (void) printf("Number of indirect vdevs (%llu) " \
6826 "does not match feature count (%llu)\n",
6827 (u_longlong_t)indirect_vdev_count,
6828 (u_longlong_t)dr_feature_refcount);
6829 } else {
6830 (void) printf("Verified device_removal feature refcount " \
6831 "of %llu is correct\n",
6832 (u_longlong_t)dr_feature_refcount);
6833 }
6834
6835 if (zap_contains(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT,
6836 DMU_POOL_OBSOLETE_BPOBJ) == 0) {
6837 obsolete_bpobj_count++;
6838 }
6839
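/*
 * Sum everything that should hold a reference on the obsolete_counts
 * feature: precise-count vdevs (pv), obsolete space maps (os),
 * per-mapping count arrays (oc), condensing state objects (sc),
 * the obsolete bpobj (ob), and remap deadlists (rd).
 */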
6841 obsolete_counts_object_count = precise_vdev_count;
6842 obsolete_counts_object_count += obsolete_sm_count;
6843 obsolete_counts_object_count += obsolete_counts_count;
6844 obsolete_counts_object_count += scip_count;
6845 obsolete_counts_object_count += obsolete_bpobj_count;
6846 obsolete_counts_object_count += remap_deadlist_count;
6847
6848 if (oc_feature_refcount != obsolete_counts_object_count) {
6849 ret = 1;
6850 (void) printf("Number of obsolete counts objects (%llu) " \
6851 "does not match feature count (%llu)\n",
6852 (u_longlong_t)obsolete_counts_object_count,
6853 (u_longlong_t)oc_feature_refcount);
6854 (void) printf("pv:%llu os:%llu oc:%llu sc:%llu "
6855 "ob:%llu rd:%llu\n",
6856 (u_longlong_t)precise_vdev_count,
6857 (u_longlong_t)obsolete_sm_count,
6858 (u_longlong_t)obsolete_counts_count,
6859 (u_longlong_t)scip_count,
6860 (u_longlong_t)obsolete_bpobj_count,
6861 (u_longlong_t)remap_deadlist_count);
6862 } else {
6863 (void) printf("Verified indirect_refcount feature refcount " \
6864 "of %llu is correct\n",
6865 (u_longlong_t)oc_feature_refcount);
6866 }
6867 return (ret);
6868 }
6869
6870 static void
6871 zdb_set_skip_mmp(char *target)
6872 {
6873 spa_t *spa;
6874
6875 /*
6876 * Disable the activity check to allow examination of
6877 * active pools.
6878 */
6879 mutex_enter(&spa_namespace_lock);
6880 if ((spa = spa_lookup(target)) != NULL) {
6881 spa->spa_import_flags |= ZFS_IMPORT_SKIP_MMP;
6882 }
6883 mutex_exit(&spa_namespace_lock);
6884 }
6885
6886 #define BOGUS_SUFFIX "_CHECKPOINTED_UNIVERSE"
6887 /*
6888 * Import the checkpointed state of the pool specified by the target
6889 * parameter as readonly. The function also accepts a pool config
6890 * as an optional parameter, else it attempts to infer the config by
6891 * the name of the target pool.
6892 *
6893 * Note that the checkpointed state's pool name will be the name of
6894 * the original pool with the above suffix appended to it. In addition,
6895 * if the target is not a pool name (e.g. a path to a dataset) then
6896 * the new_path parameter is populated with the updated path to
6897 * reflect the fact that we are looking into the checkpointed state.
6898 *
6899 * The function returns a newly-allocated copy of the name of the
6900 * pool containing the checkpointed state. When this copy is no
6901 * longer needed it should be freed with free(3C). Same thing
6902 * applies to the new_path parameter if allocated.
6903 */
6904 static char *
6905 import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path)
6906 {
6907 int error = 0;
6908 char *poolname, *bogus_name = NULL;
6909 boolean_t freecfg = B_FALSE;
6910
6911 /* If the target is not a pool, extract the pool name */
6912 char *path_start = strchr(target, '/');
6913 if (path_start != NULL) {
6914 size_t poolname_len = path_start - target;
6915 poolname = strndup(target, poolname_len);
6916 } else {
6917 poolname = target;
6918 }
6919
6920 if (cfg == NULL) {
6921 zdb_set_skip_mmp(poolname);
6922 error = spa_get_stats(poolname, &cfg, NULL, 0);
6923 if (error != 0) {
6924 fatal("Tried to read config of pool \"%s\" but "
6925 "spa_get_stats() failed with error %d\n",
6926 poolname, error);
6927 }
6928 freecfg = B_TRUE;
6929 }
6930
6931 if (asprintf(&bogus_name, "%s%s", poolname, BOGUS_SUFFIX) == -1)
6932 return (NULL);
6933 fnvlist_add_string(cfg, ZPOOL_CONFIG_POOL_NAME, bogus_name);
6934
6935 error = spa_import(bogus_name, cfg, NULL,
6936 ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT |
6937 ZFS_IMPORT_SKIP_MMP);
6938 if (freecfg)
6939 nvlist_free(cfg);
6940 if (error != 0) {
6941 fatal("Tried to import pool \"%s\" but spa_import() failed "
6942 "with error %d\n", bogus_name, error);
6943 }
6944
6945 if (new_path != NULL && path_start != NULL) {
6946 if (asprintf(new_path, "%s%s", bogus_name, path_start) == -1) {
6947 free(poolname);
6949 return (NULL);
6950 }
6951 }
6952
6953 if (target != poolname)
6954 free(poolname);
6955
6956 return (bogus_name);
6957 }
6958
6959 typedef struct verify_checkpoint_sm_entry_cb_arg {
6960 vdev_t *vcsec_vd;
6961
6962 /* the following fields are only used for printing progress */
6963 uint64_t vcsec_entryid;
6964 uint64_t vcsec_num_entries;
6965 } verify_checkpoint_sm_entry_cb_arg_t;
6966
6967 #define ENTRIES_PER_PROGRESS_UPDATE 10000
6968
6969 static int
6970 verify_checkpoint_sm_entry_cb(space_map_entry_t *sme, void *arg)
6971 {
6972 verify_checkpoint_sm_entry_cb_arg_t *vcsec = arg;
6973 vdev_t *vd = vcsec->vcsec_vd;
6974 metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
6975 uint64_t end = sme->sme_offset + sme->sme_run;
6976
6977 ASSERT(sme->sme_type == SM_FREE);
6978
6979 if ((vcsec->vcsec_entryid % ENTRIES_PER_PROGRESS_UPDATE) == 0) {
6980 (void) fprintf(stderr,
6981 "\rverifying vdev %llu, space map entry %llu of %llu ...",
6982 (longlong_t)vd->vdev_id,
6983 (longlong_t)vcsec->vcsec_entryid,
6984 (longlong_t)vcsec->vcsec_num_entries);
6985 }
6986 vcsec->vcsec_entryid++;
6987
6988 /*
6989 * See comment in checkpoint_sm_exclude_entry_cb()
6990 */
6991 VERIFY3U(sme->sme_offset, >=, ms->ms_start);
6992 VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
6993
6994 /*
6995 * The entries in the vdev_checkpoint_sm should be marked as
6996 * allocated in the checkpointed state of the pool, therefore
6997 * their respective ms_allocatable trees should not contain them.
6998 */
6999 mutex_enter(&ms->ms_lock);
7000 range_tree_verify_not_present(ms->ms_allocatable,
7001 sme->sme_offset, sme->sme_run);
7002 mutex_exit(&ms->ms_lock);
7003
7004 return (0);
7005 }
7006
7007 /*
7008 * Verify that all segments in the vdev_checkpoint_sm are allocated
7009 * according to the checkpoint's ms_sm (i.e. are not in the checkpoint's
7010 * ms_allocatable).
7011 *
7012 * Do so by comparing the checkpoint space maps (vdev_checkpoint_sm) of
7013 * each vdev in the current state of the pool to the metaslab space maps
7014 * (ms_sm) of the checkpointed state of the pool.
7015 *
7016 * Note that the function changes the state of the ms_allocatable
7017 * trees of the current spa_t. The entries of these ms_allocatable
7018 * trees are cleared out and then repopulated with the free
7019 * entries of their respective ms_sm space maps.
7020 */
7021 static void
7022 verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current)
7023 {
7024 vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev;
7025 vdev_t *current_rvd = current->spa_root_vdev;
7026
7027 load_concrete_ms_allocatable_trees(checkpoint, SM_FREE);
7028
7029 for (uint64_t c = 0; c < ckpoint_rvd->vdev_children; c++) {
7030 vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[c];
7031 vdev_t *current_vd = current_rvd->vdev_child[c];
7032
7033 space_map_t *checkpoint_sm = NULL;
7034 uint64_t checkpoint_sm_obj;
7035
7036 if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) {
7037 /*
7038 * Since we don't allow device removal in a pool
7039 * that has a checkpoint, we expect that all removed
7040 * vdevs were removed from the pool before the
7041 * checkpoint.
7042 */
7043 ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops);
7044 continue;
7045 }
7046
7047 /*
7048 * If the checkpoint space map doesn't exist, then nothing
7049 * here is checkpointed so there's nothing to verify.
7050 */
7051 if (current_vd->vdev_top_zap == 0 ||
7052 zap_contains(spa_meta_objset(current),
7053 current_vd->vdev_top_zap,
7054 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
7055 continue;
7056
7057 VERIFY0(zap_lookup(spa_meta_objset(current),
7058 current_vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM,
7059 sizeof (uint64_t), 1, &checkpoint_sm_obj));
7060
7061 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(current),
7062 checkpoint_sm_obj, 0, current_vd->vdev_asize,
7063 current_vd->vdev_ashift));
7064
7065 verify_checkpoint_sm_entry_cb_arg_t vcsec;
7066 vcsec.vcsec_vd = ckpoint_vd;
7067 vcsec.vcsec_entryid = 0;
7068 vcsec.vcsec_num_entries =
7069 space_map_length(checkpoint_sm) / sizeof (uint64_t);
7070 VERIFY0(space_map_iterate(checkpoint_sm,
7071 space_map_length(checkpoint_sm),
7072 verify_checkpoint_sm_entry_cb, &vcsec));
7073 if (dump_opt['m'] > 3)
7074 dump_spacemap(current->spa_meta_objset, checkpoint_sm);
7075 space_map_close(checkpoint_sm);
7076 }
7077
7078 /*
7079 * If we've added vdevs since we took the checkpoint, ensure
7080 * that their checkpoint space maps are empty.
7081 */
7082 if (ckpoint_rvd->vdev_children < current_rvd->vdev_children) {
7083 for (uint64_t c = ckpoint_rvd->vdev_children;
7084 c < current_rvd->vdev_children; c++) {
7085 vdev_t *current_vd = current_rvd->vdev_child[c];
7086 VERIFY3P(current_vd->vdev_checkpoint_sm, ==, NULL);
7087 }
7088 }
7089
7090 /* for cleaner progress output */
7091 (void) fprintf(stderr, "\n");
7092 }
7093
7094 /*
7095 * Verifies that all space that's allocated in the checkpoint is
7096 * still allocated in the current version, by checking that everything
7097 * in checkpoint's ms_allocatable (which is actually allocated, not
7098 * allocatable/free) is not present in current's ms_allocatable.
7099 *
7100 * Note that the function changes the state of the ms_allocatable
7101 * trees of both spas when called. The entries of all ms_allocatable
7102 * trees are cleared out and then repopulated from their respective
7103 * ms_sm space maps. In the checkpointed state we load the allocated
7104 * entries, and in the current state we load the free entries.
7105 */
7106 static void
7107 verify_checkpoint_ms_spacemaps(spa_t *checkpoint, spa_t *current)
7108 {
7109 vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev;
7110 vdev_t *current_rvd = current->spa_root_vdev;
7111
7112 load_concrete_ms_allocatable_trees(checkpoint, SM_ALLOC);
7113 load_concrete_ms_allocatable_trees(current, SM_FREE);
7114
7115 for (uint64_t i = 0; i < ckpoint_rvd->vdev_children; i++) {
7116 vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[i];
7117 vdev_t *current_vd = current_rvd->vdev_child[i];
7118
7119 if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) {
7120 /*
7121 * See comment in verify_checkpoint_vdev_spacemaps()
7122 */
7123 ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops);
7124 continue;
7125 }
7126
7127 for (uint64_t m = 0; m < ckpoint_vd->vdev_ms_count; m++) {
7128 metaslab_t *ckpoint_msp = ckpoint_vd->vdev_ms[m];
7129 metaslab_t *current_msp = current_vd->vdev_ms[m];
7130
7131 (void) fprintf(stderr,
7132 "\rverifying vdev %llu of %llu, "
7133 "metaslab %llu of %llu ...",
7134 (longlong_t)current_vd->vdev_id,
7135 (longlong_t)current_rvd->vdev_children,
7136 (longlong_t)current_vd->vdev_ms[m]->ms_id,
7137 (longlong_t)current_vd->vdev_ms_count);
7138
7139 /*
7140 * We walk through the ms_allocatable trees that
7141 * are loaded with the allocated blocks from the
7142 * ms_sm spacemaps of the checkpoint. For each
7143 * one of these ranges we ensure that none of them
7144 * exists in the ms_allocatable trees of the
7145 * current state which are loaded with the ranges
7146 * that are currently free.
7147 *
7148 * This way we ensure that none of the blocks that
7149 * are part of the checkpoint were freed by mistake.
7150 */
7151 range_tree_walk(ckpoint_msp->ms_allocatable,
7152 (range_tree_func_t *)range_tree_verify_not_present,
7153 current_msp->ms_allocatable);
7154 }
7155 }
7156
7157 /* for cleaner progress output */
7158 (void) fprintf(stderr, "\n");
7159 }
7160
7161 static void
7162 verify_checkpoint_blocks(spa_t *spa)
7163 {
7164 ASSERT(!dump_opt['L']);
7165
7166 spa_t *checkpoint_spa;
7167 char *checkpoint_pool;
7168 int error = 0;
7169
7170 /*
7171 * We import the checkpointed state of the pool (under a different
7172 * name) so we can do verification on it against the current state
7173 * of the pool.
7174 */
7175 checkpoint_pool = import_checkpointed_state(spa->spa_name, NULL,
7176 NULL);
7177 ASSERT(strcmp(spa->spa_name, checkpoint_pool) != 0);
7178
7179 error = spa_open(checkpoint_pool, &checkpoint_spa, FTAG);
7180 if (error != 0) {
7181 fatal("Tried to open pool \"%s\" but spa_open() failed with "
7182 "error %d\n", checkpoint_pool, error);
7183 }
7184
7185 /*
7186 * Ensure that ranges in the checkpoint space maps of each vdev
7187 * are allocated according to the checkpointed state's metaslab
7188 * space maps.
7189 */
7190 verify_checkpoint_vdev_spacemaps(checkpoint_spa, spa);
7191
7192 /*
7193 * Ensure that allocated ranges in the checkpoint's metaslab
7194 * space maps remain allocated in the metaslab space maps of
7195 * the current state.
7196 */
7197 verify_checkpoint_ms_spacemaps(checkpoint_spa, spa);
7198
7199 /*
7200 * Once we are done, we get rid of the checkpointed state.
7201 */
7202 spa_close(checkpoint_spa, FTAG);
7203 free(checkpoint_pool);
7204 }
7205
7206 static void
7207 dump_leftover_checkpoint_blocks(spa_t *spa)
7208 {
7209 vdev_t *rvd = spa->spa_root_vdev;
7210
7211 for (uint64_t i = 0; i < rvd->vdev_children; i++) {
7212 vdev_t *vd = rvd->vdev_child[i];
7213
7214 space_map_t *checkpoint_sm = NULL;
7215 uint64_t checkpoint_sm_obj;
7216
7217 if (vd->vdev_top_zap == 0)
7218 continue;
7219
7220 if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap,
7221 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
7222 continue;
7223
7224 VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap,
7225 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM,
7226 sizeof (uint64_t), 1, &checkpoint_sm_obj));
7227
7228 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
7229 checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
7230 dump_spacemap(spa->spa_meta_objset, checkpoint_sm);
7231 space_map_close(checkpoint_sm);
7232 }
7233 }
7234
7235 static int
7236 verify_checkpoint(spa_t *spa)
7237 {
7238 uberblock_t checkpoint;
7239 int error;
7240
7241 if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT))
7242 return (0);
7243
7244 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
7245 DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t),
7246 sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint);
7247
7248 if (error == ENOENT && !dump_opt['L']) {
7249 /*
7250 * If the feature is active but the uberblock is missing
7251 * then we must be in the middle of discarding the
7252 * checkpoint.
7253 */
7254 (void) printf("\nPartially discarded checkpoint "
7255 "state found:\n");
7256 if (dump_opt['m'] > 3)
7257 dump_leftover_checkpoint_blocks(spa);
7258 return (0);
7259 } else if (error != 0) {
7260 (void) printf("lookup error %d when looking for "
7261 "checkpointed uberblock in MOS\n", error);
7262 return (error);
7263 }
7264 dump_uberblock(&checkpoint, "\nCheckpointed uberblock found:\n", "\n");
7265
7266 if (checkpoint.ub_checkpoint_txg == 0) {
7267 (void) printf("\nub_checkpoint_txg not set in checkpointed "
7268 "uberblock\n");
7269 error = 3;
7270 }
7271
7272 if (error == 0 && !dump_opt['L'])
7273 verify_checkpoint_blocks(spa);
7274
7275 return (error);
7276 }
7277
7278 /* ARGSUSED */
7279 static void
7280 mos_leaks_cb(void *arg, uint64_t start, uint64_t size)
7281 {
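/*
 * A range tree walk callback receives (start, size), so the
 * unallocated-but-referenced objects span [start, start + size).
 */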
7282 for (uint64_t i = start; i < start + size; i++) {
7283 (void) printf("MOS object %llu referenced but not allocated\n",
7284 (u_longlong_t)i);
7285 }
7286 }
7287
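/*
 * mos_refd_objs is a range tree used as a sparse set of object
 * numbers: a referenced object obj occupies the unit segment
 * [obj, obj + 1).
 */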
7288 static void
7289 mos_obj_refd(uint64_t obj)
7290 {
7291 if (obj != 0 && mos_refd_objs != NULL)
7292 range_tree_add(mos_refd_objs, obj, 1);
7293 }
7294
7295 /*
7296 * Call on a MOS object that may already have been referenced.
7297 */
7298 static void
7299 mos_obj_refd_multiple(uint64_t obj)
7300 {
7301 if (obj != 0 && mos_refd_objs != NULL &&
7302 !range_tree_contains(mos_refd_objs, obj, 1))
7303 range_tree_add(mos_refd_objs, obj, 1);
7304 }
7305
7306 static void
7307 mos_leak_vdev_top_zap(vdev_t *vd)
7308 {
7309 uint64_t ms_flush_data_obj;
7310 int error = zap_lookup(spa_meta_objset(vd->vdev_spa),
7311 vd->vdev_top_zap, VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS,
7312 sizeof (ms_flush_data_obj), 1, &ms_flush_data_obj);
7313 if (error == ENOENT)
7314 return;
7315 ASSERT0(error);
7316
7317 mos_obj_refd(ms_flush_data_obj);
7318 }
7319
7320 static void
7321 mos_leak_vdev(vdev_t *vd)
7322 {
7323 mos_obj_refd(vd->vdev_dtl_object);
7324 mos_obj_refd(vd->vdev_ms_array);
7325 mos_obj_refd(vd->vdev_indirect_config.vic_births_object);
7326 mos_obj_refd(vd->vdev_indirect_config.vic_mapping_object);
7327 mos_obj_refd(vd->vdev_leaf_zap);
7328 if (vd->vdev_checkpoint_sm != NULL)
7329 mos_obj_refd(vd->vdev_checkpoint_sm->sm_object);
7330 if (vd->vdev_indirect_mapping != NULL) {
7331 mos_obj_refd(vd->vdev_indirect_mapping->
7332 vim_phys->vimp_counts_object);
7333 }
7334 if (vd->vdev_obsolete_sm != NULL)
7335 mos_obj_refd(vd->vdev_obsolete_sm->sm_object);
7336
7337 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
7338 metaslab_t *ms = vd->vdev_ms[m];
7339 mos_obj_refd(space_map_object(ms->ms_sm));
7340 }
7341
7342 if (vd->vdev_top_zap != 0) {
7343 mos_obj_refd(vd->vdev_top_zap);
7344 mos_leak_vdev_top_zap(vd);
7345 }
7346
7347 for (uint64_t c = 0; c < vd->vdev_children; c++) {
7348 mos_leak_vdev(vd->vdev_child[c]);
7349 }
7350 }
7351
7352 static void
7353 mos_leak_log_spacemaps(spa_t *spa)
7354 {
7355 uint64_t spacemap_zap;
7356 int error = zap_lookup(spa_meta_objset(spa),
7357 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_LOG_SPACEMAP_ZAP,
7358 sizeof (spacemap_zap), 1, &spacemap_zap);
7359 if (error == ENOENT)
7360 return;
7361 ASSERT0(error);
7362
7363 mos_obj_refd(spacemap_zap);
7364 for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
7365 sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls))
7366 mos_obj_refd(sls->sls_sm_obj);
7367 }
7368
7369 static int
7370 dump_mos_leaks(spa_t *spa)
7371 {
7372 int rv = 0;
7373 objset_t *mos = spa->spa_meta_objset;
7374 dsl_pool_t *dp = spa->spa_dsl_pool;
7375
7376 /* Visit and mark all referenced objects in the MOS */
7377
7378 mos_obj_refd(DMU_POOL_DIRECTORY_OBJECT);
7379 mos_obj_refd(spa->spa_pool_props_object);
7380 mos_obj_refd(spa->spa_config_object);
7381 mos_obj_refd(spa->spa_ddt_stat_object);
7382 mos_obj_refd(spa->spa_feat_desc_obj);
7383 mos_obj_refd(spa->spa_feat_enabled_txg_obj);
7384 mos_obj_refd(spa->spa_feat_for_read_obj);
7385 mos_obj_refd(spa->spa_feat_for_write_obj);
7386 mos_obj_refd(spa->spa_history);
7387 mos_obj_refd(spa->spa_errlog_last);
7388 mos_obj_refd(spa->spa_errlog_scrub);
7389 mos_obj_refd(spa->spa_all_vdev_zaps);
7390 mos_obj_refd(spa->spa_dsl_pool->dp_bptree_obj);
7391 mos_obj_refd(spa->spa_dsl_pool->dp_tmp_userrefs_obj);
7392 mos_obj_refd(spa->spa_dsl_pool->dp_scan->scn_phys.scn_queue_obj);
7393 bpobj_count_refd(&spa->spa_deferred_bpobj);
7394 mos_obj_refd(dp->dp_empty_bpobj);
7395 bpobj_count_refd(&dp->dp_obsolete_bpobj);
7396 bpobj_count_refd(&dp->dp_free_bpobj);
7397 mos_obj_refd(spa->spa_l2cache.sav_object);
7398 mos_obj_refd(spa->spa_spares.sav_object);
7399
7400 if (spa->spa_syncing_log_sm != NULL)
7401 mos_obj_refd(spa->spa_syncing_log_sm->sm_object);
7402 mos_leak_log_spacemaps(spa);
7403
7404 mos_obj_refd(spa->spa_condensing_indirect_phys.
7405 scip_next_mapping_object);
7406 mos_obj_refd(spa->spa_condensing_indirect_phys.
7407 scip_prev_obsolete_sm_object);
7408 if (spa->spa_condensing_indirect_phys.scip_next_mapping_object != 0) {
7409 vdev_indirect_mapping_t *vim =
7410 vdev_indirect_mapping_open(mos,
7411 spa->spa_condensing_indirect_phys.scip_next_mapping_object);
7412 mos_obj_refd(vim->vim_phys->vimp_counts_object);
7413 vdev_indirect_mapping_close(vim);
7414 }
7415 deleted_livelists_dump_mos(spa);
7416
7417 if (dp->dp_origin_snap != NULL) {
7418 dsl_dataset_t *ds;
7419
7420 dsl_pool_config_enter(dp, FTAG);
7421 VERIFY0(dsl_dataset_hold_obj(dp,
7422 dsl_dataset_phys(dp->dp_origin_snap)->ds_next_snap_obj,
7423 FTAG, &ds));
7424 count_ds_mos_objects(ds);
7425 dump_blkptr_list(&ds->ds_deadlist, "Deadlist");
7426 dsl_dataset_rele(ds, FTAG);
7427 dsl_pool_config_exit(dp, FTAG);
7428
7429 count_ds_mos_objects(dp->dp_origin_snap);
7430 dump_blkptr_list(&dp->dp_origin_snap->ds_deadlist, "Deadlist");
7431 }
7432 count_dir_mos_objects(dp->dp_mos_dir);
7433 if (dp->dp_free_dir != NULL)
7434 count_dir_mos_objects(dp->dp_free_dir);
7435 if (dp->dp_leak_dir != NULL)
7436 count_dir_mos_objects(dp->dp_leak_dir);
7437
7438 mos_leak_vdev(spa->spa_root_vdev);
7439
7440 for (uint64_t class = 0; class < DDT_CLASSES; class++) {
7441 for (uint64_t type = 0; type < DDT_TYPES; type++) {
7442 for (uint64_t cksum = 0;
7443 cksum < ZIO_CHECKSUM_FUNCTIONS; cksum++) {
7444 ddt_t *ddt = spa->spa_ddt[cksum];
7445 mos_obj_refd(ddt->ddt_object[type][class]);
7446 }
7447 }
7448 }
7449
7450 /*
7451 * Visit all allocated objects and make sure they are referenced.
7452 */
7453 uint64_t object = 0;
7454 while (dmu_object_next(mos, &object, B_FALSE, 0) == 0) {
7455 if (range_tree_contains(mos_refd_objs, object, 1)) {
7456 range_tree_remove(mos_refd_objs, object, 1);
7457 } else {
7458 dmu_object_info_t doi;
7459 const char *name;
7460 dmu_object_info(mos, object, &doi);
7461 if (doi.doi_type & DMU_OT_NEWTYPE) {
7462 dmu_object_byteswap_t bswap =
7463 DMU_OT_BYTESWAP(doi.doi_type);
7464 name = dmu_ot_byteswap[bswap].ob_name;
7465 } else {
7466 name = dmu_ot[doi.doi_type].ot_name;
7467 }
7468
7469 (void) printf("MOS object %llu (%s) leaked\n",
7470 (u_longlong_t)object, name);
7471 rv = 2;
7472 }
7473 }
7474 (void) range_tree_walk(mos_refd_objs, mos_leaks_cb, NULL);
7475 if (!range_tree_is_empty(mos_refd_objs))
7476 rv = 2;
7477 range_tree_vacate(mos_refd_objs, NULL, NULL);
7478 range_tree_destroy(mos_refd_objs);
7479 return (rv);
7480 }
7481
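/*
 * Accumulator for dump_log_spacemap_obsolete_stats(): the lsos_sm_*
 * counters track the spacemap log currently being walked (and reset
 * at each txg boundary), while lsos_total_entries/lsos_valid_entries
 * accumulate across all logs.
 */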
7482 typedef struct log_sm_obsolete_stats_arg {
7483 uint64_t lsos_current_txg;
7484
7485 uint64_t lsos_total_entries;
7486 uint64_t lsos_valid_entries;
7487
7488 uint64_t lsos_sm_entries;
7489 uint64_t lsos_valid_sm_entries;
7490 } log_sm_obsolete_stats_arg_t;
7491
7492 static int
7493 log_spacemap_obsolete_stats_cb(spa_t *spa, space_map_entry_t *sme,
7494 uint64_t txg, void *arg)
7495 {
7496 log_sm_obsolete_stats_arg_t *lsos = arg;
7497
7498 uint64_t offset = sme->sme_offset;
7499 uint64_t vdev_id = sme->sme_vdev;
7500
7501 if (lsos->lsos_current_txg == 0) {
7502 /* this is the first log */
7503 lsos->lsos_current_txg = txg;
7504 } else if (lsos->lsos_current_txg < txg) {
7505 /* we just changed log - print stats and reset */
7506 (void) printf("%-8llu valid entries out of %-8llu - txg %llu\n",
7507 (u_longlong_t)lsos->lsos_valid_sm_entries,
7508 (u_longlong_t)lsos->lsos_sm_entries,
7509 (u_longlong_t)lsos->lsos_current_txg);
7510 lsos->lsos_valid_sm_entries = 0;
7511 lsos->lsos_sm_entries = 0;
7512 lsos->lsos_current_txg = txg;
7513 }
7514 ASSERT3U(lsos->lsos_current_txg, ==, txg);
7515
7516 lsos->lsos_sm_entries++;
7517 lsos->lsos_total_entries++;
7518
7519 vdev_t *vd = vdev_lookup_top(spa, vdev_id);
7520 if (!vdev_is_concrete(vd))
7521 return (0);
7522
7523 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
7524 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE);
7525
7526 if (txg < metaslab_unflushed_txg(ms))
7527 return (0);
7528 lsos->lsos_valid_sm_entries++;
7529 lsos->lsos_valid_entries++;
7530 return (0);
7531 }
7532
7533 static void
7534 dump_log_spacemap_obsolete_stats(spa_t *spa)
7535 {
7536 if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
7537 return;
7538
7539 log_sm_obsolete_stats_arg_t lsos;
7540 bzero(&lsos, sizeof (lsos));
7541
7542 (void) printf("Log Space Map Obsolete Entry Statistics:\n");
7543
7544 iterate_through_spacemap_logs(spa,
7545 log_spacemap_obsolete_stats_cb, &lsos);
7546
7547 /* print stats for latest log */
7548 (void) printf("%-8llu valid entries out of %-8llu - txg %llu\n",
7549 (u_longlong_t)lsos.lsos_valid_sm_entries,
7550 (u_longlong_t)lsos.lsos_sm_entries,
7551 (u_longlong_t)lsos.lsos_current_txg);
7552
7553 (void) printf("%-8llu valid entries out of %-8llu - total\n\n",
7554 (u_longlong_t)lsos.lsos_valid_entries,
7555 (u_longlong_t)lsos.lsos_total_entries);
7556 }
7557
7558 static void
7559 dump_zpool(spa_t *spa)
7560 {
7561 dsl_pool_t *dp = spa_get_dsl(spa);
7562 int rc = 0;
7563
7564 if (dump_opt['y']) {
7565 livelist_metaslab_validate(spa);
7566 }
7567
7568 if (dump_opt['S']) {
7569 dump_simulated_ddt(spa);
7570 return;
7571 }
7572
7573 if (!dump_opt['e'] && dump_opt['C'] > 1) {
7574 (void) printf("\nCached configuration:\n");
7575 dump_nvlist(spa->spa_config, 8);
7576 }
7577
7578 if (dump_opt['C'])
7579 dump_config(spa);
7580
7581 if (dump_opt['u'])
7582 dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
7583
7584 if (dump_opt['D'])
7585 dump_all_ddts(spa);
7586
7587 if (dump_opt['d'] > 2 || dump_opt['m'])
7588 dump_metaslabs(spa);
7589 if (dump_opt['M'])
7590 dump_metaslab_groups(spa);
7591 if (dump_opt['d'] > 2 || dump_opt['m']) {
7592 dump_log_spacemaps(spa);
7593 dump_log_spacemap_obsolete_stats(spa);
7594 }
7595
7596 if (dump_opt['d'] || dump_opt['i']) {
7597 spa_feature_t f;
7598 mos_refd_objs = range_tree_create(NULL, RANGE_SEG64, NULL, 0,
7599 0);
7600 dump_objset(dp->dp_meta_objset);
7601
7602 if (dump_opt['d'] >= 3) {
7603 dsl_pool_t *dp = spa->spa_dsl_pool;
7604 dump_full_bpobj(&spa->spa_deferred_bpobj,
7605 "Deferred frees", 0);
7606 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
7607 dump_full_bpobj(&dp->dp_free_bpobj,
7608 "Pool snapshot frees", 0);
7609 }
7610 if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
7611 ASSERT(spa_feature_is_enabled(spa,
7612 SPA_FEATURE_DEVICE_REMOVAL));
7613 dump_full_bpobj(&dp->dp_obsolete_bpobj,
7614 "Pool obsolete blocks", 0);
7615 }
7616
7617 if (spa_feature_is_active(spa,
7618 SPA_FEATURE_ASYNC_DESTROY)) {
7619 dump_bptree(spa->spa_meta_objset,
7620 dp->dp_bptree_obj,
7621 "Pool dataset frees");
7622 }
7623 dump_dtl(spa->spa_root_vdev, 0);
7624 }
7625
7626 for (spa_feature_t f = 0; f < SPA_FEATURES; f++)
7627 global_feature_count[f] = UINT64_MAX;
7628 global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS] = 0;
7629 global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN] = 0;
7630 global_feature_count[SPA_FEATURE_LIVELIST] = 0;
7631
7632 (void) dmu_objset_find(spa_name(spa), dump_one_objset,
7633 NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
7634
7635 if (rc == 0 && !dump_opt['L'])
7636 rc = dump_mos_leaks(spa);
7637
7638 for (f = 0; f < SPA_FEATURES; f++) {
7639 uint64_t refcount;
7640
7641 uint64_t *arr;
7642 if (!(spa_feature_table[f].fi_flags &
7643 ZFEATURE_FLAG_PER_DATASET)) {
7644 if (global_feature_count[f] == UINT64_MAX)
7645 continue;
7646 if (!spa_feature_is_enabled(spa, f)) {
7647 ASSERT0(global_feature_count[f]);
7648 continue;
7649 }
7650 arr = global_feature_count;
7651 } else {
7652 if (!spa_feature_is_enabled(spa, f)) {
7653 ASSERT0(dataset_feature_count[f]);
7654 continue;
7655 }
7656 arr = dataset_feature_count;
7657 }
7658 if (feature_get_refcount(spa, &spa_feature_table[f],
7659 &refcount) == ENOTSUP)
7660 continue;
7661 if (arr[f] != refcount) {
7662 (void) printf("%s feature refcount mismatch: "
7663 "%lld consumers != %lld refcount\n",
7664 spa_feature_table[f].fi_uname,
7665 (longlong_t)arr[f], (longlong_t)refcount);
7666 rc = 2;
7667 } else {
7668 (void) printf("Verified %s feature refcount "
7669 "of %llu is correct\n",
7670 spa_feature_table[f].fi_uname,
7671 (longlong_t)refcount);
7672 }
7673 }
7674
7675 if (rc == 0)
7676 rc = verify_device_removal_feature_counts(spa);
7677 }
7678
7679 if (rc == 0 && (dump_opt['b'] || dump_opt['c']))
7680 rc = dump_block_stats(spa);
7681
7682 if (rc == 0)
7683 rc = verify_spacemap_refcounts(spa);
7684
7685 if (dump_opt['s'])
7686 show_pool_stats(spa);
7687
7688 if (dump_opt['h'])
7689 dump_history(spa);
7690
7691 if (rc == 0)
7692 rc = verify_checkpoint(spa);
7693
7694 if (rc != 0) {
7695 dump_debug_buffer();
7696 exit(rc);
7697 }
7698 }
7699
7700 #define ZDB_FLAG_CHECKSUM 0x0001
7701 #define ZDB_FLAG_DECOMPRESS 0x0002
7702 #define ZDB_FLAG_BSWAP 0x0004
7703 #define ZDB_FLAG_GBH 0x0008
7704 #define ZDB_FLAG_INDIRECT 0x0010
7705 #define ZDB_FLAG_RAW 0x0020
7706 #define ZDB_FLAG_PRINT_BLKPTR 0x0040
7707 #define ZDB_FLAG_VERBOSE 0x0080
7708
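/*
 * flagbits maps each flag character accepted by zdb_read_block() to
 * its ZDB_FLAG_* bit; flagbitstr is consulted when scanning the
 * numeric argument that can follow the 'b' flag.
 */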
7709 static int flagbits[256];
7710 static char flagbitstr[16];
7711
7712 static void
7713 zdb_print_blkptr(const blkptr_t *bp, int flags)
7714 {
7715 char blkbuf[BP_SPRINTF_LEN];
7716
7717 if (flags & ZDB_FLAG_BSWAP)
7718 byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
7719
7720 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
7721 (void) printf("%s\n", blkbuf);
7722 }
7723
7724 static void
7725 zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
7726 {
7727 int i;
7728
7729 for (i = 0; i < nbps; i++)
7730 zdb_print_blkptr(&bp[i], flags);
7731 }
7732
7733 static void
7734 zdb_dump_gbh(void *buf, int flags)
7735 {
7736 zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
7737 }
7738
7739 static void
7740 zdb_dump_block_raw(void *buf, uint64_t size, int flags)
7741 {
7742 if (flags & ZDB_FLAG_BSWAP)
7743 byteswap_uint64_array(buf, size);
7744 VERIFY(write(fileno(stdout), buf, size) == size);
7745 }
7746
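/*
 * Print a hex dump of "buf": two 64-bit words per line, byteswapped
 * if ZDB_FLAG_BSWAP is set, followed by their printable ASCII
 * representation.
 */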
7747 static void
7748 zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
7749 {
7750 uint64_t *d = (uint64_t *)buf;
7751 unsigned nwords = size / sizeof (uint64_t);
7752 int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
7753 unsigned i, j;
7754 const char *hdr;
7755 char *c;
7756 
7758 if (do_bswap)
7759 hdr = " 7 6 5 4 3 2 1 0 f e d c b a 9 8";
7760 else
7761 hdr = " 0 1 2 3 4 5 6 7 8 9 a b c d e f";
7762
7763 (void) printf("\n%s\n%6s %s 0123456789abcdef\n", label, "", hdr);
7764
7765 #ifdef _LITTLE_ENDIAN
7766 /* correct the endianness */
7767 do_bswap = !do_bswap;
7768 #endif
7769 for (i = 0; i < nwords; i += 2) {
7770 (void) printf("%06llx: %016llx %016llx ",
7771 (u_longlong_t)(i * sizeof (uint64_t)),
7772 (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
7773 (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
7774
7775 c = (char *)&d[i];
7776 for (j = 0; j < 2 * sizeof (uint64_t); j++)
7777 (void) printf("%c", isprint(c[j]) ? c[j] : '.');
7778 (void) printf("\n");
7779 }
7780 }
7781
7782 /*
7783 * There are two acceptable formats:
7784 * leaf_name - For example: c1t0d0 or /tmp/ztest.0a
7785 * child[.child]* - For example: 0.1.1
7786 *
7787 * The second form can be used to specify arbitrary vdevs anywhere
7788 * in the hierarchy. For example, in a pool with a mirror of
7789 * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
7790 */
7791 static vdev_t *
7792 zdb_vdev_lookup(vdev_t *vdev, const char *path)
7793 {
7794 char *s, *p, *q;
7795 unsigned i;
7796
7797 if (vdev == NULL)
7798 return (NULL);
7799
7800 /* First, assume the x.x.x.x format */
7801 i = strtoul(path, &s, 10);
7802 if (s == path || (s && *s != '.' && *s != '\0'))
7803 goto name;
7804 if (i >= vdev->vdev_children)
7805 return (NULL);
7806
7807 vdev = vdev->vdev_child[i];
7808 if (s && *s == '\0')
7809 return (vdev);
7810 return (zdb_vdev_lookup(vdev, s+1));
7811
7812 name:
7813 for (i = 0; i < vdev->vdev_children; i++) {
7814 vdev_t *vc = vdev->vdev_child[i];
7815
7816 if (vc->vdev_path == NULL) {
7817 vc = zdb_vdev_lookup(vc, path);
7818 if (vc == NULL)
7819 continue;
7820 else
7821 return (vc);
7822 }
7823
7824 p = strrchr(vc->vdev_path, '/');
7825 p = p ? p + 1 : vc->vdev_path;
7826 q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
7827
7828 if (strcmp(vc->vdev_path, path) == 0)
7829 return (vc);
7830 if (strcmp(p, path) == 0)
7831 return (vc);
7832 if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
7833 return (vc);
7834 }
7835
7836 return (NULL);
7837 }
7838
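/*
 * Look up the dataset name for the given objset ID and copy it into
 * "outstr" (which must be at least ZFS_MAX_DATASET_NAME_LEN bytes).
 */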
7839 static int
7840 name_from_objset_id(spa_t *spa, uint64_t objset_id, char *outstr)
7841 {
7842 dsl_dataset_t *ds;
7843
7844 dsl_pool_config_enter(spa->spa_dsl_pool, FTAG);
7845 int error = dsl_dataset_hold_obj(spa->spa_dsl_pool, objset_id,
7846 NULL, &ds);
7847 if (error != 0) {
7848 (void) fprintf(stderr, "failed to hold objset %llu: %s\n",
7849 (u_longlong_t)objset_id, strerror(error));
7850 dsl_pool_config_exit(spa->spa_dsl_pool, FTAG);
7851 return (error);
7852 }
7853 dsl_dataset_name(ds, outstr);
7854 dsl_dataset_rele(ds, NULL);
7855 dsl_pool_config_exit(spa->spa_dsl_pool, FTAG);
7856 return (0);
7857 }
7858
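/*
 * Parse the "[lsize/]psize" portion of a block descriptor.  Both
 * sizes are hex; lsize defaults to psize when only one is given.
 * Returns B_TRUE only when lsize >= psize > 0.
 */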
7859 static boolean_t
7860 zdb_parse_block_sizes(char *sizes, uint64_t *lsize, uint64_t *psize)
7861 {
7862 char *s0, *s1;
7863
7864 if (sizes == NULL)
7865 return (B_FALSE);
7866
7867 s0 = strtok(sizes, "/");
7868 if (s0 == NULL)
7869 return (B_FALSE);
7870 s1 = strtok(NULL, "/");
7871 *lsize = strtoull(s0, NULL, 16);
7872 *psize = s1 ? strtoull(s1, NULL, 16) : *lsize;
7873 return (*lsize >= *psize && *psize > 0);
7874 }
7875
7876 #define ZIO_COMPRESS_MASK(alg) (1ULL << (ZIO_COMPRESS_##alg))
7877
7878 static boolean_t
7879 zdb_decompress_block(abd_t *pabd, void *buf, void *lbuf, uint64_t lsize,
7880 uint64_t psize, int flags)
7881 {
7882 boolean_t exceeded = B_FALSE;
7883 /*
7884 * We don't know how the data was compressed, so just try
7885 * every decompress function at every inflated blocksize.
7886 */
7887 void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
7888 int cfuncs[ZIO_COMPRESS_FUNCTIONS] = { 0 };
7889 int *cfuncp = cfuncs;
7890 uint64_t maxlsize = SPA_MAXBLOCKSIZE;
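	/*
	 * Build the list of candidate decompressors: LZ4 and LZJB are
	 * tried first as the most likely matches, then every remaining
	 * real algorithm that is not masked out.  Setting ZDB_NO_ZLE in
	 * the environment excludes ZLE, whose results can be misleading
	 * (see the warning printed below).
	 */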
7891 uint64_t mask = ZIO_COMPRESS_MASK(ON) | ZIO_COMPRESS_MASK(OFF) |
7892 ZIO_COMPRESS_MASK(INHERIT) | ZIO_COMPRESS_MASK(EMPTY) |
7893 (getenv("ZDB_NO_ZLE") ? ZIO_COMPRESS_MASK(ZLE) : 0);
7894 *cfuncp++ = ZIO_COMPRESS_LZ4;
7895 *cfuncp++ = ZIO_COMPRESS_LZJB;
7896 mask |= ZIO_COMPRESS_MASK(LZ4) | ZIO_COMPRESS_MASK(LZJB);
7897 for (int c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++)
7898 if (((1ULL << c) & mask) == 0)
7899 *cfuncp++ = c;
7900
7901 /*
7902 * On the one hand, with SPA_MAXBLOCKSIZE at 16MB, this
7903 * could take a while and we should let the user know
7904 * we are not stuck. On the other hand, printing progress
7905 	 * info gets old after a while. The user can specify the 'v'
7906 	 * flag to see each attempt.
7907 */
7908 if (lsize == psize)
7909 lsize += SPA_MINBLOCKSIZE;
7910 else
7911 maxlsize = lsize;
7912 for (; lsize <= maxlsize; lsize += SPA_MINBLOCKSIZE) {
7913 for (cfuncp = cfuncs; *cfuncp; cfuncp++) {
7914 if (flags & ZDB_FLAG_VERBOSE) {
7915 (void) fprintf(stderr,
7916 "Trying %05llx -> %05llx (%s)\n",
7917 (u_longlong_t)psize,
7918 (u_longlong_t)lsize,
7919 				    zio_compress_table[*cfuncp].ci_name);
7921 }
7922
7923 /*
7924 * We randomize lbuf2, and decompress to both
7925 			 * lbuf and lbuf2. This way, we will know if the
7926 			 * decompression filled exactly to lsize.
7927 */
7928 VERIFY0(random_get_pseudo_bytes(lbuf2, lsize));
7929
7930 if (zio_decompress_data(*cfuncp, pabd,
7931 lbuf, psize, lsize, NULL) == 0 &&
7932 zio_decompress_data(*cfuncp, pabd,
7933 lbuf2, psize, lsize, NULL) == 0 &&
7934 bcmp(lbuf, lbuf2, lsize) == 0)
7935 break;
7936 }
7937 if (*cfuncp != 0)
7938 break;
7939 }
7940 umem_free(lbuf2, SPA_MAXBLOCKSIZE);
7941
7942 if (lsize > maxlsize) {
7943 exceeded = B_TRUE;
7944 }
7945 buf = lbuf;
7946 if (*cfuncp == ZIO_COMPRESS_ZLE) {
7947 printf("\nZLE decompression was selected. If you "
7948 "suspect the results are wrong,\ntry avoiding ZLE "
7949 "by setting and exporting ZDB_NO_ZLE=\"true\"\n");
7950 }
7951
7952 return (exceeded);
7953 }
7954
7955 /*
7956 * Read a block from a pool and print it out. The syntax of the
7957 * block descriptor is:
7958 *
7959  *	vdev_specifier:offset:[lsize/]psize[:flags]
7960  *
7961  *	(the pool to read from is given as a separate zdb -R argument)
7962 * vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
7963 * offset - offset, in hex, in bytes
7964 * size - Amount of data to read, in hex, in bytes
7965 * flags - A string of characters specifying options
7966 * b: Decode a blkptr at given offset within block
7967 * c: Calculate and display checksums
7968 * d: Decompress data before dumping
7969 * e: Byteswap data before dumping
7970 * g: Display data as a gang block header
7971 * i: Display as an indirect block
7972 * r: Dump raw data to stdout
7973 * v: Verbose
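 *
 * For example (pool name, offset, and sizes here are illustrative
 * only):
 *
 *	zdb -R tank 0:40000:20000:d
 *
 * would read 0x20000 bytes at offset 0x40000 on vdev 0 of the pool
 * "tank" and attempt to decompress them before dumping.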
7974 *
7975 */
7976 static void
7977 zdb_read_block(char *thing, spa_t *spa)
7978 {
7979 blkptr_t blk, *bp = &blk;
7980 dva_t *dva = bp->blk_dva;
7981 int flags = 0;
7982 uint64_t offset = 0, psize = 0, lsize = 0, blkptr_offset = 0;
7983 zio_t *zio;
7984 vdev_t *vd;
7985 abd_t *pabd;
7986 void *lbuf, *buf;
7987 char *s, *p, *dup, *vdev, *flagstr, *sizes;
7988 int i, error;
7989 boolean_t borrowed = B_FALSE, found = B_FALSE;
7990
7991 dup = strdup(thing);
7992 s = strtok(dup, ":");
7993 vdev = s ? s : "";
7994 s = strtok(NULL, ":");
7995 offset = strtoull(s ? s : "", NULL, 16);
7996 sizes = strtok(NULL, ":");
7997 s = strtok(NULL, ":");
7998 flagstr = strdup(s ? s : "");
7999
8000 s = NULL;
8001 if (!zdb_parse_block_sizes(sizes, &lsize, &psize))
8002 s = "invalid size(s)";
8003 if (!IS_P2ALIGNED(psize, DEV_BSIZE) || !IS_P2ALIGNED(lsize, DEV_BSIZE))
8004 s = "size must be a multiple of sector size";
8005 if (!IS_P2ALIGNED(offset, DEV_BSIZE))
8006 s = "offset must be a multiple of sector size";
8007 if (s) {
8008 (void) printf("Invalid block specifier: %s - %s\n", thing, s);
8009 goto done;
8010 }
8011
8012 for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
8013 for (i = 0; i < strlen(flagstr); i++) {
8014 int bit = flagbits[(uchar_t)flagstr[i]];
8015
8016 if (bit == 0) {
8017 (void) printf("***Ignoring flag: %c\n",
8018 (uchar_t)flagstr[i]);
8019 continue;
8020 }
8021 found = B_TRUE;
8022 flags |= bit;
8023
8024 p = &flagstr[i + 1];
8025 if (*p != ':' && *p != '\0') {
8026 int j = 0, nextbit = flagbits[(uchar_t)*p];
8027 char *end, offstr[8] = { 0 };
8028 if ((bit == ZDB_FLAG_PRINT_BLKPTR) &&
8029 (nextbit == 0)) {
8030 /* look ahead to isolate the offset */
8031 while (nextbit == 0 &&
8032 strchr(flagbitstr, *p) == NULL) {
8033 offstr[j] = *p;
8034 j++;
8035 if (i + j > strlen(flagstr))
8036 break;
8037 p++;
8038 nextbit = flagbits[(uchar_t)*p];
8039 }
8040 blkptr_offset = strtoull(offstr, &end,
8041 16);
8042 i += j;
8043 } else if (nextbit == 0) {
8044 (void) printf("***Ignoring flag arg:"
8045 " '%c'\n", (uchar_t)*p);
8046 }
8047 }
8048 }
8049 }
8050 if (blkptr_offset % sizeof (blkptr_t)) {
8051 printf("Block pointer offset 0x%llx "
8052 "must be divisible by 0x%x\n",
8053 (longlong_t)blkptr_offset, (int)sizeof (blkptr_t));
8054 goto done;
8055 }
8056 if (found == B_FALSE && strlen(flagstr) > 0) {
8057 printf("Invalid flag arg: '%s'\n", flagstr);
8058 goto done;
8059 }
8060
8061 vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
8062 if (vd == NULL) {
8063 (void) printf("***Invalid vdev: %s\n", vdev);
8064 		goto done;
8066 } else {
8067 if (vd->vdev_path)
8068 (void) fprintf(stderr, "Found vdev: %s\n",
8069 vd->vdev_path);
8070 else
8071 (void) fprintf(stderr, "Found vdev type: %s\n",
8072 vd->vdev_ops->vdev_op_type);
8073 }
8074
8075 pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE);
8076 lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
8077
8078 BP_ZERO(bp);
8079
8080 DVA_SET_VDEV(&dva[0], vd->vdev_id);
8081 DVA_SET_OFFSET(&dva[0], offset);
8082 DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
8083 DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
8084
8085 BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
8086
8087 BP_SET_LSIZE(bp, lsize);
8088 BP_SET_PSIZE(bp, psize);
8089 BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
8090 BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
8091 BP_SET_TYPE(bp, DMU_OT_NONE);
8092 BP_SET_LEVEL(bp, 0);
8093 BP_SET_DEDUP(bp, 0);
8094 BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
8095
8096 spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
8097 zio = zio_root(spa, NULL, NULL, 0);
8098
8099 if (vd == vd->vdev_top) {
8100 /*
8101 * Treat this as a normal block read.
8102 */
8103 zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL,
8104 ZIO_PRIORITY_SYNC_READ,
8105 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
8106 } else {
8107 /*
8108 * Treat this as a vdev child I/O.
8109 */
8110 zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
8111 psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
8112 ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_PROPAGATE |
8113 ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
8114 ZIO_FLAG_OPTIONAL, NULL, NULL));
8115 }
8116
8117 error = zio_wait(zio);
8118 spa_config_exit(spa, SCL_STATE, FTAG);
8119
8120 if (error) {
8121 (void) printf("Read of %s failed, error: %d\n", thing, error);
8122 goto out;
8123 }
8124
8125 uint64_t orig_lsize = lsize;
8126 buf = lbuf;
8127 if (flags & ZDB_FLAG_DECOMPRESS) {
8128 boolean_t failed = zdb_decompress_block(pabd, buf, lbuf,
8129 lsize, psize, flags);
8130 if (failed) {
8131 (void) printf("Decompress of %s failed\n", thing);
8132 goto out;
8133 }
8134 } else {
8135 buf = abd_borrow_buf_copy(pabd, lsize);
8136 borrowed = B_TRUE;
8137 }
8138 /*
8139 	 * Try to detect an invalid block pointer. If invalid, try
8140 	 * decompressing.
8141 */
8142 if ((flags & ZDB_FLAG_PRINT_BLKPTR || flags & ZDB_FLAG_INDIRECT) &&
8143 !(flags & ZDB_FLAG_DECOMPRESS)) {
8144 const blkptr_t *b = (const blkptr_t *)(void *)
8145 ((uintptr_t)buf + (uintptr_t)blkptr_offset);
8146 if (zfs_blkptr_verify(spa, b, B_FALSE, BLK_VERIFY_ONLY) ==
8147 B_FALSE) {
8148 abd_return_buf_copy(pabd, buf, lsize);
8149 borrowed = B_FALSE;
8150 buf = lbuf;
8151 boolean_t failed = zdb_decompress_block(pabd, buf,
8152 lbuf, lsize, psize, flags);
8153 b = (const blkptr_t *)(void *)
8154 ((uintptr_t)buf + (uintptr_t)blkptr_offset);
8155 if (failed || zfs_blkptr_verify(spa, b, B_FALSE,
8156 BLK_VERIFY_LOG) == B_FALSE) {
8157 printf("invalid block pointer at this DVA\n");
8158 goto out;
8159 }
8160 }
8161 }
8162
8163 if (flags & ZDB_FLAG_PRINT_BLKPTR)
8164 zdb_print_blkptr((blkptr_t *)(void *)
8165 ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
8166 else if (flags & ZDB_FLAG_RAW)
8167 zdb_dump_block_raw(buf, lsize, flags);
8168 else if (flags & ZDB_FLAG_INDIRECT)
8169 zdb_dump_indirect((blkptr_t *)buf,
8170 orig_lsize / sizeof (blkptr_t), flags);
8171 else if (flags & ZDB_FLAG_GBH)
8172 zdb_dump_gbh(buf, flags);
8173 else
8174 zdb_dump_block(thing, buf, lsize, flags);
8175
8176 /*
8177 * If :c was specified, iterate through the checksum table to
8178 * calculate and display each checksum for our specified
8179 * DVA and length.
8180 */
8181 if ((flags & ZDB_FLAG_CHECKSUM) && !(flags & ZDB_FLAG_RAW) &&
8182 !(flags & ZDB_FLAG_GBH)) {
8183 zio_t *czio;
8184 (void) printf("\n");
8185 for (enum zio_checksum ck = ZIO_CHECKSUM_LABEL;
8186 ck < ZIO_CHECKSUM_FUNCTIONS; ck++) {
8187
8188 if ((zio_checksum_table[ck].ci_flags &
8189 ZCHECKSUM_FLAG_EMBEDDED) ||
8190 ck == ZIO_CHECKSUM_NOPARITY) {
8191 continue;
8192 }
8193 BP_SET_CHECKSUM(bp, ck);
8194 spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
8195 czio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
8196 czio->io_bp = bp;
8197
8198 if (vd == vd->vdev_top) {
8199 zio_nowait(zio_read(czio, spa, bp, pabd, psize,
8200 NULL, NULL,
8201 ZIO_PRIORITY_SYNC_READ,
8202 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
8203 ZIO_FLAG_DONT_RETRY, NULL));
8204 } else {
8205 zio_nowait(zio_vdev_child_io(czio, bp, vd,
8206 offset, pabd, psize, ZIO_TYPE_READ,
8207 ZIO_PRIORITY_SYNC_READ,
8208 ZIO_FLAG_DONT_CACHE |
8209 ZIO_FLAG_DONT_PROPAGATE |
8210 ZIO_FLAG_DONT_RETRY |
8211 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
8212 ZIO_FLAG_SPECULATIVE |
8213 ZIO_FLAG_OPTIONAL, NULL, NULL));
8214 }
8215 error = zio_wait(czio);
8216 if (error == 0 || error == ECKSUM) {
8217 zio_t *ck_zio = zio_root(spa, NULL, NULL, 0);
8218 ck_zio->io_offset =
8219 DVA_GET_OFFSET(&bp->blk_dva[0]);
8220 ck_zio->io_bp = bp;
8221 zio_checksum_compute(ck_zio, ck, pabd, lsize);
8222 printf("%12s\tcksum=%llx:%llx:%llx:%llx\n",
8223 zio_checksum_table[ck].ci_name,
8224 (u_longlong_t)bp->blk_cksum.zc_word[0],
8225 (u_longlong_t)bp->blk_cksum.zc_word[1],
8226 (u_longlong_t)bp->blk_cksum.zc_word[2],
8227 (u_longlong_t)bp->blk_cksum.zc_word[3]);
8228 zio_wait(ck_zio);
8229 } else {
8230 printf("error %d reading block\n", error);
8231 }
8232 spa_config_exit(spa, SCL_STATE, FTAG);
8233 }
8234 }
8235
8236 if (borrowed)
8237 abd_return_buf_copy(pabd, buf, lsize);
8238
8239 out:
8240 abd_free(pabd);
8241 umem_free(lbuf, SPA_MAXBLOCKSIZE);
8242 done:
8243 free(flagstr);
8244 free(dup);
8245 }
8246
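/*
 * Decode a block pointer supplied as sixteen colon-separated hex
 * words (the raw 128-byte blkptr_t) and dump the embedded payload
 * to stdout.
 */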
8247 static void
8248 zdb_embedded_block(char *thing)
8249 {
8250 blkptr_t bp;
8251 unsigned long long *words = (void *)&bp;
8252 char *buf;
8253 int err;
8254
8255 bzero(&bp, sizeof (bp));
8256 err = sscanf(thing, "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx:"
8257 "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx",
8258 words + 0, words + 1, words + 2, words + 3,
8259 words + 4, words + 5, words + 6, words + 7,
8260 words + 8, words + 9, words + 10, words + 11,
8261 words + 12, words + 13, words + 14, words + 15);
8262 if (err != 16) {
8263 (void) fprintf(stderr, "invalid input format\n");
8264 exit(1);
8265 }
8266 ASSERT3U(BPE_GET_LSIZE(&bp), <=, SPA_MAXBLOCKSIZE);
8267 buf = malloc(SPA_MAXBLOCKSIZE);
8268 if (buf == NULL) {
8269 (void) fprintf(stderr, "out of memory\n");
8270 exit(1);
8271 }
8272 err = decode_embedded_bp(&bp, buf, BPE_GET_LSIZE(&bp));
8273 if (err != 0) {
8274 (void) fprintf(stderr, "decode failed: %u\n", err);
8275 exit(1);
8276 }
8277 zdb_dump_block_raw(buf, BPE_GET_LSIZE(&bp), 0);
8278 free(buf);
8279 }
8280
8281 int
8282 main(int argc, char **argv)
8283 {
8284 int c;
8285 struct rlimit rl = { 1024, 1024 };
8286 spa_t *spa = NULL;
8287 objset_t *os = NULL;
8288 int dump_all = 1;
8289 int verbose = 0;
8290 int error = 0;
8291 char **searchdirs = NULL;
8292 int nsearch = 0;
8293 char *target, *target_pool, dsname[ZFS_MAX_DATASET_NAME_LEN];
8294 nvlist_t *policy = NULL;
8295 uint64_t max_txg = UINT64_MAX;
8296 int64_t objset_id = -1;
8297 uint64_t object;
8298 int flags = ZFS_IMPORT_MISSING_LOG;
8299 int rewind = ZPOOL_NEVER_REWIND;
8300 char *spa_config_path_env, *objset_str;
8301 boolean_t target_is_spa = B_TRUE, dataset_lookup = B_FALSE;
8302 nvlist_t *cfg = NULL;
8303
8304 (void) setrlimit(RLIMIT_NOFILE, &rl);
8305 (void) enable_extended_FILE_stdio(-1, -1);
8306
8307 dprintf_setup(&argc, argv);
8308
8309 /*
8310 	 * If the SPA_CONFIG_PATH environment variable is set, it overrides
8311 	 * the default spa_config_path setting. If the -U flag is specified,
8312 	 * it overrides this environment variable once again.
8313 */
8314 spa_config_path_env = getenv("SPA_CONFIG_PATH");
8315 if (spa_config_path_env != NULL)
8316 spa_config_path = spa_config_path_env;
8317
8318 /*
8319 * For performance reasons, we set this tunable down. We do so before
8320 * the arg parsing section so that the user can override this value if
8321 * they choose.
8322 */
8323 zfs_btree_verify_intensity = 3;
8324
8325 while ((c = getopt(argc, argv,
8326 "AbcCdDeEFGhiI:klLmMo:Op:PqrRsSt:uU:vVx:XYyZ")) != -1) {
8327 switch (c) {
8328 case 'b':
8329 case 'c':
8330 case 'C':
8331 case 'd':
8332 case 'D':
8333 case 'E':
8334 case 'G':
8335 case 'h':
8336 case 'i':
8337 case 'l':
8338 case 'm':
8339 case 'M':
8340 case 'O':
8341 case 'r':
8342 case 'R':
8343 case 's':
8344 case 'S':
8345 case 'u':
8346 case 'y':
8347 case 'Z':
8348 dump_opt[c]++;
8349 dump_all = 0;
8350 break;
8351 case 'A':
8352 case 'e':
8353 case 'F':
8354 case 'k':
8355 case 'L':
8356 case 'P':
8357 case 'q':
8358 case 'X':
8359 dump_opt[c]++;
8360 break;
8361 case 'Y':
8362 zfs_reconstruct_indirect_combinations_max = INT_MAX;
8363 zfs_deadman_enabled = 0;
8364 break;
8365 /* NB: Sort single match options below. */
8366 case 'I':
8367 max_inflight_bytes = strtoull(optarg, NULL, 0);
8368 if (max_inflight_bytes == 0) {
8369 (void) fprintf(stderr, "maximum number "
8370 "of inflight bytes must be greater "
8371 "than 0\n");
8372 usage();
8373 }
8374 break;
8375 case 'o':
8376 error = set_global_var(optarg);
8377 if (error != 0)
8378 usage();
8379 break;
8380 case 'p':
8381 if (searchdirs == NULL) {
8382 searchdirs = umem_alloc(sizeof (char *),
8383 UMEM_NOFAIL);
8384 } else {
8385 char **tmp = umem_alloc((nsearch + 1) *
8386 sizeof (char *), UMEM_NOFAIL);
8387 bcopy(searchdirs, tmp, nsearch *
8388 sizeof (char *));
8389 umem_free(searchdirs,
8390 nsearch * sizeof (char *));
8391 searchdirs = tmp;
8392 }
8393 searchdirs[nsearch++] = optarg;
8394 break;
8395 case 't':
8396 max_txg = strtoull(optarg, NULL, 0);
8397 if (max_txg < TXG_INITIAL) {
8398 (void) fprintf(stderr, "incorrect txg "
8399 "specified: %s\n", optarg);
8400 usage();
8401 }
8402 break;
8403 case 'U':
8404 spa_config_path = optarg;
8405 if (spa_config_path[0] != '/') {
8406 (void) fprintf(stderr,
8407 "cachefile must be an absolute path "
8408 "(i.e. start with a slash)\n");
8409 usage();
8410 }
8411 break;
8412 case 'v':
8413 verbose++;
8414 break;
8415 case 'V':
8416 flags = ZFS_IMPORT_VERBATIM;
8417 break;
8418 case 'x':
8419 vn_dumpdir = optarg;
8420 break;
8421 default:
8422 usage();
8423 break;
8424 }
8425 }
8426
8427 if (!dump_opt['e'] && searchdirs != NULL) {
8428 (void) fprintf(stderr, "-p option requires use of -e\n");
8429 usage();
8430 }
8431 if (dump_opt['d'] || dump_opt['r']) {
8432 		/* <pool>[/<dataset | objset id>] is accepted */
8433 		if (argc > 2 && (objset_str = strchr(argv[2], '/')) != NULL &&
8434 objset_str++ != NULL) {
8435 char *endptr;
8436 errno = 0;
8437 objset_id = strtoull(objset_str, &endptr, 0);
8438 /* dataset 0 is the same as opening the pool */
8439 if (errno == 0 && endptr != objset_str &&
8440 objset_id != 0) {
8441 target_is_spa = B_FALSE;
8442 dataset_lookup = B_TRUE;
8443 } else if (objset_id != 0) {
8444 				printf("failed to open objset %s "
8445 				    "%llu: %s\n", objset_str,
8446 (u_longlong_t)objset_id,
8447 strerror(errno));
8448 exit(1);
8449 }
8450 /* normal dataset name not an objset ID */
8451 if (endptr == objset_str) {
8452 objset_id = -1;
8453 }
8454 }
8455 }
8456
8457 #if defined(_LP64)
8458 /*
8459 * ZDB does not typically re-read blocks; therefore limit the ARC
8460 * to 256 MB, which can be used entirely for metadata.
8461 */
8462 zfs_arc_min = zfs_arc_meta_min = 2ULL << SPA_MAXBLOCKSHIFT;
8463 zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024;
8464 #endif
8465
8466 /*
8467 * "zdb -c" uses checksum-verifying scrub i/os which are async reads.
8468 * "zdb -b" uses traversal prefetch which uses async reads.
8469 * For good performance, let several of them be active at once.
8470 */
8471 zfs_vdev_async_read_max_active = 10;
8472
8473 /*
8474 * Disable reference tracking for better performance.
8475 */
8476 reference_tracking_enable = B_FALSE;
8477
8478 /*
8479 * Do not fail spa_load when spa_load_verify fails. This is needed
8480 * to load non-idle pools.
8481 */
8482 spa_load_verify_dryrun = B_TRUE;
8483
8484 kernel_init(SPA_MODE_READ);
8485
8486 if (dump_all)
8487 verbose = MAX(verbose, 1);
8488
8489 for (c = 0; c < 256; c++) {
8490 if (dump_all && strchr("AeEFklLOPrRSXy", c) == NULL)
8491 dump_opt[c] = 1;
8492 if (dump_opt[c])
8493 dump_opt[c] += verbose;
8494 }
8495
8496 aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
8497 zfs_recover = (dump_opt['A'] > 1);
8498
8499 argc -= optind;
8500 argv += optind;
8501 if (argc < 2 && dump_opt['R'])
8502 usage();
8503
8504 if (dump_opt['E']) {
8505 if (argc != 1)
8506 usage();
8507 zdb_embedded_block(argv[0]);
8508 return (0);
8509 }
8510
8511 if (argc < 1) {
8512 if (!dump_opt['e'] && dump_opt['C']) {
8513 dump_cachefile(spa_config_path);
8514 return (0);
8515 }
8516 usage();
8517 }
8518
8519 if (dump_opt['l'])
8520 return (dump_label(argv[0]));
8521
8522 if (dump_opt['O']) {
8523 if (argc != 2)
8524 usage();
8525 dump_opt['v'] = verbose + 3;
8526 return (dump_path(argv[0], argv[1], NULL));
8527 }
8528 if (dump_opt['r']) {
8529 if (argc != 3)
8530 usage();
8531 dump_opt['v'] = verbose;
8532 error = dump_path(argv[0], argv[1], &object);
8533 }
8534
8535 if (dump_opt['X'] || dump_opt['F'])
8536 rewind = ZPOOL_DO_REWIND |
8537 (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
8538
8539 if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
8540 nvlist_add_uint64(policy, ZPOOL_LOAD_REQUEST_TXG, max_txg) != 0 ||
8541 nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY, rewind) != 0)
8542 fatal("internal error: %s", strerror(ENOMEM));
8543
8544 error = 0;
8545 target = argv[0];
8546
8547 if (strpbrk(target, "/@") != NULL) {
8548 size_t targetlen;
8549
8550 target_pool = strdup(target);
8551 *strpbrk(target_pool, "/@") = '\0';
8552
8553 target_is_spa = B_FALSE;
8554 targetlen = strlen(target);
8555 if (targetlen && target[targetlen - 1] == '/')
8556 target[targetlen - 1] = '\0';
8557 } else {
8558 target_pool = target;
8559 }
8560
8561 if (dump_opt['e']) {
8562 importargs_t args = { 0 };
8563
8564 args.paths = nsearch;
8565 args.path = searchdirs;
8566 args.can_be_active = B_TRUE;
8567
8568 error = zpool_find_config(NULL, target_pool, &cfg, &args,
8569 &libzpool_config_ops);
8570
8571 if (error == 0) {
8572
8573 if (nvlist_add_nvlist(cfg,
8574 ZPOOL_LOAD_POLICY, policy) != 0) {
8575 fatal("can't open '%s': %s",
8576 target, strerror(ENOMEM));
8577 }
8578
8579 if (dump_opt['C'] > 1) {
8580 (void) printf("\nConfiguration for import:\n");
8581 dump_nvlist(cfg, 8);
8582 }
8583
8584 /*
8585 * Disable the activity check to allow examination of
8586 * active pools.
8587 */
8588 error = spa_import(target_pool, cfg, NULL,
8589 flags | ZFS_IMPORT_SKIP_MMP);
8590 }
8591 }
8592
8593 if (searchdirs != NULL) {
8594 umem_free(searchdirs, nsearch * sizeof (char *));
8595 searchdirs = NULL;
8596 }
8597
8598 /*
8599 	 * import_checkpointed_state assumes that the target pool we
8600 	 * pass it is already part of the spa namespace. Because of
8601 	 * that, we must always call it after the -e option has been
8602 	 * processed, which imports the pool into the namespace if it
8603 	 * is not in the cachefile.
8605 */
8606 char *checkpoint_pool = NULL;
8607 char *checkpoint_target = NULL;
8608 if (dump_opt['k']) {
8609 checkpoint_pool = import_checkpointed_state(target, cfg,
8610 &checkpoint_target);
8611
8612 if (checkpoint_target != NULL)
8613 target = checkpoint_target;
8614 }
8615
8616 if (cfg != NULL) {
8617 nvlist_free(cfg);
8618 cfg = NULL;
8619 }
8620
8621 if (target_pool != target)
8622 free(target_pool);
8623
8624 if (error == 0) {
8625 if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) {
8626 ASSERT(checkpoint_pool != NULL);
8627 ASSERT(checkpoint_target == NULL);
8628
8629 error = spa_open(checkpoint_pool, &spa, FTAG);
8630 if (error != 0) {
8631 fatal("Tried to open pool \"%s\" but "
8632 "spa_open() failed with error %d\n",
8633 checkpoint_pool, error);
8634 }
8635
8636 } else if (target_is_spa || dump_opt['R'] || objset_id == 0) {
8637 zdb_set_skip_mmp(target);
8638 error = spa_open_rewind(target, &spa, FTAG, policy,
8639 NULL);
8640 if (error) {
8641 /*
8642 * If we're missing the log device then
8643 * try opening the pool after clearing the
8644 * log state.
8645 */
8646 mutex_enter(&spa_namespace_lock);
8647 if ((spa = spa_lookup(target)) != NULL &&
8648 spa->spa_log_state == SPA_LOG_MISSING) {
8649 spa->spa_log_state = SPA_LOG_CLEAR;
8650 error = 0;
8651 }
8652 mutex_exit(&spa_namespace_lock);
8653
8654 if (!error) {
8655 error = spa_open_rewind(target, &spa,
8656 FTAG, policy, NULL);
8657 }
8658 }
8659 } else if (strpbrk(target, "#") != NULL) {
8660 dsl_pool_t *dp;
8661 error = dsl_pool_hold(target, FTAG, &dp);
8662 if (error != 0) {
8663 fatal("can't dump '%s': %s", target,
8664 strerror(error));
8665 }
8666 error = dump_bookmark(dp, target, B_TRUE, verbose > 1);
8667 dsl_pool_rele(dp, FTAG);
8668 if (error != 0) {
8669 fatal("can't dump '%s': %s", target,
8670 strerror(error));
8671 }
8672 return (error);
8673 } else {
8674 zdb_set_skip_mmp(target);
8675 if (dataset_lookup == B_TRUE) {
8676 /*
8677 * Use the supplied id to get the name
8678 * for open_objset.
8679 */
8680 error = spa_open(target, &spa, FTAG);
8681 if (error == 0) {
8682 error = name_from_objset_id(spa,
8683 objset_id, dsname);
8684 spa_close(spa, FTAG);
8685 if (error == 0)
8686 target = dsname;
8687 }
8688 }
8689 if (error == 0)
8690 error = open_objset(target, FTAG, &os);
8691 if (error == 0)
8692 spa = dmu_objset_spa(os);
8693 }
8694 }
8695 nvlist_free(policy);
8696
8697 if (error)
8698 fatal("can't open '%s': %s", target, strerror(error));
8699
8700 /*
8701 * Set the pool failure mode to panic in order to prevent the pool
8702 * from suspending. A suspended I/O will have no way to resume and
8703 * can prevent the zdb(8) command from terminating as expected.
8704 */
8705 if (spa != NULL)
8706 spa->spa_failmode = ZIO_FAILURE_MODE_PANIC;
8707
8708 argv++;
8709 argc--;
8710 if (dump_opt['r']) {
8711 error = zdb_copy_object(os, object, argv[1]);
8712 } else if (!dump_opt['R']) {
8713 flagbits['d'] = ZOR_FLAG_DIRECTORY;
8714 flagbits['f'] = ZOR_FLAG_PLAIN_FILE;
8715 flagbits['m'] = ZOR_FLAG_SPACE_MAP;
8716 flagbits['z'] = ZOR_FLAG_ZAP;
8717 flagbits['A'] = ZOR_FLAG_ALL_TYPES;
8718
8719 if (argc > 0 && dump_opt['d']) {
8720 zopt_object_args = argc;
8721 zopt_object_ranges = calloc(zopt_object_args,
8722 sizeof (zopt_object_range_t));
8723 for (unsigned i = 0; i < zopt_object_args; i++) {
8724 int err;
8725 char *msg = NULL;
8726
8727 err = parse_object_range(argv[i],
8728 &zopt_object_ranges[i], &msg);
8729 if (err != 0)
8730 fatal("Bad object or range: '%s': %s\n",
8731 argv[i], msg ? msg : "");
8732 }
8733 } else if (argc > 0 && dump_opt['m']) {
8734 zopt_metaslab_args = argc;
8735 zopt_metaslab = calloc(zopt_metaslab_args,
8736 sizeof (uint64_t));
8737 for (unsigned i = 0; i < zopt_metaslab_args; i++) {
8738 errno = 0;
8739 zopt_metaslab[i] = strtoull(argv[i], NULL, 0);
8740 if (zopt_metaslab[i] == 0 && errno != 0)
8741 fatal("bad number %s: %s", argv[i],
8742 strerror(errno));
8743 }
8744 }
8745 if (os != NULL) {
8746 dump_objset(os);
8747 } else if (zopt_object_args > 0 && !dump_opt['m']) {
8748 dump_objset(spa->spa_meta_objset);
8749 } else {
8750 dump_zpool(spa);
8751 }
8752 } else {
8753 flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
8754 flagbits['c'] = ZDB_FLAG_CHECKSUM;
8755 flagbits['d'] = ZDB_FLAG_DECOMPRESS;
8756 flagbits['e'] = ZDB_FLAG_BSWAP;
8757 flagbits['g'] = ZDB_FLAG_GBH;
8758 flagbits['i'] = ZDB_FLAG_INDIRECT;
8759 flagbits['r'] = ZDB_FLAG_RAW;
8760 flagbits['v'] = ZDB_FLAG_VERBOSE;
8761
8762 for (int i = 0; i < argc; i++)
8763 zdb_read_block(argv[i], spa);
8764 }
8765
8766 if (dump_opt['k']) {
8767 free(checkpoint_pool);
8768 if (!target_is_spa)
8769 free(checkpoint_target);
8770 }
8771
8772 if (os != NULL) {
8773 close_objset(os, FTAG);
8774 } else {
8775 spa_close(spa, FTAG);
8776 }
8777
8778 fuid_table_destroy();
8779
8780 dump_debug_buffer();
8781
8782 kernel_fini();
8783
8784 return (error);
8785 }