From afd2f7b7117ff8bf23afa70ecae86ec0c1a1461e Mon Sep 17 00:00:00 2001 From: Pavel Zakharov Date: Tue, 30 Jan 2018 15:25:19 -0800 Subject: [PATCH] OpenZFS 8962 - zdb should work on non-idle pools Currently `zdb` consistently fails to examine non-idle pools as it fails during the `spa_load()` process. The main problem seems to be that `spa_load_verify()` fails as can be seen below: $ sudo zdb -d -G dcenter zdb: can't open 'dcenter': I/O error ZFS_DBGMSG(zdb): spa_open_common: opening dcenter spa_load(dcenter): LOADING disk vdev '/dev/dsk/c4t11d0s0': best uberblock found for spa dcenter. txg 40824950 spa_load(dcenter): using uberblock with txg=40824950 spa_load(dcenter): UNLOADING spa_load(dcenter): RELOADING spa_load(dcenter): LOADING disk vdev '/dev/dsk/c3t10d0s0': best uberblock found for spa dcenter. txg 40824952 spa_load(dcenter): using uberblock with txg=40824952 spa_load(dcenter): FAILED: spa_load_verify failed [error=5] spa_load(dcenter): UNLOADING This change makes `spa_load_verify()` a dryrun when ran from `zdb`. This is done by creating a global flag in zfs and then setting it in `zdb`. Authored by: Pavel Zakharov Reviewed by: George Wilson Reviewed by: Matthew Ahrens Reviewed by: Andy Stormont Approved by: Dan McDonald Ported-by: Tim Chase Signed-off-by: Tim Chase OpenZFS-issue: https://illumos.org/issues/8962 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/180ad792 Closes #7459 --- cmd/zdb/zdb.c | 7 +++++++ module/zfs/spa.c | 20 ++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 5f56b10e2..a7bd64ecd 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -96,6 +96,7 @@ extern int reference_tracking_enable; extern int zfs_recover; extern uint64_t zfs_arc_max, zfs_arc_meta_limit; extern int zfs_vdev_async_read_max_active; +extern boolean_t spa_load_verify_dryrun; static const char cmdname[] = "zdb"; uint8_t dump_opt[256]; @@ -5009,6 +5010,12 @@ main(int argc, char **argv) */ reference_tracking_enable = B_FALSE; + /* + * Do not fail spa_load when spa_load_verify fails. This is needed + * to load non-idle pools. + */ + spa_load_verify_dryrun = B_TRUE; + kernel_init(FREAD); if ((g_zfs = libzfs_init()) == NULL) { (void) fprintf(stderr, "%s", libzfs_error_init(errno)); diff --git a/module/zfs/spa.c b/module/zfs/spa.c index a17da4239..80f0c6f36 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -169,6 +169,12 @@ uint_t zio_taskq_basedc = 80; /* base duty cycle */ boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */ +/* + * Report any spa_load_verify errors found, but do not fail spa_load. + * This is used by zdb to analyze non-idle pools. + */ +boolean_t spa_load_verify_dryrun = B_FALSE; + /* * This (illegal) pool name is used when temporarily importing a spa_t in order * to get the vdev stats associated with the imported devices. @@ -2140,8 +2146,15 @@ spa_load_verify(spa_t *spa) spa->spa_load_meta_errors = sle.sle_meta_count; spa->spa_load_data_errors = sle.sle_data_count; - if (!error && sle.sle_meta_count <= policy.zrp_maxmeta && - sle.sle_data_count <= policy.zrp_maxdata) { + if (sle.sle_meta_count != 0 || sle.sle_data_count != 0) { + spa_load_note(spa, "spa_load_verify found %llu metadata errors " + "and %llu data errors", (u_longlong_t)sle.sle_meta_count, + (u_longlong_t)sle.sle_data_count); + } + + if (spa_load_verify_dryrun || + (!error && sle.sle_meta_count <= policy.zrp_maxmeta && + sle.sle_data_count <= policy.zrp_maxdata)) { int64_t loss = 0; verify_ok = B_TRUE; @@ -2159,6 +2172,9 @@ spa_load_verify(spa_t *spa) spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; } + if (spa_load_verify_dryrun) + return (0); + if (error) { if (error != ENXIO && error != EIO) error = SET_ERROR(EIO); -- 2.39.5