]> git.proxmox.com Git - mirror_zfs.git/commitdiff
OpenZFS 8473 - scrub does not detect errors on active spares
authorasomers <asomers@FreeBSD.org>
Wed, 20 Sep 2017 16:31:00 +0000 (16:31 +0000)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Tue, 8 Jan 2019 17:51:30 +0000 (09:51 -0800)
Scrubbing is supposed to detect and repair all errors in the pool.
However, it wrongly ignores active spare devices. The problem can
easily be reproduced in OpenZFS at git rev 0ef125d with these
commands:

    truncate -s 64m /tmp/a /tmp/b /tmp/c
    sudo zpool create testpool mirror /tmp/a /tmp/b spare /tmp/c
    sudo zpool replace testpool /tmp/a /tmp/c
    /bin/dd if=/dev/zero bs=1024k count=63 oseek=1 conv=notrunc of=/tmp/c
    sync
    sudo zpool scrub testpool
    zpool status testpool # Will show 0 errors, which is wrong
    sudo zpool offline testpool /tmp/a
    sudo zpool scrub testpool
    zpool status testpool # Will show errors on /tmp/c,
                          # which should've already been fixed

FreeBSD head is partially affected: the first scrub will detect
some errors, but the second scrub will detect more.  This same
test was run on Linux before applying the fix and the FreeBSD
head behavior was observed.

Authored by: asomers <asomers@FreeBSD.org>
Reviewed by: Andy Stormont <astormont@racktopsystems.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Approved by: Richard Lowe <richlowe@richlowe.net>
Ported-by: Brian Behlendorf <behlendorf1@llnl.gov>
Sponsored by: Spectra Logic Corp

OpenZFS-issue: https://www.illumos.org/issues/8473
FreeBSD-commit: https://github.com/freebsd/freebsd/commit/e20ec8879
OpenZFS-commit: https://github.com/illumos/illumos-gate/commit/554675ee
Closes #8251

module/zfs/vdev_mirror.c

index b45c05db28a45ff95a963350ad214b896670178a..a92d956cdaa2d5c0274387db7ac8367bc6778ce3 100644 (file)
@@ -29,6 +29,9 @@
 
 #include <sys/zfs_context.h>
 #include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/dsl_pool.h>
+#include <sys/dsl_scan.h>
 #include <sys/vdev_impl.h>
 #include <sys/zio.h>
 #include <sys/abd.h>
@@ -111,7 +114,7 @@ typedef struct mirror_map {
        int             *mm_preferred;
        int             mm_preferred_cnt;
        int             mm_children;
-       boolean_t       mm_replacing;
+       boolean_t       mm_resilvering;
        boolean_t       mm_root;
        mirror_child_t  mm_child[];
 } mirror_map_t;
@@ -145,13 +148,13 @@ vdev_mirror_map_size(int children)
 }
 
 static inline mirror_map_t *
-vdev_mirror_map_alloc(int children, boolean_t replacing, boolean_t root)
+vdev_mirror_map_alloc(int children, boolean_t resilvering, boolean_t root)
 {
        mirror_map_t *mm;
 
        mm = kmem_zalloc(vdev_mirror_map_size(children), KM_SLEEP);
        mm->mm_children = children;
-       mm->mm_replacing = replacing;
+       mm->mm_resilvering = resilvering;
        mm->mm_root = root;
        mm->mm_preferred = (int *)((uintptr_t)mm +
            offsetof(mirror_map_t, mm_child[children]));
@@ -285,9 +288,39 @@ vdev_mirror_map_init(zio_t *zio)
                        mc->mc_offset = DVA_GET_OFFSET(&dva[c]);
                }
        } else {
-               mm = vdev_mirror_map_alloc(vd->vdev_children,
-                   (vd->vdev_ops == &vdev_replacing_ops ||
-                   vd->vdev_ops == &vdev_spare_ops), B_FALSE);
+               /*
+                * If we are resilvering, then we should handle scrub reads
+                * differently; we shouldn't issue them to the resilvering
+                * device because it might not have those blocks.
+                *
+                * We are resilvering iff:
+                * 1) We are a replacing vdev (ie our name is "replacing-1" or
+                *    "spare-1" or something like that), and
+                * 2) The pool is currently being resilvered.
+                *
+                * We cannot simply check vd->vdev_resilver_txg, because it's
+                * not set in this path.
+                *
+                * Nor can we just check our vdev_ops; there are cases (such as
+                * when a user types "zpool replace pool odev spare_dev" and
+                * spare_dev is in the spare list, or when a spare device is
+                * automatically used to replace a DEGRADED device) when
+                * resilvering is complete but both the original vdev and the
+                * spare vdev remain in the pool.  That behavior is intentional.
+                * It helps implement the policy that a spare should be
+                * automatically removed from the pool after the user replaces
+                * the device that originally failed.
+                *
+                * If a spa load is in progress, then spa_dsl_pool may be
+                * uninitialized.  But we shouldn't be resilvering during a spa
+                * load anyway.
+                */
+               boolean_t replacing = (vd->vdev_ops == &vdev_replacing_ops ||
+                   vd->vdev_ops == &vdev_spare_ops) &&
+                   spa_load_state(vd->vdev_spa) == SPA_LOAD_NONE &&
+                   dsl_scan_resilvering(vd->vdev_spa->spa_dsl_pool);
+               mm = vdev_mirror_map_alloc(vd->vdev_children, replacing,
+                   B_FALSE);
                for (c = 0; c < mm->mm_children; c++) {
                        mc = &mm->mm_child[c];
                        mc->mc_vd = vd->vdev_child[c];
@@ -521,7 +554,7 @@ vdev_mirror_io_start(zio_t *zio)
 
        if (zio->io_type == ZIO_TYPE_READ) {
                if (zio->io_bp != NULL &&
-                   (zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_replacing) {
+                   (zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_resilvering) {
                        /*
                         * For scrubbing reads (if we can verify the
                         * checksum here, as indicated by io_bp being
@@ -663,7 +696,7 @@ vdev_mirror_io_done(zio_t *zio)
        if (good_copies && spa_writeable(zio->io_spa) &&
            (unexpected_errors ||
            (zio->io_flags & ZIO_FLAG_RESILVER) ||
-           ((zio->io_flags & ZIO_FLAG_SCRUB) && mm->mm_replacing))) {
+           ((zio->io_flags & ZIO_FLAG_SCRUB) && mm->mm_resilvering))) {
                /*
                 * Use the good data we have in hand to repair damaged children.
                 */