]> git.proxmox.com Git - mirror_zfs.git/commitdiff
Write label 2,3 uberblocks when vdev expands
authorOlaf Faaland <faaland1@llnl.gov>
Tue, 2 May 2017 20:55:24 +0000 (13:55 -0700)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Tue, 2 May 2017 20:55:24 +0000 (13:55 -0700)
When vdev_psize increases, the location of labels 2 and 3 changes
because their location is relative to the end of the device.

The configs for labels 2 and 3 are written during the next spa_sync()
because the vdev is added to the dirty config list.  However, the
uberblock rings are not re-written in their new location, leaving the
device vulnerable to the beginning of the device being overwritten or
damaged.

This patch copies the uberblock ring from label 0 to labels 2 and 3,
in their new locations, at the next sync after vdev_psize increases.

Also, add a test zpool_expand_004_pos.ksh to confirm the uberblocks
are copied.

Reviewed-by: BearBabyLiu <liu.huang@zte.com.cn>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
Closes #5108

include/sys/vdev_impl.h
module/zfs/vdev.c
module/zfs/vdev_label.c
tests/runfiles/linux.run
tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am
tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh [new file with mode: 0755]

index d7f11a2b885d2fd2a50eaa410d6b4e6676dc5236..c9e9bede972811b5242fd8f8bfd697ccc80079eb 100644 (file)
@@ -229,6 +229,7 @@ struct vdev {
        boolean_t       vdev_cant_write; /* vdev is failing all writes  */
        boolean_t       vdev_isspare;   /* was a hot spare              */
        boolean_t       vdev_isl2cache; /* was a l2cache device         */
+       boolean_t       vdev_copy_uberblocks;  /* post expand copy uberblocks */
        vdev_queue_t    vdev_queue;     /* I/O deadline schedule queue  */
        vdev_cache_t    vdev_cache;     /* physical block cache         */
        spa_aux_vdev_t  *vdev_aux;      /* for l2cache and spares vdevs */
index da06391029fdf4d9092cb699371ba4e8fa26e0f0..dfb1ef522868976cb34647d14c65ce0604df60cd 100644 (file)
@@ -1330,6 +1330,13 @@ vdev_open(vdev_t *vd)
                max_asize = max_osize;
        }
 
+       /*
+        * If the vdev was expanded, record this so that we can re-create the
+        * uberblock rings in labels {2,3}, during the next sync.
+        */
+       if ((psize > vd->vdev_psize) && (vd->vdev_psize != 0))
+               vd->vdev_copy_uberblocks = B_TRUE;
+
        vd->vdev_psize = psize;
 
        /*
index b50a297bcb8ba0054fc829c17f7a5a6b568d35e9..20c0ac86a03b5388e0c75785734250504bb0fad7 100644 (file)
@@ -1132,6 +1132,60 @@ vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config)
        spa_config_exit(spa, SCL_ALL, FTAG);
 }
 
+/*
+ * For use when a leaf vdev is expanded.
+ * The location of labels 2 and 3 changed, and at the new location the
+ * uberblock rings are either empty or contain garbage.  The sync will write
+ * new configs there because the vdev is dirty, but expansion also needs the
+ * uberblock rings copied.  Read them from label 0 which did not move.
+ *
+ * Since the point is to populate labels {2,3} with valid uberblocks,
+ * we zero uberblocks we fail to read or which are not valid.
+ */
+
+static void
+vdev_copy_uberblocks(vdev_t *vd)
+{
+       abd_t *ub_abd;
+       zio_t *write_zio;
+       int locks = (SCL_L2ARC | SCL_ZIO);
+       int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
+           ZIO_FLAG_SPECULATIVE;
+
+       ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_READER) ==
+           SCL_STATE);
+       ASSERT(vd->vdev_ops->vdev_op_leaf);
+
+       spa_config_enter(vd->vdev_spa, locks, FTAG, RW_READER);
+
+       ub_abd = abd_alloc(VDEV_UBERBLOCK_SIZE(vd), B_TRUE);
+
+       write_zio = zio_root(vd->vdev_spa, NULL, NULL, flags);
+       for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
+               const int src_label = 0;
+               zio_t *zio;
+
+               zio = zio_root(vd->vdev_spa, NULL, NULL, flags);
+               vdev_label_read(zio, vd, src_label, ub_abd,
+                   VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd),
+                   NULL, NULL, flags);
+
+               if (zio_wait(zio) || uberblock_verify(abd_to_buf(ub_abd)))
+                       abd_zero(ub_abd, VDEV_UBERBLOCK_SIZE(vd));
+
+               for (int l = 2; l < VDEV_LABELS; l++)
+                       vdev_label_write(write_zio, vd, l, ub_abd,
+                           VDEV_UBERBLOCK_OFFSET(vd, n),
+                           VDEV_UBERBLOCK_SIZE(vd), NULL, NULL,
+                           flags | ZIO_FLAG_DONT_PROPAGATE);
+       }
+       (void) zio_wait(write_zio);
+
+       spa_config_exit(vd->vdev_spa, locks, FTAG);
+
+       abd_free(ub_abd);
+}
+
 /*
  * On success, increment root zio's count of good writes.
  * We only get credit for writes to known-visible vdevs; see spa_vdev_add().
@@ -1163,6 +1217,13 @@ vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags)
        if (!vdev_writeable(vd))
                return;
 
+       /* If the vdev was expanded, need to copy uberblock rings. */
+       if (vd->vdev_state == VDEV_STATE_HEALTHY &&
+           vd->vdev_copy_uberblocks == B_TRUE) {
+               vdev_copy_uberblocks(vd);
+               vd->vdev_copy_uberblocks = B_FALSE;
+       }
+
        n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1);
 
        /* Copy the uberblock_t into the ABD */
index 406491aa1c5580ab95ff2b833f53a9a4438adc0f..8439f8e6691b8c7054df5cacbbb5ffee8c3bebc8 100644 (file)
@@ -276,7 +276,7 @@ tests = ['zpool_detach_001_neg']
 # zpool_expand_001_pos - https://github.com/zfsonlinux/zfs/issues/2437
 # zpool_expand_003_pos - https://github.com/zfsonlinux/zfs/issues/2437
 [tests/functional/cli_root/zpool_expand]
-tests = ['zpool_expand_002_pos']
+tests = ['zpool_expand_002_pos', 'zpool_expand_004_pos']
 
 # DISABLED:
 # zpool_export_004_pos - https://github.com/zfsonlinux/zfs/issues/3484
index dc90f9f01bc8768df9faaddd072edcfcaa5eb1b8..672261718284a1d4797294afae50d9bacb3047de 100644 (file)
@@ -5,4 +5,5 @@ dist_pkgdata_SCRIPTS = \
        cleanup.ksh \
        zpool_expand_001_pos.ksh \
        zpool_expand_002_pos.ksh \
-       zpool_expand_003_neg.ksh
+       zpool_expand_003_neg.ksh \
+       zpool_expand_004_pos.ksh
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh
new file mode 100755 (executable)
index 0000000..69481ba
--- /dev/null
@@ -0,0 +1,102 @@
+#! /bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+# Copyright (c) 2017 Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_expand/zpool_expand.cfg
+
+#
+# DESCRIPTION:
+# After vdev expansion, all 4 labels have the same set of uberblocks.
+#
+#
+# STRATEGY:
+# 1) Create 3 files
+# 2) Create a pool backed by the files
+# 3) Expand the files' size with truncate
+# 4) Use zpool online -e to expand the vdevs
+# 5) Check that for all the devices, all 4 labels have the same uberblocks
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+        if poolexists $TESTPOOL1; then
+                log_must zpool destroy $TESTPOOL1
+        fi
+
+       for i in 1 2 3; do
+               [ -e ${TEMPFILE}.$i ] && log_must rm ${TEMPFILE}.$i
+       done
+}
+
+log_onexit cleanup
+
+log_assert "After vdev expansion, all 4 labels have the same set of uberblocks."
+
+for type in " " mirror raidz raidz2; do
+       for i in 1 2 3; do
+               log_must truncate -s $org_size ${TEMPFILE}.$i
+       done
+
+       log_must zpool create $TESTPOOL1 $type $TEMPFILE.1 \
+           $TEMPFILE.2 $TEMPFILE.3
+
+       sync_pool $TESTPOOL1
+
+       for i in 1 2 3; do
+               log_must truncate -s $exp_size ${TEMPFILE}.$i
+       done
+
+       for i in 1 2 3; do
+               log_must zpool online -e $TESTPOOL1 ${TEMPFILE}.$i
+       done
+
+       sync_pool $TESTPOOL1
+
+
+       for i in 1 2 3; do
+               non_uniform=$(zdb -lu ${TEMPFILE}.$i    | \
+                   grep 'labels = '                    | \
+                   grep -c -v 'labels = 0 1 2 3')
+
+               log_note "non-uniform label count: $non_uniform"
+
+               if [[ $non_uniform -ne 0 ]]; then
+                       log_fail "After vdev expansion, all labels contents are not identical"
+               fi
+       done
+
+       log_must zpool destroy $TESTPOOL1
+done
+
+log_pass "After vdev expansion, all 4 labels have the same set of uberblocks."