From a103ae446ed4aaaac6c83ce242efef313aad2200 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Thu, 17 Dec 2020 12:11:56 -0800 Subject: [PATCH] special device removal space accounting fixes The space in special devices is not included in spa_dspace (or dsl_pool_adjustedsize(), or the zfs `available` property). Therefore there is always at least as much free space in the normal class, as there is allocated in the special class(es). And therefore, there is always enough free space to remove a special device. However, the checks for free space when removing special devices did not take this into account. This commit corrects that. Reviewed-by: Ryan Moeller Reviewed-by: Don Brady Reviewed-by: Brian Behlendorf Signed-off-by: Matthew Ahrens Closes #11329 --- module/zfs/spa_misc.c | 15 +++-- module/zfs/vdev_removal.c | 56 ++++++++++--------- tests/test-runner/bin/zts-report.py.in | 2 - .../alloc_class/alloc_class_012_pos.ksh | 5 +- 4 files changed, 43 insertions(+), 35 deletions(-) diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 1640dcedd..540fd1077 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -1807,10 +1807,11 @@ spa_update_dspace(spa_t *spa) ddt_get_dedup_dspace(spa); if (spa->spa_vdev_removal != NULL) { /* - * We can't allocate from the removing device, so - * subtract its size. This prevents the DMU/DSL from - * filling up the (now smaller) pool while we are in the - * middle of removing the device. + * We can't allocate from the removing device, so subtract + * its size if it was included in dspace (i.e. if this is a + * normal-class vdev, not special/dedup). This prevents the + * DMU/DSL from filling up the (now smaller) pool while we + * are in the middle of removing the device. * * Note that the DMU/DSL doesn't actually know or care * how much space is allocated (it does its own tracking @@ -1822,8 +1823,10 @@ spa_update_dspace(spa_t *spa) spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); vdev_t *vd = vdev_lookup_top(spa, spa->spa_vdev_removal->svr_vdev_id); - spa->spa_dspace -= spa_deflate(spa) ? - vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space; + if (vd->vdev_mg->mg_class == spa_normal_class(spa)) { + spa->spa_dspace -= spa_deflate(spa) ? + vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space; + } spa_config_exit(spa, SCL_VDEV, FTAG); } } diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c index ed7d1d4b3..3bccb9d7d 100644 --- a/module/zfs/vdev_removal.c +++ b/module/zfs/vdev_removal.c @@ -993,7 +993,7 @@ spa_vdev_copy_segment(vdev_t *vd, range_tree_t *segs, * An allocation class might not have any remaining vdevs or space */ metaslab_class_t *mc = mg->mg_class; - if (mc != spa_normal_class(spa) && mc->mc_groups <= 1) + if (mc->mc_groups == 0) mc = spa_normal_class(spa); int error = metaslab_alloc_dva(spa, mc, size, &dst, 0, NULL, txg, 0, zal, 0); @@ -1976,32 +1976,38 @@ spa_vdev_remove_top_check(vdev_t *vd) if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REMOVAL)) return (SET_ERROR(ENOTSUP)); - /* available space in the pool's normal class */ - uint64_t available = dsl_dir_space_available( - spa->spa_dsl_pool->dp_root_dir, NULL, 0, B_TRUE); metaslab_class_t *mc = vd->vdev_mg->mg_class; - - /* - * When removing a vdev from an allocation class that has - * remaining vdevs, include available space from the class. - */ - if (mc != spa_normal_class(spa) && mc->mc_groups > 1) { - uint64_t class_avail = metaslab_class_get_space(mc) - - metaslab_class_get_alloc(mc); - - /* add class space, adjusted for overhead */ - available += (class_avail * 94) / 100; - } - - /* - * There has to be enough free space to remove the - * device and leave double the "slop" space (i.e. we - * must leave at least 3% of the pool free, in addition to - * the normal slop space). - */ - if (available < vd->vdev_stat.vs_dspace + spa_get_slop_space(spa)) { - return (SET_ERROR(ENOSPC)); + metaslab_class_t *normal = spa_normal_class(spa); + if (mc != normal) { + /* + * Space allocated from the special (or dedup) class is + * included in the DMU's space usage, but it's not included + * in spa_dspace (or dsl_pool_adjustedsize()). Therefore + * there is always at least as much free space in the normal + * class, as is allocated from the special (and dedup) class. + * As a backup check, we will return ENOSPC if this is + * violated. See also spa_update_dspace(). + */ + uint64_t available = metaslab_class_get_space(normal) - + metaslab_class_get_alloc(normal); + ASSERT3U(available, >=, vd->vdev_stat.vs_alloc); + if (available < vd->vdev_stat.vs_alloc) + return (SET_ERROR(ENOSPC)); + } else { + /* available space in the pool's normal class */ + uint64_t available = dsl_dir_space_available( + spa->spa_dsl_pool->dp_root_dir, NULL, 0, B_TRUE); + if (available < + vd->vdev_stat.vs_dspace + spa_get_slop_space(spa)) { + /* + * This is a normal device. There has to be enough free + * space to remove the device and leave double the + * "slop" space (i.e. we must leave at least 3% of the + * pool free, in addition to the normal slop space). + */ + return (SET_ERROR(ENOSPC)); + } } /* diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index 6b5cd191c..05b53bb4c 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -187,8 +187,6 @@ elif sys.platform.startswith('linux'): # reasons listed above can be used. # maybe = { - 'alloc_class/alloc_class_012_pos': ['FAIL', '9142'], - 'alloc_class/alloc_class_013_pos': ['FAIL', '9142'], 'chattr/setup': ['SKIP', exec_reason], 'cli_root/zdb/zdb_006_pos': ['FAIL', known_reason], 'cli_root/zfs_get/zfs_get_004_pos': ['FAIL', known_reason], diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_012_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_012_pos.ksh index 1cfe6642d..b49a8919e 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_012_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_012_pos.ksh @@ -33,8 +33,9 @@ function file_in_special_vdev # { typeset dataset="$1" typeset inum="$2" + typeset num_normal=$(echo $ZPOOL_DISKS | wc -w | xargs) - zdb -dddddd $dataset $inum | awk '{ + zdb -dddddd $dataset $inum | awk -v d=$num_normal '{ # find DVAs from string "offset level dva" only for L0 (data) blocks if (match($0,"L0 [0-9]+")) { dvas[0]=$3 @@ -49,7 +50,7 @@ if (match($0,"L0 [0-9]+")) { exit 1; } # verify vdev is "special" - if (arr[1] < 3) { + if (arr[1] < d) { exit 1; } } -- 2.39.2