git.proxmox.com Git - mirror_zfs.git/commitdiff
Get rid of space_map_update() for ms_synced_length
author     Serapheim Dimitropoulos <serapheimd@gmail.com>
           Tue, 12 Feb 2019 18:38:11 +0000 (10:38 -0800)
committer  Brian Behlendorf <behlendorf1@llnl.gov>
           Tue, 12 Feb 2019 18:38:11 +0000 (10:38 -0800)
Initially, metaslabs and space maps were the same thing in ZFS.
Later, we started differentiating them by referring to the space
map as the on-disk state of the metaslab, making the metaslab a
higher-level concept that deals with space accounting metadata.
Today we've managed to split that code even further: the space
map is now its own on-disk data structure, used in areas of ZFS
besides metaslabs (e.g. the vdev-wide space maps used for the
zpool checkpoint and vdev removal features).

This patch refactors the space map code to further separate it
from the metaslab code. It does so by getting rid of the idea
that a space map can have a different in-core and on-disk length
(sm_length vs smp_length), a distinction that only the metaslab
code needs but that every other space map consumer has to deal
with. Instead, this patch moves the old in-core length of the
metaslab's space map into the metaslab structure itself (see the
ms_synced_length field), while the space map code now cares only
about the space map's actual on-disk length.
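
For reference, the new accessors (as they appear in the
space_map.c hunk further down) simply read the dbuf-backed phys
structure, so there is no separate in-core copy to keep in sync:

    int64_t
    space_map_allocated(space_map_t *sm)
    {
            return (sm != NULL ? sm->sm_phys->smp_alloc : 0);
    }

    uint64_t
    space_map_length(space_map_t *sm)
    {
            return (sm != NULL ? sm->sm_phys->smp_length : 0);
    }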

The result is that space map consumers no longer have to deal
with syncing two different lengths for the same structure (e.g.
space_map_update() goes away), while metaslab-specific behavior
stays within the metaslab code. Specifically, the
ms_synced_length field keeps track of how much data
metaslab_load() can read from the metaslab's space map while
working concurrently with metaslab_sync(), which may be
appending to that same space map.
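
Condensed from the metaslab.c hunk further down (error handling
and the defer-tree fixups are elided), the new loading path looks
roughly like this:

    static int
    metaslab_load_impl(metaslab_t *msp)
    {
            int error = 0;

            /*
             * Remember how much of the space map had been synced
             * before we drop ms_lock; metaslab_sync() may append
             * past this point while we are reading.
             */
            uint64_t length = msp->ms_synced_length;
            mutex_exit(&msp->ms_lock);

            if (msp->ms_sm != NULL) {
                    /* Read only what was synced when the load started. */
                    error = space_map_load_length(msp->ms_sm,
                        msp->ms_allocatable, SM_FREE, length);
            } else {
                    /* No space map yet: the whole metaslab is free. */
                    range_tree_add(msp->ms_allocatable,
                        msp->ms_start, msp->ms_size);
            }

            mutex_enter(&msp->ms_sync_lock);
            mutex_enter(&msp->ms_lock);
            /* defer-tree removal and space verification elided */
            mutex_exit(&msp->ms_sync_lock);

            return (error);
    }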

As a side note, the patch also adds a few comments around
the metaslab code documenting some assumptions and expected
behavior.

Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Signed-off-by: Serapheim Dimitropoulos <serapheim@delphix.com>
Closes #8328

12 files changed:
cmd/zdb/zdb.c
include/sys/metaslab.h
include/sys/metaslab_impl.h
include/sys/space_map.h
module/zfs/metaslab.c
module/zfs/spa_checkpoint.c
module/zfs/space_map.c
module/zfs/vdev.c
module/zfs/vdev_indirect.c
module/zfs/vdev_indirect_mapping.c
module/zfs/vdev_initialize.c
module/zfs/vdev_removal.c

index 5ef69790d9256353fd5a8096e080cc3b32c892a8..3d175dacafb29acb04807b2122e6c14b10717a99 100644 (file)
@@ -793,9 +793,9 @@ dump_spacemap(objset_t *os, space_map_t *sm)
                return;
 
        (void) printf("space map object %llu:\n",
-           (longlong_t)sm->sm_phys->smp_object);
-       (void) printf("  smp_objsize = 0x%llx\n",
-           (longlong_t)sm->sm_phys->smp_objsize);
+           (longlong_t)sm->sm_object);
+       (void) printf("  smp_length = 0x%llx\n",
+           (longlong_t)sm->sm_phys->smp_length);
        (void) printf("  smp_alloc = 0x%llx\n",
            (longlong_t)sm->sm_phys->smp_alloc);
 
@@ -3697,7 +3697,6 @@ zdb_load_obsolete_counts(vdev_t *vd)
                space_map_t *prev_obsolete_sm = NULL;
                VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset,
                    scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0));
-               space_map_update(prev_obsolete_sm);
                vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
                    prev_obsolete_sm);
                space_map_close(prev_obsolete_sm);
@@ -3833,9 +3832,9 @@ zdb_leak_init_vdev_exclude_checkpoint(vdev_t *vd, zdb_cb_t *zcb)
 
        VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
            checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
-       space_map_update(checkpoint_sm);
 
        VERIFY0(space_map_iterate(checkpoint_sm,
+           space_map_length(checkpoint_sm),
            checkpoint_sm_exclude_entry_cb, &cseea));
        space_map_close(checkpoint_sm);
 
@@ -4651,7 +4650,6 @@ verify_device_removal_feature_counts(spa_t *spa)
                            spa->spa_meta_objset,
                            scip->scip_prev_obsolete_sm_object,
                            0, vd->vdev_asize, 0));
-                       space_map_update(prev_obsolete_sm);
                        dump_spacemap(spa->spa_meta_objset, prev_obsolete_sm);
                        (void) printf("\n");
                        space_map_close(prev_obsolete_sm);
@@ -4933,7 +4931,6 @@ verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current)
                VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(current),
                    checkpoint_sm_obj, 0, current_vd->vdev_asize,
                    current_vd->vdev_ashift));
-               space_map_update(checkpoint_sm);
 
                verify_checkpoint_sm_entry_cb_arg_t vcsec;
                vcsec.vcsec_vd = ckpoint_vd;
@@ -4941,6 +4938,7 @@ verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current)
                vcsec.vcsec_num_entries =
                    space_map_length(checkpoint_sm) / sizeof (uint64_t);
                VERIFY0(space_map_iterate(checkpoint_sm,
+                   space_map_length(checkpoint_sm),
                    verify_checkpoint_sm_entry_cb, &vcsec));
                if (dump_opt['m'] > 3)
                        dump_spacemap(current->spa_meta_objset, checkpoint_sm);
@@ -5100,7 +5098,6 @@ dump_leftover_checkpoint_blocks(spa_t *spa)
 
                VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
                    checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
-               space_map_update(checkpoint_sm);
                dump_spacemap(spa->spa_meta_objset, checkpoint_sm);
                space_map_close(checkpoint_sm);
        }
index f47bc19cfc2b1058a7c928d6af9f4323c47b8bbb..fd0d23502a43c877cc1b8a0bf326f92d07290c75 100644 (file)
@@ -52,6 +52,8 @@ void metaslab_fini(metaslab_t *);
 int metaslab_load(metaslab_t *);
 void metaslab_unload(metaslab_t *);
 
+uint64_t metaslab_allocated_space(metaslab_t *);
+
 void metaslab_sync(metaslab_t *, uint64_t);
 void metaslab_sync_done(metaslab_t *, uint64_t);
 void metaslab_sync_reassess(metaslab_group_t *);
index 137a8476924a206d3d4bd4cb7e8c33e142ebe362..02ce02226b5d6bece9d50717b1b82e2e37da253a 100644 (file)
@@ -340,8 +340,34 @@ struct metaslab_group {
  * being written.
  */
 struct metaslab {
+       /*
+        * This is the main lock of the metaslab and its purpose is to
+        * coordinate our allocations and frees [e.g metaslab_block_alloc(),
+        * metaslab_free_concrete(), ..etc] with our various syncing
+        * procedures [e.g. metaslab_sync(), metaslab_sync_done(), ..etc].
+        *
+        * The lock is also used during some miscellaneous operations like
+        * using the metaslab's histogram for the metaslab group's histogram
+        * aggregation, or marking the metaslab for initialization.
+        */
        kmutex_t        ms_lock;
+
+       /*
+        * Acquired together with the ms_lock whenever we expect to
+        * write to metaslab data on-disk (i.e flushing entries to
+        * the metaslab's space map). It helps coordinate readers of
+        * the metaslab's space map [see spa_vdev_remove_thread()]
+        * with writers [see metaslab_sync()].
+        *
+        * Note that metaslab_load(), even though a reader, uses
+        * a completely different mechanism to deal with the reading
+        * of the metaslab's space map based on ms_synced_length. That
+        * said, the function still uses the ms_sync_lock after it
+        * has read the ms_sm [see relevant comment in metaslab_load()
+        * as to why].
+        */
        kmutex_t        ms_sync_lock;
+
        kcondvar_t      ms_load_cv;
        space_map_t     *ms_sm;
        uint64_t        ms_id;
@@ -351,6 +377,7 @@ struct metaslab {
 
        range_tree_t    *ms_allocating[TXG_SIZE];
        range_tree_t    *ms_allocatable;
+       uint64_t        ms_allocated_this_txg;
 
        /*
         * The following range trees are accessed only from syncing context.
@@ -375,6 +402,12 @@ struct metaslab {
        boolean_t       ms_loaded;
        boolean_t       ms_loading;
 
+       /*
+        * Tracks the exact amount of allocated space of this metaslab
+        * (and specifically the metaslab's space map) up to the most
+        * recently completed sync pass [see usage in metaslab_sync()].
+        */
+       uint64_t        ms_allocated_space;
        int64_t         ms_deferspace;  /* sum of ms_defermap[] space   */
        uint64_t        ms_weight;      /* weight vs. others in group   */
        uint64_t        ms_activation_weight;   /* activation weight    */
@@ -411,6 +444,9 @@ struct metaslab {
        avl_node_t      ms_group_node;  /* node in metaslab group tree  */
        txg_node_t      ms_txg_node;    /* per-txg dirty metaslab links */
 
+       /* updated every time we are done syncing the metaslab's space map */
+       uint64_t        ms_synced_length;
+
        boolean_t       ms_new;
 };
 
index 64c97bb4dd6ee7b5169ec8c188b9124602d8eadb..52536cccca4624b865e7a55b8206fccf527dc632 100644 (file)
@@ -24,7 +24,7 @@
  */
 
 /*
- * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_SPACE_MAP_H
@@ -55,10 +55,17 @@ extern "C" {
  * for backward compatibility.
  */
 typedef struct space_map_phys {
-       uint64_t        smp_object;     /* on-disk space map object */
-       uint64_t        smp_objsize;    /* size of the object */
-       int64_t         smp_alloc;      /* space allocated from the map */
-       uint64_t        smp_pad[5];     /* reserved */
+       /* object number: not needed but kept for backwards compatibility */
+       uint64_t        smp_object;
+
+       /* length of the object in bytes */
+       uint64_t        smp_length;
+
+       /* space allocated from the map */
+       int64_t         smp_alloc;
+
+       /* reserved */
+       uint64_t        smp_pad[5];
 
        /*
         * The smp_histogram maintains a histogram of free regions. Each
@@ -81,8 +88,6 @@ typedef struct space_map {
        uint64_t        sm_start;       /* start of map */
        uint64_t        sm_size;        /* size of map */
        uint8_t         sm_shift;       /* unit shift */
-       uint64_t        sm_length;      /* synced length */
-       int64_t         sm_alloc;       /* synced space allocated */
        objset_t        *sm_os;         /* objset for this map */
        uint64_t        sm_object;      /* object id for this map */
        uint32_t        sm_blksz;       /* block size for space map */
@@ -189,7 +194,10 @@ boolean_t sm_entry_is_double_word(uint64_t e);
 typedef int (*sm_cb_t)(space_map_entry_t *sme, void *arg);
 
 int space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype);
-int space_map_iterate(space_map_t *sm, sm_cb_t callback, void *arg);
+int space_map_load_length(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
+    uint64_t length);
+int space_map_iterate(space_map_t *sm, uint64_t length,
+    sm_cb_t callback, void *arg);
 int space_map_incremental_destroy(space_map_t *sm, sm_cb_t callback, void *arg,
     dmu_tx_t *tx);
 
@@ -197,10 +205,8 @@ void space_map_histogram_clear(space_map_t *sm);
 void space_map_histogram_add(space_map_t *sm, range_tree_t *rt,
     dmu_tx_t *tx);
 
-void space_map_update(space_map_t *sm);
-
 uint64_t space_map_object(space_map_t *sm);
-uint64_t space_map_allocated(space_map_t *sm);
+int64_t space_map_allocated(space_map_t *sm);
 uint64_t space_map_length(space_map_t *sm);
 
 void space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
@@ -216,8 +222,6 @@ int space_map_open(space_map_t **smp, objset_t *os, uint64_t object,
     uint64_t start, uint64_t size, uint8_t shift);
 void space_map_close(space_map_t *sm);
 
-int64_t space_map_alloc_delta(space_map_t *sm);
-
 #ifdef __cplusplus
 }
 #endif
index aeca0ed20faf52cea18f4835f1da66d1b7890745..58c47a0abfb26a638bc861eff4801d5bee973ceb 100644 (file)
@@ -496,45 +496,62 @@ metaslab_compare(const void *x1, const void *x2)
        return (AVL_CMP(m1->ms_start, m2->ms_start));
 }
 
+uint64_t
+metaslab_allocated_space(metaslab_t *msp)
+{
+       return (msp->ms_allocated_space);
+}
+
 /*
  * Verify that the space accounting on disk matches the in-core range_trees.
  */
-void
+static void
 metaslab_verify_space(metaslab_t *msp, uint64_t txg)
 {
        spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
-       uint64_t allocated = 0;
+       uint64_t allocating = 0;
        uint64_t sm_free_space, msp_free_space;
 
        ASSERT(MUTEX_HELD(&msp->ms_lock));
+       ASSERT(!msp->ms_condensing);
 
        if ((zfs_flags & ZFS_DEBUG_METASLAB_VERIFY) == 0)
                return;
 
        /*
         * We can only verify the metaslab space when we're called
-        * from syncing context with a loaded metaslab that has an allocated
-        * space map. Calling this in non-syncing context does not
-        * provide a consistent view of the metaslab since we're performing
-        * allocations in the future.
+        * from syncing context with a loaded metaslab that has an
+        * allocated space map. Calling this in non-syncing context
+        * does not provide a consistent view of the metaslab since
+        * we're performing allocations in the future.
         */
        if (txg != spa_syncing_txg(spa) || msp->ms_sm == NULL ||
            !msp->ms_loaded)
                return;
 
-       sm_free_space = msp->ms_size - space_map_allocated(msp->ms_sm) -
-           space_map_alloc_delta(msp->ms_sm);
+       /*
+        * Even though the smp_alloc field can get negative (e.g.
+        * see vdev_checkpoint_sm), that should never be the case
+        * when it comes to a metaslab's space map.
+        */
+       ASSERT3S(space_map_allocated(msp->ms_sm), >=, 0);
+
+       sm_free_space = msp->ms_size - metaslab_allocated_space(msp);
 
        /*
-        * Account for future allocations since we would have already
-        * deducted that space from the ms_freetree.
+        * Account for future allocations since we would have
+        * already deducted that space from the ms_allocatable.
         */
        for (int t = 0; t < TXG_CONCURRENT_STATES; t++) {
-               allocated +=
+               allocating +=
                    range_tree_space(msp->ms_allocating[(txg + t) & TXG_MASK]);
        }
 
-       msp_free_space = range_tree_space(msp->ms_allocatable) + allocated +
+       ASSERT3U(msp->ms_deferspace, ==,
+           range_tree_space(msp->ms_defer[0]) +
+           range_tree_space(msp->ms_defer[1]));
+
+       msp_free_space = range_tree_space(msp->ms_allocatable) + allocating +
            msp->ms_deferspace + range_tree_space(msp->ms_freed);
 
        VERIFY3U(sm_free_space, ==, msp_free_space);
@@ -1420,27 +1437,52 @@ metaslab_load_impl(metaslab_t *msp)
 
        ASSERT(MUTEX_HELD(&msp->ms_lock));
        ASSERT(msp->ms_loading);
+       ASSERT(!msp->ms_condensing);
 
        /*
-        * Nobody else can manipulate a loading metaslab, so it's now safe
-        * to drop the lock. This way we don't have to hold the lock while
-        * reading the spacemap from disk.
+        * We temporarily drop the lock to unblock other operations while we
+        * are reading the space map. Therefore, metaslab_sync() and
+        * metaslab_sync_done() can run at the same time as we do.
+        *
+        * metaslab_sync() can append to the space map while we are loading.
+        * Therefore we load only entries that existed when we started the
+        * load. Additionally, metaslab_sync_done() has to wait for the load
+        * to complete because there are potential races like metaslab_load()
+        * loading parts of the space map that are currently being appended
+        * by metaslab_sync(). If we didn't, the ms_allocatable would have
+        * entries that metaslab_sync_done() would try to re-add later.
+        *
+        * That's why before dropping the lock we remember the synced length
+        * of the metaslab and read up to that point of the space map,
+        * ignoring entries appended by metaslab_sync() that happen after we
+        * drop the lock.
         */
+       uint64_t length = msp->ms_synced_length;
        mutex_exit(&msp->ms_lock);
 
-       /*
-        * If the space map has not been allocated yet, then treat
-        * all the space in the metaslab as free and add it to ms_allocatable.
-        */
        if (msp->ms_sm != NULL) {
-               error = space_map_load(msp->ms_sm, msp->ms_allocatable,
-                   SM_FREE);
+               error = space_map_load_length(msp->ms_sm, msp->ms_allocatable,
+                   SM_FREE, length);
        } else {
+               /*
+                * The space map has not been allocated yet, so treat
+                * all the space in the metaslab as free and add it to the
+                * ms_allocatable tree.
+                */
                range_tree_add(msp->ms_allocatable,
                    msp->ms_start, msp->ms_size);
        }
 
+       /*
+        * We need to grab the ms_sync_lock to prevent metaslab_sync() from
+        * changing the ms_sm and the metaslab's range trees while we are
+        * about to use them and populate the ms_allocatable. The ms_lock
+        * is insufficient for this because metaslab_sync() doesn't hold
+        * the ms_lock while writing the ms_checkpointing tree to disk.
+        */
+       mutex_enter(&msp->ms_sync_lock);
        mutex_enter(&msp->ms_lock);
+       ASSERT(!msp->ms_condensing);
 
        if (error != 0)
                return (error);
@@ -1449,18 +1491,22 @@ metaslab_load_impl(metaslab_t *msp)
        msp->ms_loaded = B_TRUE;
 
        /*
-        * If the metaslab already has a spacemap, then we need to
-        * remove all segments from the defer tree; otherwise, the
-        * metaslab is completely empty and we can skip this.
+        * The ms_allocatable contains the segments that exist in the
+        * ms_defer trees [see ms_synced_length]. Thus we need to remove
+        * them from ms_allocatable as they will be added again in
+        * metaslab_sync_done().
         */
-       if (msp->ms_sm != NULL) {
-               for (int t = 0; t < TXG_DEFER_SIZE; t++) {
-                       range_tree_walk(msp->ms_defer[t],
-                           range_tree_remove, msp->ms_allocatable);
-               }
+       for (int t = 0; t < TXG_DEFER_SIZE; t++) {
+               range_tree_walk(msp->ms_defer[t],
+                   range_tree_remove, msp->ms_allocatable);
        }
+
        msp->ms_max_size = metaslab_block_maxsize(msp);
 
+       spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+       metaslab_verify_space(msp, spa_syncing_txg(spa));
+       mutex_exit(&msp->ms_sync_lock);
+
        return (0);
 }
 
@@ -1477,6 +1523,7 @@ metaslab_load(metaslab_t *msp)
        if (msp->ms_loaded)
                return (0);
        VERIFY(!msp->ms_loading);
+       ASSERT(!msp->ms_condensing);
 
        msp->ms_loading = B_TRUE;
        int error = metaslab_load_impl(msp);
@@ -1533,6 +1580,13 @@ metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object, uint64_t txg,
        /*
         * We only open space map objects that already exist. All others
         * will be opened when we finally allocate an object for it.
+        *
+        * Note:
+        * When called from vdev_expand(), we can't call into the DMU as
+        * we are holding the spa_config_lock as a writer and we would
+        * deadlock [see relevant comment in vdev_metaslab_init()]. In
+        * that case, the object parameter is zero though, so we won't
+        * call into the DMU.
         */
        if (object != 0) {
                error = space_map_open(&ms->ms_sm, mos, object, ms->ms_start,
@@ -1544,14 +1598,16 @@ metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object, uint64_t txg,
                }
 
                ASSERT(ms->ms_sm != NULL);
+               ms->ms_allocated_space = space_map_allocated(ms->ms_sm);
        }
 
        /*
-        * We create the main range tree here, but we don't create the
+        * We create the ms_allocatable here, but we don't create the
         * other range trees until metaslab_sync_done().  This serves
         * two purposes: it allows metaslab_sync_done() to detect the
-        * addition of new space; and for debugging, it ensures that we'd
-        * data fault on any attempt to use this metaslab before it's ready.
+        * addition of new space; and for debugging, it ensures that
+        * we'd data fault on any attempt to use this metaslab before
+        * it's ready.
         */
        ms->ms_allocatable = range_tree_create_impl(&rt_avl_ops,
            &ms->ms_allocatable_by_size, metaslab_rangesize_compare, 0);
@@ -1568,8 +1624,11 @@ metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object, uint64_t txg,
         * out this txg. This ensures that we don't attempt to allocate
         * from it before we have initialized it completely.
         */
-       if (txg <= TXG_INITIAL)
+       if (txg <= TXG_INITIAL) {
                metaslab_sync_done(ms, 0);
+               metaslab_space_update(vd, mg->mg_class,
+                   metaslab_allocated_space(ms), 0, 0);
+       }
 
        /*
         * If metaslab_debug_load is set and we're initializing a metaslab
@@ -1603,7 +1662,7 @@ metaslab_fini(metaslab_t *msp)
        mutex_enter(&msp->ms_lock);
        VERIFY(msp->ms_group == NULL);
        metaslab_space_update(vd, mg->mg_class,
-           -space_map_allocated(msp->ms_sm), 0, -msp->ms_size);
+           -metaslab_allocated_space(msp), 0, -msp->ms_size);
 
        space_map_close(msp->ms_sm);
 
@@ -1674,10 +1733,10 @@ int zfs_frag_table[FRAGMENTATION_TABLE_SIZE] = {
 };
 
 /*
- * Calclate the metaslab's fragmentation metric. A return value
- * of ZFS_FRAG_INVALID means that the metaslab has not been upgraded and does
- * not support this metric. Otherwise, the return value should be in the
- * range [0, 100].
+ * Calculate the metaslab's fragmentation metric and set ms_fragmentation.
+ * Setting this value to ZFS_FRAG_INVALID means that the metaslab has not
+ * been upgraded and does not support this metric. Otherwise, the return
+ * value should be in the range [0, 100].
  */
 static void
 metaslab_set_fragmentation(metaslab_t *msp)
@@ -1770,7 +1829,7 @@ metaslab_space_weight(metaslab_t *msp)
        /*
         * The baseline weight is the metaslab's free space.
         */
-       space = msp->ms_size - space_map_allocated(msp->ms_sm);
+       space = msp->ms_size - metaslab_allocated_space(msp);
 
        if (metaslab_fragmentation_factor_enabled &&
            msp->ms_fragmentation != ZFS_FRAG_INVALID) {
@@ -1906,7 +1965,7 @@ metaslab_segment_weight(metaslab_t *msp)
        /*
         * The metaslab is completely free.
         */
-       if (space_map_allocated(msp->ms_sm) == 0) {
+       if (metaslab_allocated_space(msp) == 0) {
                int idx = highbit64(msp->ms_size) - 1;
                int max_idx = SPACE_MAP_HISTOGRAM_SIZE + shift - 1;
 
@@ -1928,7 +1987,7 @@ metaslab_segment_weight(metaslab_t *msp)
        /*
         * If the metaslab is fully allocated then just make the weight 0.
         */
-       if (space_map_allocated(msp->ms_sm) == msp->ms_size)
+       if (metaslab_allocated_space(msp) == msp->ms_size)
                return (0);
        /*
         * If the metaslab is already loaded, then use the range tree to
@@ -2008,6 +2067,8 @@ metaslab_weight(metaslab_t *msp)
         */
        if (msp->ms_loaded)
                msp->ms_max_size = metaslab_block_maxsize(msp);
+       else
+               ASSERT0(msp->ms_max_size);
 
        /*
         * Segment-based weighting requires space map histogram support.
@@ -2411,17 +2472,17 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
        VERIFY(txg <= spa_final_dirty_txg(spa));
 
        /*
-        * The only state that can actually be changing concurrently with
-        * metaslab_sync() is the metaslab's ms_allocatable.  No other
-        * thread can be modifying this txg's alloc, freeing,
+        * The only state that can actually be changing concurrently
+        * with metaslab_sync() is the metaslab's ms_allocatable. No
+        * other thread can be modifying this txg's alloc, freeing,
         * freed, or space_map_phys_t.  We drop ms_lock whenever we
-        * could call into the DMU, because the DMU can call down to us
-        * (e.g. via zio_free()) at any time.
+        * could call into the DMU, because the DMU can call down to
+        * us (e.g. via zio_free()) at any time.
         *
         * The spa_vdev_remove_thread() can be reading metaslab state
-        * concurrently, and it is locked out by the ms_sync_lock.  Note
-        * that the ms_lock is insufficient for this, because it is dropped
-        * by space_map_write().
+        * concurrently, and it is locked out by the ms_sync_lock.
+        * Note that the ms_lock is insufficient for this, because it
+        * is dropped by space_map_write().
         */
        tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);
 
@@ -2433,7 +2494,9 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
 
                VERIFY0(space_map_open(&msp->ms_sm, mos, new_object,
                    msp->ms_start, msp->ms_size, vd->vdev_ashift));
+
                ASSERT(msp->ms_sm != NULL);
+               ASSERT0(metaslab_allocated_space(msp));
        }
 
        if (!range_tree_is_empty(msp->ms_checkpointing) &&
@@ -2481,6 +2544,11 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
                mutex_enter(&msp->ms_lock);
        }
 
+       msp->ms_allocated_space += range_tree_space(alloctree);
+       ASSERT3U(msp->ms_allocated_space, >=,
+           range_tree_space(msp->ms_freeing));
+       msp->ms_allocated_space -= range_tree_space(msp->ms_freeing);
+
        if (!range_tree_is_empty(msp->ms_checkpointing)) {
                ASSERT(spa_has_checkpoint(spa));
                ASSERT3P(vd->vdev_checkpoint_sm, !=, NULL);
@@ -2494,14 +2562,13 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
                space_map_write(vd->vdev_checkpoint_sm,
                    msp->ms_checkpointing, SM_FREE, SM_NO_VDEVID, tx);
                mutex_enter(&msp->ms_lock);
-               space_map_update(vd->vdev_checkpoint_sm);
 
                spa->spa_checkpoint_info.sci_dspace +=
                    range_tree_space(msp->ms_checkpointing);
                vd->vdev_stat.vs_checkpoint_space +=
                    range_tree_space(msp->ms_checkpointing);
                ASSERT3U(vd->vdev_stat.vs_checkpoint_space, ==,
-                   -vd->vdev_checkpoint_sm->sm_alloc);
+                   -space_map_allocated(vd->vdev_checkpoint_sm));
 
                range_tree_vacate(msp->ms_checkpointing, NULL, NULL);
        }
@@ -2553,16 +2620,18 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
 
        /*
         * For sync pass 1, we avoid traversing this txg's free range tree
-        * and instead will just swap the pointers for freeing and
-        * freed. We can safely do this since the freed_tree is
-        * guaranteed to be empty on the initial pass.
+        * and instead will just swap the pointers for freeing and freed.
+        * We can safely do this since the freed_tree is guaranteed to be
+        * empty on the initial pass.
         */
        if (spa_sync_pass(spa) == 1) {
                range_tree_swap(&msp->ms_freeing, &msp->ms_freed);
+               ASSERT0(msp->ms_allocated_this_txg);
        } else {
                range_tree_vacate(msp->ms_freeing,
                    range_tree_add, msp->ms_freed);
        }
+       msp->ms_allocated_this_txg += range_tree_space(alloctree);
        range_tree_vacate(alloctree, NULL, NULL);
 
        ASSERT0(range_tree_space(msp->ms_allocating[txg & TXG_MASK]));
@@ -2640,7 +2709,8 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
        }
 
        defer_delta = 0;
-       alloc_delta = space_map_alloc_delta(msp->ms_sm);
+       alloc_delta = msp->ms_allocated_this_txg -
+           range_tree_space(msp->ms_freed);
        if (defer_allowed) {
                defer_delta = range_tree_space(msp->ms_freed) -
                    range_tree_space(*defer_tree);
@@ -2672,7 +2742,8 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
                    msp->ms_loaded ? range_tree_add : NULL,
                    msp->ms_allocatable);
        }
-       space_map_update(msp->ms_sm);
+
+       msp->ms_synced_length = space_map_length(msp->ms_sm);
 
        msp->ms_deferspace += defer_delta;
        ASSERT3S(msp->ms_deferspace, >=, 0);
@@ -2724,6 +2795,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
        ASSERT0(range_tree_space(msp->ms_freed));
        ASSERT0(range_tree_space(msp->ms_checkpointing));
 
+       msp->ms_allocated_this_txg = 0;
        mutex_exit(&msp->ms_lock);
 }
 
index 230ae5785a854638bbcff3951ab1b51f46bd37e7..d6f68ceda589ed648f21522db3a5481c30f59b32 100644 (file)
@@ -263,7 +263,7 @@ spa_checkpoint_accounting_verify(spa_t *spa)
 
                if (vd->vdev_checkpoint_sm != NULL) {
                        ckpoint_sm_space_sum +=
-                           -vd->vdev_checkpoint_sm->sm_alloc;
+                           -space_map_allocated(vd->vdev_checkpoint_sm);
                        vs_ckpoint_space_sum +=
                            vd->vdev_stat.vs_checkpoint_space;
                        ASSERT3U(ckpoint_sm_space_sum, ==,
@@ -349,7 +349,7 @@ spa_checkpoint_discard_thread_sync(void *arg, dmu_tx_t *tx)
                            error, vd->vdev_id);
                }
                ASSERT0(words_after);
-               ASSERT0(vd->vdev_checkpoint_sm->sm_alloc);
+               ASSERT0(space_map_allocated(vd->vdev_checkpoint_sm));
                ASSERT0(space_map_length(vd->vdev_checkpoint_sm));
 
                space_map_free(vd->vdev_checkpoint_sm, tx);
index 9ba6ff6ff4c2ad6b05f05f0e2318e6410ed961e9..5cf3feaae108672fb8f2e05fc21965d27fce8fd7 100644 (file)
@@ -23,7 +23,7 @@
  * Use is subject to license terms.
  */
 /*
- * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -81,20 +81,22 @@ sm_entry_is_double_word(uint64_t e)
 
 /*
  * Iterate through the space map, invoking the callback on each (non-debug)
- * space map entry.
+ * space map entry. Stop after reading 'end' bytes of the space map.
  */
 int
-space_map_iterate(space_map_t *sm, sm_cb_t callback, void *arg)
+space_map_iterate(space_map_t *sm, uint64_t end, sm_cb_t callback, void *arg)
 {
-       uint64_t sm_len = space_map_length(sm);
-       ASSERT3U(sm->sm_blksz, !=, 0);
+       uint64_t blksz = sm->sm_blksz;
+
+       ASSERT3U(blksz, !=, 0);
+       ASSERT3U(end, <=, space_map_length(sm));
+       ASSERT0(P2PHASE(end, sizeof (uint64_t)));
 
-       dmu_prefetch(sm->sm_os, space_map_object(sm), 0, 0, sm_len,
+       dmu_prefetch(sm->sm_os, space_map_object(sm), 0, 0, end,
            ZIO_PRIORITY_SYNC_READ);
 
-       uint64_t blksz = sm->sm_blksz;
        int error = 0;
-       for (uint64_t block_base = 0; block_base < sm_len && error == 0;
+       for (uint64_t block_base = 0; block_base < end && error == 0;
            block_base += blksz) {
                dmu_buf_t *db;
                error = dmu_buf_hold(sm->sm_os, space_map_object(sm),
@@ -103,7 +105,7 @@ space_map_iterate(space_map_t *sm, sm_cb_t callback, void *arg)
                        return (error);
 
                uint64_t *block_start = db->db_data;
-               uint64_t block_length = MIN(sm_len - block_base, blksz);
+               uint64_t block_length = MIN(end - block_base, blksz);
                uint64_t *block_end = block_start +
                    (block_length / sizeof (uint64_t));
 
@@ -186,7 +188,7 @@ space_map_reversed_last_block_entries(space_map_t *sm, uint64_t *buf,
         * dmu_buf_hold().
         */
        uint64_t last_word_offset =
-           sm->sm_phys->smp_objsize - sizeof (uint64_t);
+           sm->sm_phys->smp_length - sizeof (uint64_t);
        error = dmu_buf_hold(sm->sm_os, space_map_object(sm), last_word_offset,
            FTAG, &db, DMU_READ_NO_PREFETCH);
        if (error != 0)
@@ -199,7 +201,7 @@ space_map_reversed_last_block_entries(space_map_t *sm, uint64_t *buf,
 
        uint64_t *words = db->db_data;
        *nwords =
-           (sm->sm_phys->smp_objsize - db->db_offset) / sizeof (uint64_t);
+           (sm->sm_phys->smp_length - db->db_offset) / sizeof (uint64_t);
 
        ASSERT3U(*nwords, <=, bufsz / sizeof (uint64_t));
 
@@ -298,8 +300,7 @@ space_map_incremental_destroy(space_map_t *sm, sm_cb_t callback, void *arg,
                        uint64_t e = buf[i];
 
                        if (sm_entry_is_debug(e)) {
-                               sm->sm_phys->smp_objsize -= sizeof (uint64_t);
-                               space_map_update(sm);
+                               sm->sm_phys->smp_length -= sizeof (uint64_t);
                                continue;
                        }
 
@@ -354,15 +355,13 @@ space_map_incremental_destroy(space_map_t *sm, sm_cb_t callback, void *arg,
                                sm->sm_phys->smp_alloc -= entry_run;
                        else
                                sm->sm_phys->smp_alloc += entry_run;
-                       sm->sm_phys->smp_objsize -= words * sizeof (uint64_t);
-                       space_map_update(sm);
+                       sm->sm_phys->smp_length -= words * sizeof (uint64_t);
                }
        }
 
        if (space_map_length(sm) == 0) {
                ASSERT0(error);
-               ASSERT0(sm->sm_phys->smp_objsize);
-               ASSERT0(sm->sm_alloc);
+               ASSERT0(space_map_allocated(sm));
        }
 
        zio_buf_free(buf, bufsz);
@@ -391,38 +390,42 @@ space_map_load_callback(space_map_entry_t *sme, void *arg)
 }
 
 /*
- * Load the space map disk into the specified range tree. Segments of maptype
- * are added to the range tree, other segment types are removed.
+ * Load the spacemap into the rangetree, like space_map_load. But only
+ * read the first 'length' bytes of the spacemap.
  */
 int
-space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype)
+space_map_load_length(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
+    uint64_t length)
 {
-       uint64_t space;
-       int err;
        space_map_load_arg_t smla;
 
        VERIFY0(range_tree_space(rt));
-       space = space_map_allocated(sm);
 
-       if (maptype == SM_FREE) {
+       if (maptype == SM_FREE)
                range_tree_add(rt, sm->sm_start, sm->sm_size);
-               space = sm->sm_size - space;
-       }
 
        smla.smla_rt = rt;
        smla.smla_sm = sm;
        smla.smla_type = maptype;
-       err = space_map_iterate(sm, space_map_load_callback, &smla);
+       int err = space_map_iterate(sm, length,
+           space_map_load_callback, &smla);
 
-       if (err == 0) {
-               VERIFY3U(range_tree_space(rt), ==, space);
-       } else {
+       if (err != 0)
                range_tree_vacate(rt, NULL, NULL);
-       }
 
        return (err);
 }
 
+/*
+ * Load the space map disk into the specified range tree. Segments of maptype
+ * are added to the range tree, other segment types are removed.
+ */
+int
+space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype)
+{
+       return (space_map_load_length(sm, rt, maptype, space_map_length(sm)));
+}
+
 void
 space_map_histogram_clear(space_map_t *sm)
 {
@@ -506,10 +509,10 @@ space_map_write_intro_debug(space_map_t *sm, maptype_t maptype, dmu_tx_t *tx)
            SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(tx->tx_pool->dp_spa)) |
            SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));
 
-       dmu_write(sm->sm_os, space_map_object(sm), sm->sm_phys->smp_objsize,
+       dmu_write(sm->sm_os, space_map_object(sm), sm->sm_phys->smp_length,
            sizeof (dentry), &dentry, tx);
 
-       sm->sm_phys->smp_objsize += sizeof (dentry);
+       sm->sm_phys->smp_length += sizeof (dentry);
 }
 
 /*
@@ -541,7 +544,7 @@ space_map_write_seg(space_map_t *sm, range_seg_t *rs, maptype_t maptype,
        uint64_t *block_base = db->db_data;
        uint64_t *block_end = block_base + (sm->sm_blksz / sizeof (uint64_t));
        uint64_t *block_cursor = block_base +
-           (sm->sm_phys->smp_objsize - db->db_offset) / sizeof (uint64_t);
+           (sm->sm_phys->smp_length - db->db_offset) / sizeof (uint64_t);
 
        ASSERT3P(block_cursor, <=, block_end);
 
@@ -564,7 +567,7 @@ space_map_write_seg(space_map_t *sm, range_seg_t *rs, maptype_t maptype,
                if (block_cursor == block_end) {
                        dmu_buf_rele(db, tag);
 
-                       uint64_t next_word_offset = sm->sm_phys->smp_objsize;
+                       uint64_t next_word_offset = sm->sm_phys->smp_length;
                        VERIFY0(dmu_buf_hold(sm->sm_os,
                            space_map_object(sm), next_word_offset,
                            tag, &db, DMU_READ_PREFETCH));
@@ -594,7 +597,7 @@ space_map_write_seg(space_map_t *sm, range_seg_t *rs, maptype_t maptype,
                            SM_DEBUG_SYNCPASS_ENCODE(0) |
                            SM_DEBUG_TXG_ENCODE(0);
                        block_cursor++;
-                       sm->sm_phys->smp_objsize += sizeof (uint64_t);
+                       sm->sm_phys->smp_length += sizeof (uint64_t);
                        ASSERT3P(block_cursor, ==, block_end);
                        continue;
                }
@@ -625,7 +628,7 @@ space_map_write_seg(space_map_t *sm, range_seg_t *rs, maptype_t maptype,
                            words);
                        break;
                }
-               sm->sm_phys->smp_objsize += words * sizeof (uint64_t);
+               sm->sm_phys->smp_length += words * sizeof (uint64_t);
 
                start += run_len;
                size -= run_len;
@@ -652,7 +655,7 @@ space_map_write_impl(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
         * We do this right after we write the intro debug entry
         * because the estimate does not take it into account.
         */
-       uint64_t initial_objsize = sm->sm_phys->smp_objsize;
+       uint64_t initial_objsize = sm->sm_phys->smp_length;
        uint64_t estimated_growth =
            space_map_estimate_optimal_size(sm, rt, SM_NO_VDEVID);
        uint64_t estimated_final_objsize = initial_objsize + estimated_growth;
@@ -663,7 +666,7 @@ space_map_write_impl(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
         * and use that to get a hold of the last block, so we can
         * start appending to it.
         */
-       uint64_t next_word_offset = sm->sm_phys->smp_objsize;
+       uint64_t next_word_offset = sm->sm_phys->smp_length;
        VERIFY0(dmu_buf_hold(sm->sm_os, space_map_object(sm),
            next_word_offset, FTAG, &db, DMU_READ_PREFETCH));
        ASSERT3U(db->db_size, ==, sm->sm_blksz);
@@ -711,7 +714,7 @@ space_map_write_impl(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
         * Therefore we expect the actual objsize to be equal or less
         * than whatever we estimated it to be.
         */
-       ASSERT3U(estimated_final_objsize, >=, sm->sm_phys->smp_objsize);
+       ASSERT3U(estimated_final_objsize, >=, sm->sm_phys->smp_length);
 #endif
 }
 
@@ -792,8 +795,6 @@ space_map_open(space_map_t **smp, objset_t *os, uint64_t object,
        sm->sm_shift = shift;
        sm->sm_os = os;
        sm->sm_object = object;
-       sm->sm_length = 0;
-       sm->sm_alloc = 0;
        sm->sm_blksz = 0;
        sm->sm_dbuf = NULL;
        sm->sm_phys = NULL;
@@ -870,23 +871,10 @@ space_map_truncate(space_map_t *sm, int blocksize, dmu_tx_t *tx)
        }
 
        dmu_buf_will_dirty(sm->sm_dbuf, tx);
-       sm->sm_phys->smp_objsize = 0;
+       sm->sm_phys->smp_length = 0;
        sm->sm_phys->smp_alloc = 0;
 }
 
-/*
- * Update the in-core space_map allocation and length values.
- */
-void
-space_map_update(space_map_t *sm)
-{
-       if (sm == NULL)
-               return;
-
-       sm->sm_alloc = sm->sm_phys->smp_alloc;
-       sm->sm_length = sm->sm_phys->smp_objsize;
-}
-
 uint64_t
 space_map_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
 {
@@ -1068,32 +1056,14 @@ space_map_object(space_map_t *sm)
        return (sm != NULL ? sm->sm_object : 0);
 }
 
-/*
- * Returns the already synced, on-disk allocated space.
- */
-uint64_t
+int64_t
 space_map_allocated(space_map_t *sm)
 {
-       return (sm != NULL ? sm->sm_alloc : 0);
+       return (sm != NULL ? sm->sm_phys->smp_alloc : 0);
 }
 
-/*
- * Returns the already synced, on-disk length;
- */
 uint64_t
 space_map_length(space_map_t *sm)
 {
-       return (sm != NULL ? sm->sm_length : 0);
-}
-
-/*
- * Returns the allocated space that is currently syncing.
- */
-int64_t
-space_map_alloc_delta(space_map_t *sm)
-{
-       if (sm == NULL)
-               return (0);
-       ASSERT(sm->sm_dbuf != NULL);
-       return (sm->sm_phys->smp_alloc - space_map_allocated(sm));
+       return (sm != NULL ? sm->sm_phys->smp_length : 0);
 }
index 7add0d6e6cb1c778d2bc6a708b7a2938759736f2..81c34da074fd0a2218b42984bacfaaec0add29ab 100644 (file)
@@ -2701,13 +2701,6 @@ vdev_dtl_load(vdev_t *vd)
                ASSERT(vd->vdev_dtl_sm != NULL);
 
                mutex_enter(&vd->vdev_dtl_lock);
-
-               /*
-                * Now that we've opened the space_map we need to update
-                * the in-core DTL.
-                */
-               space_map_update(vd->vdev_dtl_sm);
-
                error = space_map_load(vd->vdev_dtl_sm,
                    vd->vdev_dtl[DTL_MISSING], SM_ALLOC);
                mutex_exit(&vd->vdev_dtl_lock);
@@ -2867,10 +2860,6 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
        }
 
        dmu_tx_commit(tx);
-
-       mutex_enter(&vd->vdev_dtl_lock);
-       space_map_update(vd->vdev_dtl_sm);
-       mutex_exit(&vd->vdev_dtl_lock);
 }
 
 /*
@@ -3042,15 +3031,15 @@ vdev_load(vdev_t *vd)
                                return (error);
                        }
                        ASSERT3P(vd->vdev_checkpoint_sm, !=, NULL);
-                       space_map_update(vd->vdev_checkpoint_sm);
 
                        /*
                         * Since the checkpoint_sm contains free entries
-                        * exclusively we can use sm_alloc to indicate the
-                        * cumulative checkpointed space that has been freed.
+                        * exclusively we can use space_map_allocated() to
+                        * indicate the cumulative checkpointed space that
+                        * has been freed.
                         */
                        vd->vdev_stat.vs_checkpoint_space =
-                           -vd->vdev_checkpoint_sm->sm_alloc;
+                           -space_map_allocated(vd->vdev_checkpoint_sm);
                        vd->vdev_spa->spa_checkpoint_info.sci_dspace +=
                            vd->vdev_stat.vs_checkpoint_space;
                } else if (error != 0) {
@@ -3088,7 +3077,6 @@ vdev_load(vdev_t *vd)
                            (u_longlong_t)obsolete_sm_object, error);
                        return (error);
                }
-               space_map_update(vd->vdev_obsolete_sm);
        } else if (error != 0) {
                vdev_dbgmsg(vd, "vdev_load: failed to retrieve obsolete "
                    "space map object from vdev ZAP [error=%d]", error);
@@ -3519,8 +3507,8 @@ top:
                         */
                        if (error == 0 &&
                            tvd->vdev_checkpoint_sm != NULL) {
-                               ASSERT3U(tvd->vdev_checkpoint_sm->sm_alloc,
-                                   !=, 0);
+                               ASSERT3U(space_map_allocated(
+                                   tvd->vdev_checkpoint_sm), !=, 0);
                                error = ZFS_ERR_CHECKPOINT_EXISTS;
                        }
 
index 2f8268f0fab663b76944593e7353d2c047daf0a1..68dfe83128a7b0e6b96ae7d6ae3ae477ec394651 100644 (file)
@@ -684,7 +684,6 @@ spa_condense_indirect_thread(void *arg, zthr_t *zthr)
 
        VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset,
            scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0));
-       space_map_update(prev_obsolete_sm);
        counts = vdev_indirect_mapping_load_obsolete_counts(old_mapping);
        if (prev_obsolete_sm != NULL) {
                vdev_indirect_mapping_load_obsolete_spacemap(old_mapping,
@@ -838,7 +837,6 @@ vdev_indirect_sync_obsolete(vdev_t *vd, dmu_tx_t *tx)
                VERIFY0(space_map_open(&vd->vdev_obsolete_sm,
                    spa->spa_meta_objset, obsolete_sm_object,
                    0, vd->vdev_asize, 0));
-               space_map_update(vd->vdev_obsolete_sm);
        }
 
        ASSERT(vd->vdev_obsolete_sm != NULL);
@@ -847,7 +845,6 @@ vdev_indirect_sync_obsolete(vdev_t *vd, dmu_tx_t *tx)
 
        space_map_write(vd->vdev_obsolete_sm,
            vd->vdev_obsolete_segments, SM_ALLOC, SM_NO_VDEVID, tx);
-       space_map_update(vd->vdev_obsolete_sm);
        range_tree_vacate(vd->vdev_obsolete_segments, NULL, NULL);
 }
 
index c02a4f5a4ce50a106d62b506eaf91958e9731afe..e4d998f09b857a288e145ab3a0ae148c9911b57d 100644 (file)
@@ -560,6 +560,7 @@ vdev_indirect_mapping_load_obsolete_spacemap(vdev_indirect_mapping_t *vim,
        losma.losma_counts = counts;
        losma.losma_vim = vim;
        VERIFY0(space_map_iterate(obsolete_space_sm,
+           space_map_length(obsolete_space_sm),
            load_obsolete_sm_callback, &losma));
 }
 
index e68f23e3f4e90dd4f8d5d67dd6fdb07a2c23479c..a69eca354c1078a4bbf185bd8a88cab4d5620400 100644 (file)
@@ -452,7 +452,7 @@ vdev_initialize_calculate_progress(vdev_t *vd)
                mutex_enter(&msp->ms_lock);
 
                uint64_t ms_free = msp->ms_size -
-                   space_map_allocated(msp->ms_sm);
+                   metaslab_allocated_space(msp);
 
                if (vd->vdev_top->vdev_ops == &vdev_raidz_ops)
                        ms_free /= vd->vdev_top->vdev_children;
index 70620499756c871d3a87f0766e248209564364f7..ff39a0a26d6a1a1d9c79f92ce293d3fa668a219e 100644 (file)
@@ -291,15 +291,8 @@ vdev_remove_initiate_sync(void *arg, dmu_tx_t *tx)
                if (ms->ms_sm == NULL)
                        continue;
 
-               /*
-                * Sync tasks happen before metaslab_sync(), therefore
-                * smp_alloc and sm_alloc must be the same.
-                */
-               ASSERT3U(space_map_allocated(ms->ms_sm), ==,
-                   ms->ms_sm->sm_phys->smp_alloc);
-
                spa->spa_removing_phys.sr_to_copy +=
-                   space_map_allocated(ms->ms_sm);
+                   metaslab_allocated_space(ms);
 
                /*
                 * Space which we are freeing this txg does not need to
@@ -1443,22 +1436,8 @@ spa_vdev_remove_thread(void *arg)
                 * appropriate action (see free_from_removing_vdev()).
                 */
                if (msp->ms_sm != NULL) {
-                       space_map_t *sm = NULL;
-
-                       /*
-                        * We have to open a new space map here, because
-                        * ms_sm's sm_length and sm_alloc may not reflect
-                        * what's in the object contents, if we are in between
-                        * metaslab_sync() and metaslab_sync_done().
-                        */
-                       VERIFY0(space_map_open(&sm,
-                           spa->spa_dsl_pool->dp_meta_objset,
-                           msp->ms_sm->sm_object, msp->ms_sm->sm_start,
-                           msp->ms_sm->sm_size, msp->ms_sm->sm_shift));
-                       space_map_update(sm);
-                       VERIFY0(space_map_load(sm, svr->svr_allocd_segs,
-                           SM_ALLOC));
-                       space_map_close(sm);
+                       VERIFY0(space_map_load(msp->ms_sm,
+                           svr->svr_allocd_segs, SM_ALLOC));
 
                        range_tree_walk(msp->ms_freeing,
                            range_tree_remove, svr->svr_allocd_segs);
@@ -1681,16 +1660,6 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx)
                ASSERT0(range_tree_space(msp->ms_freed));
 
                if (msp->ms_sm != NULL) {
-                       /*
-                        * Assert that the in-core spacemap has the same
-                        * length as the on-disk one, so we can use the
-                        * existing in-core spacemap to load it from disk.
-                        */
-                       ASSERT3U(msp->ms_sm->sm_alloc, ==,
-                           msp->ms_sm->sm_phys->smp_alloc);
-                       ASSERT3U(msp->ms_sm->sm_length, ==,
-                           msp->ms_sm->sm_phys->smp_objsize);
-
                        mutex_enter(&svr->svr_lock);
                        VERIFY0(space_map_load(msp->ms_sm,
                            svr->svr_allocd_segs, SM_ALLOC));
@@ -1789,9 +1758,6 @@ spa_vdev_remove_cancel(spa_t *spa)
        return (spa_vdev_remove_cancel_impl(spa));
 }
 
-/*
- * Called every sync pass of every txg if there's a svr.
- */
 void
 svr_sync(spa_t *spa, dmu_tx_t *tx)
 {