]> git.proxmox.com Git - mirror_zfs.git/blobdiff - module/zfs/dsl_dataset.c
Fix typo/etc in module/zfs/zfs_ctldir.c
[mirror_zfs.git] / module / zfs / dsl_dataset.c
index 0bfc4cd7a24d24c457ab2f3d7ac48fbbbfa3dc8b..966c2cc93d13093baefcef469f0ad3fc033825be 100644 (file)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 RackTop Systems.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -46,6 +46,8 @@
 #include <sys/zfs_context.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/vdev.h>
 #include <sys/zfs_znode.h>
 #include <sys/zfs_onexit.h>
 #include <sys/zvol.h>
@@ -55,7 +57,7 @@
 #include <sys/dsl_userhold.h>
 #include <sys/dsl_bookmark.h>
 #include <sys/policy.h>
-#include <sys/dmu_send.h>
+#include <sys/dmu_recv.h>
 #include <sys/zio_compress.h>
 #include <zfs_fletcher.h>
 #include <sys/zio_checksum.h>
@@ -82,6 +84,13 @@ int zfs_max_recordsize = 1 * 1024 * 1024;
 
 extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds);
 
+static void dsl_dataset_set_remap_deadlist_object(dsl_dataset_t *ds,
+    uint64_t obj, dmu_tx_t *tx);
+static void dsl_dataset_unset_remap_deadlist_object(dsl_dataset_t *ds,
+    dmu_tx_t *tx);
+
+static void unload_zfeature(dsl_dataset_t *ds, spa_feature_t f);
+
 extern int spa_asize_inflation;
 
 static zil_header_t zero_zil;
@@ -142,13 +151,16 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
        dsl_dataset_phys(ds)->ds_unique_bytes += used;
 
        if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) {
-               ds->ds_feature_activation_needed[SPA_FEATURE_LARGE_BLOCKS] =
-                   B_TRUE;
+               ds->ds_feature_activation[SPA_FEATURE_LARGE_BLOCKS] =
+                   (void *)B_TRUE;
        }
 
        spa_feature_t f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));
-       if (f != SPA_FEATURE_NONE)
-               ds->ds_feature_activation_needed[f] = B_TRUE;
+       if (f != SPA_FEATURE_NONE) {
+               ASSERT3S(spa_feature_table[f].fi_type, ==,
+                   ZFEATURE_TYPE_BOOLEAN);
+               ds->ds_feature_activation[f] = (void *)B_TRUE;
+       }
 
        mutex_exit(&ds->ds_lock);
        dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
@@ -157,11 +169,54 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
            DD_USED_REFRSRV, DD_USED_HEAD, tx);
 }
 
+/*
+ * Called when the specified segment has been remapped, and is thus no
+ * longer referenced in the head dataset.  The vdev must be indirect.
+ *
+ * If the segment is referenced by a snapshot, put it on the remap deadlist.
+ * Otherwise, add this segment to the obsolete spacemap.
+ */
+void
+dsl_dataset_block_remapped(dsl_dataset_t *ds, uint64_t vdev, uint64_t offset,
+    uint64_t size, uint64_t birth, dmu_tx_t *tx)
+{
+       spa_t *spa = ds->ds_dir->dd_pool->dp_spa;
+
+       ASSERT(dmu_tx_is_syncing(tx));
+       ASSERT(birth <= tx->tx_txg);
+       ASSERT(!ds->ds_is_snapshot);
+
+       if (birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
+               spa_vdev_indirect_mark_obsolete(spa, vdev, offset, size, tx);
+       } else {
+               blkptr_t fakebp;
+               dva_t *dva = &fakebp.blk_dva[0];
+
+               ASSERT(ds != NULL);
+
+               mutex_enter(&ds->ds_remap_deadlist_lock);
+               if (!dsl_dataset_remap_deadlist_exists(ds)) {
+                       dsl_dataset_create_remap_deadlist(ds, tx);
+               }
+               mutex_exit(&ds->ds_remap_deadlist_lock);
+
+               BP_ZERO(&fakebp);
+               fakebp.blk_birth = birth;
+               DVA_SET_VDEV(dva, vdev);
+               DVA_SET_OFFSET(dva, offset);
+               DVA_SET_ASIZE(dva, size);
+
+               dsl_deadlist_insert(&ds->ds_remap_deadlist, &fakebp, tx);
+       }
+}
+
 int
 dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
     boolean_t async)
 {
-       int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
+       spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+
+       int used = bp_get_dsize_sync(spa, bp);
        int compressed = BP_GET_PSIZE(bp);
        int uncompressed = BP_GET_UCSIZE(bp);
 
@@ -241,6 +296,72 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
        return (used);
 }
 
+struct feature_type_uint64_array_arg {
+       uint64_t length;
+       uint64_t *array;
+};
+
+static void
+unload_zfeature(dsl_dataset_t *ds, spa_feature_t f)
+{
+       switch (spa_feature_table[f].fi_type) {
+       case ZFEATURE_TYPE_BOOLEAN:
+               break;
+       case ZFEATURE_TYPE_UINT64_ARRAY:
+       {
+               struct feature_type_uint64_array_arg *ftuaa = ds->ds_feature[f];
+               kmem_free(ftuaa->array, ftuaa->length * sizeof (uint64_t));
+               kmem_free(ftuaa, sizeof (*ftuaa));
+               break;
+       }
+       default:
+               panic("Invalid zfeature type %d", spa_feature_table[f].fi_type);
+       }
+}
+
+static int
+load_zfeature(objset_t *mos, dsl_dataset_t *ds, spa_feature_t f)
+{
+       int err = 0;
+       switch (spa_feature_table[f].fi_type) {
+       case ZFEATURE_TYPE_BOOLEAN:
+               err = zap_contains(mos, ds->ds_object,
+                   spa_feature_table[f].fi_guid);
+               if (err == 0) {
+                       ds->ds_feature[f] = (void *)B_TRUE;
+               } else {
+                       ASSERT3U(err, ==, ENOENT);
+                       err = 0;
+               }
+               break;
+       case ZFEATURE_TYPE_UINT64_ARRAY:
+       {
+               uint64_t int_size, num_int;
+               uint64_t *data;
+               err = zap_length(mos, ds->ds_object,
+                   spa_feature_table[f].fi_guid, &int_size, &num_int);
+               if (err != 0) {
+                       ASSERT3U(err, ==, ENOENT);
+                       err = 0;
+                       break;
+               }
+               ASSERT3U(int_size, ==, sizeof (uint64_t));
+               data = kmem_alloc(int_size * num_int, KM_SLEEP);
+               VERIFY0(zap_lookup(mos, ds->ds_object,
+                   spa_feature_table[f].fi_guid, int_size, num_int, data));
+               struct feature_type_uint64_array_arg *ftuaa =
+                   kmem_alloc(sizeof (*ftuaa), KM_SLEEP);
+               ftuaa->length = num_int;
+               ftuaa->array = data;
+               ds->ds_feature[f] = ftuaa;
+               break;
+       }
+       default:
+               panic("Invalid zfeature type %d", spa_feature_table[f].fi_type);
+       }
+       return (err);
+}
+
 /*
  * We have to release the fsid syncronously or we risk that a subsequent
  * mount of the same dataset will fail to unique_insert the fsid.  This
@@ -275,18 +396,26 @@ dsl_dataset_evict_async(void *dbu)
        }
 
        bplist_destroy(&ds->ds_pending_deadlist);
-       if (ds->ds_deadlist.dl_os != NULL)
+       if (dsl_deadlist_is_open(&ds->ds_deadlist))
                dsl_deadlist_close(&ds->ds_deadlist);
+       if (dsl_deadlist_is_open(&ds->ds_remap_deadlist))
+               dsl_deadlist_close(&ds->ds_remap_deadlist);
        if (ds->ds_dir)
                dsl_dir_async_rele(ds->ds_dir, ds);
 
        ASSERT(!list_link_active(&ds->ds_synced_link));
 
+       for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+               if (dsl_dataset_feature_is_active(ds, f))
+                       unload_zfeature(ds, f);
+       }
+
        list_destroy(&ds->ds_prop_cbs);
        mutex_destroy(&ds->ds_lock);
        mutex_destroy(&ds->ds_opening_lock);
        mutex_destroy(&ds->ds_sendstream_lock);
-       refcount_destroy(&ds->ds_longholds);
+       mutex_destroy(&ds->ds_remap_deadlist_lock);
+       zfs_refcount_destroy(&ds->ds_longholds);
        rrw_destroy(&ds->ds_bp_rwlock);
 
        kmem_free(ds, sizeof (dsl_dataset_t));
@@ -385,8 +514,8 @@ dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag)
 }
 
 int
-dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
-    ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp)
+dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
+    dsl_dataset_t **dsp)
 {
        objset_t *mos = dp->dp_meta_objset;
        dmu_buf_t *dbuf;
@@ -417,15 +546,23 @@ dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
                ds->ds_is_snapshot = dsl_dataset_phys(ds)->ds_num_children != 0;
                list_link_init(&ds->ds_synced_link);
 
+               err = dsl_dir_hold_obj(dp, dsl_dataset_phys(ds)->ds_dir_obj,
+                   NULL, ds, &ds->ds_dir);
+               if (err != 0) {
+                       kmem_free(ds, sizeof (dsl_dataset_t));
+                       dmu_buf_rele(dbuf, tag);
+                       return (err);
+               }
+
                mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
                mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
                mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
+               mutex_init(&ds->ds_remap_deadlist_lock,
+                   NULL, MUTEX_DEFAULT, NULL);
                rrw_init(&ds->ds_bp_rwlock, B_FALSE);
-               refcount_create(&ds->ds_longholds);
+               zfs_refcount_create(&ds->ds_longholds);
 
                bplist_create(&ds->ds_pending_deadlist);
-               dsl_deadlist_open(&ds->ds_deadlist,
-                   mos, dsl_dataset_phys(ds)->ds_deadlist_obj);
 
                list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
                    offsetof(dmu_sendarg_t, dsa_link));
@@ -440,31 +577,10 @@ dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
                                if (!(spa_feature_table[f].fi_flags &
                                    ZFEATURE_FLAG_PER_DATASET))
                                        continue;
-                               err = zap_contains(mos, dsobj,
-                                   spa_feature_table[f].fi_guid);
-                               if (err == 0) {
-                                       ds->ds_feature_inuse[f] = B_TRUE;
-                               } else {
-                                       ASSERT3U(err, ==, ENOENT);
-                                       err = 0;
-                               }
+                               err = load_zfeature(mos, ds, f);
                        }
                }
 
-               err = dsl_dir_hold_obj(dp,
-                   dsl_dataset_phys(ds)->ds_dir_obj, NULL, ds, &ds->ds_dir);
-               if (err != 0) {
-                       mutex_destroy(&ds->ds_lock);
-                       mutex_destroy(&ds->ds_opening_lock);
-                       mutex_destroy(&ds->ds_sendstream_lock);
-                       refcount_destroy(&ds->ds_longholds);
-                       bplist_destroy(&ds->ds_pending_deadlist);
-                       dsl_deadlist_close(&ds->ds_deadlist);
-                       kmem_free(ds, sizeof (dsl_dataset_t));
-                       dmu_buf_rele(dbuf, tag);
-                       return (err);
-               }
-
                if (!ds->ds_is_snapshot) {
                        ds->ds_snapname[0] = '\0';
                        if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
@@ -505,6 +621,22 @@ dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
                        ds->ds_reserved = ds->ds_quota = 0;
                }
 
+               if (err == 0 && ds->ds_dir->dd_crypto_obj != 0 &&
+                   ds->ds_is_snapshot &&
+                   zap_contains(mos, dsobj, DS_FIELD_IVSET_GUID) != 0) {
+                       dp->dp_spa->spa_errata =
+                           ZPOOL_ERRATA_ZOL_8308_ENCRYPTION;
+               }
+
+               dsl_deadlist_open(&ds->ds_deadlist,
+                   mos, dsl_dataset_phys(ds)->ds_deadlist_obj);
+               uint64_t remap_deadlist_obj =
+                   dsl_dataset_get_remap_deadlist_object(ds);
+               if (remap_deadlist_obj != 0) {
+                       dsl_deadlist_open(&ds->ds_remap_deadlist, mos,
+                           remap_deadlist_obj);
+               }
+
                dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict_sync,
                    dsl_dataset_evict_async, &ds->ds_dbuf);
                if (err == 0)
@@ -513,13 +645,19 @@ dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
                if (err != 0 || winner != NULL) {
                        bplist_destroy(&ds->ds_pending_deadlist);
                        dsl_deadlist_close(&ds->ds_deadlist);
+                       if (dsl_deadlist_is_open(&ds->ds_remap_deadlist))
+                               dsl_deadlist_close(&ds->ds_remap_deadlist);
                        if (ds->ds_prev)
                                dsl_dataset_rele(ds->ds_prev, ds);
                        dsl_dir_rele(ds->ds_dir, ds);
+                       list_destroy(&ds->ds_prop_cbs);
+                       list_destroy(&ds->ds_sendstreams);
                        mutex_destroy(&ds->ds_lock);
                        mutex_destroy(&ds->ds_opening_lock);
                        mutex_destroy(&ds->ds_sendstream_lock);
-                       refcount_destroy(&ds->ds_longholds);
+                       mutex_destroy(&ds->ds_remap_deadlist_lock);
+                       zfs_refcount_destroy(&ds->ds_longholds);
+                       rrw_destroy(&ds->ds_bp_rwlock);
                        kmem_free(ds, sizeof (dsl_dataset_t));
                        if (err != 0) {
                                dmu_buf_rele(dbuf, tag);
@@ -541,6 +679,7 @@ dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
                        }
                }
        }
+
        ASSERT3P(ds->ds_dbuf, ==, dbuf);
        ASSERT3P(dsl_dataset_phys(ds), ==, dbuf->db_data);
        ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0 ||
@@ -548,22 +687,40 @@ dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
            dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
        *dsp = ds;
 
-       if ((flags & DS_HOLD_FLAG_DECRYPT) && ds->ds_dir->dd_crypto_obj != 0) {
-               err = spa_keystore_create_mapping(dp->dp_spa, ds, ds);
-               if (err != 0) {
-                       dsl_dataset_rele(ds, tag);
-                       return (SET_ERROR(EACCES));
-               }
-       }
-
        return (0);
 }
 
 int
-dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
-    dsl_dataset_t **dsp)
+dsl_dataset_create_key_mapping(dsl_dataset_t *ds)
 {
-       return (dsl_dataset_hold_obj_flags(dp, dsobj, 0, tag, dsp));
+       dsl_dir_t *dd = ds->ds_dir;
+
+       if (dd->dd_crypto_obj == 0)
+               return (0);
+
+       return (spa_keystore_create_mapping(dd->dd_pool->dp_spa,
+           ds, ds, &ds->ds_key_mapping));
+}
+
+int
+dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
+    ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp)
+{
+       int err;
+
+       err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
+       if (err != 0)
+               return (err);
+
+       ASSERT3P(*dsp, !=, NULL);
+
+       if (flags & DS_HOLD_FLAG_DECRYPT) {
+               err = dsl_dataset_create_key_mapping(*dsp);
+               if (err != 0)
+                       dsl_dataset_rele(*dsp, tag);
+       }
+
+       return (err);
 }
 
 int
@@ -669,20 +826,20 @@ void
 dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag)
 {
        ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
-       (void) refcount_add(&ds->ds_longholds, tag);
+       (void) zfs_refcount_add(&ds->ds_longholds, tag);
 }
 
 void
 dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag)
 {
-       (void) refcount_remove(&ds->ds_longholds, tag);
+       (void) zfs_refcount_remove(&ds->ds_longholds, tag);
 }
 
 /* Return B_TRUE if there are any long holds on this dataset. */
 boolean_t
 dsl_dataset_long_held(dsl_dataset_t *ds)
 {
-       return (!refcount_is_zero(&ds->ds_longholds));
+       return (!zfs_refcount_is_zero(&ds->ds_longholds));
 }
 
 void
@@ -721,30 +878,39 @@ dsl_dataset_namelen(dsl_dataset_t *ds)
        VERIFY0(dsl_dataset_get_snapname(ds));
        mutex_enter(&ds->ds_lock);
        int len = strlen(ds->ds_snapname);
+       mutex_exit(&ds->ds_lock);
        /* add '@' if ds is a snap */
        if (len > 0)
                len++;
        len += dsl_dir_namelen(ds->ds_dir);
-       mutex_exit(&ds->ds_lock);
        return (len);
 }
 
 void
-dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag)
+dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
 {
-       if (ds->ds_dir != NULL && ds->ds_dir->dd_crypto_obj != 0 &&
-           (flags & DS_HOLD_FLAG_DECRYPT)) {
-               (void) spa_keystore_remove_mapping(ds->ds_dir->dd_pool->dp_spa,
-                   ds->ds_object, ds);
-       }
-
        dmu_buf_rele(ds->ds_dbuf, tag);
 }
 
 void
-dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
+dsl_dataset_remove_key_mapping(dsl_dataset_t *ds)
 {
-       dsl_dataset_rele_flags(ds, 0, tag);
+       dsl_dir_t *dd = ds->ds_dir;
+
+       if (dd == NULL || dd->dd_crypto_obj == 0)
+               return;
+
+       (void) spa_keystore_remove_mapping(dd->dd_pool->dp_spa,
+           ds->ds_object, ds);
+}
+
+void
+dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag)
+{
+       if (flags & DS_HOLD_FLAG_DECRYPT)
+               dsl_dataset_remove_key_mapping(ds);
+
+       dsl_dataset_rele(ds, tag);
 }
 
 void
@@ -786,8 +952,55 @@ dsl_dataset_has_owner(dsl_dataset_t *ds)
        return (rv);
 }
 
+static boolean_t
+zfeature_active(spa_feature_t f, void *arg)
+{
+       switch (spa_feature_table[f].fi_type) {
+       case ZFEATURE_TYPE_BOOLEAN: {
+               boolean_t val = (boolean_t)arg;
+               ASSERT(val == B_FALSE || val == B_TRUE);
+               return (val);
+       }
+       case ZFEATURE_TYPE_UINT64_ARRAY:
+               /*
+                * In this case, arg points to a uint64 array descriptor.  The
+                * feature is active if that pointer is non-null.
+                */
+               return (arg != NULL);
+       default:
+               panic("Invalid zfeature type %d", spa_feature_table[f].fi_type);
+               return (B_FALSE);
+       }
+}
+
+boolean_t
+dsl_dataset_feature_is_active(dsl_dataset_t *ds, spa_feature_t f)
+{
+       return (zfeature_active(f, ds->ds_feature[f]));
+}
+
+/*
+ * The buffers passed out by this function are references to internal buffers;
+ * they should not be freed by callers of this function, and they should not be
+ * used after the dataset has been released.
+ */
+boolean_t
+dsl_dataset_get_uint64_array_feature(dsl_dataset_t *ds, spa_feature_t f,
+    uint64_t *outlength, uint64_t **outp)
+{
+       VERIFY(spa_feature_table[f].fi_type & ZFEATURE_TYPE_UINT64_ARRAY);
+       if (!dsl_dataset_feature_is_active(ds, f)) {
+               return (B_FALSE);
+       }
+       struct feature_type_uint64_array_arg *ftuaa = ds->ds_feature[f];
+       *outp = ftuaa->array;
+       *outlength = ftuaa->length;
+       return (B_TRUE);
+}
+
 void
-dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
+dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, void *arg,
+    dmu_tx_t *tx)
 {
        spa_t *spa = dmu_tx_pool(tx)->dp_spa;
        objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset;
@@ -798,20 +1011,44 @@ dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
        spa_feature_incr(spa, f, tx);
        dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx);
 
-       VERIFY0(zap_add(mos, dsobj, spa_feature_table[f].fi_guid,
-           sizeof (zero), 1, &zero, tx));
+       switch (spa_feature_table[f].fi_type) {
+       case ZFEATURE_TYPE_BOOLEAN:
+               ASSERT3S((boolean_t)arg, ==, B_TRUE);
+               VERIFY0(zap_add(mos, dsobj, spa_feature_table[f].fi_guid,
+                   sizeof (zero), 1, &zero, tx));
+               break;
+       case ZFEATURE_TYPE_UINT64_ARRAY:
+       {
+               struct feature_type_uint64_array_arg *ftuaa = arg;
+               VERIFY0(zap_add(mos, dsobj, spa_feature_table[f].fi_guid,
+                   sizeof (uint64_t), ftuaa->length, ftuaa->array, tx));
+               break;
+       }
+       default:
+               panic("Invalid zfeature type %d", spa_feature_table[f].fi_type);
+       }
 }
 
 void
-dsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
+dsl_dataset_deactivate_feature_impl(dsl_dataset_t *ds, spa_feature_t f,
+    dmu_tx_t *tx)
 {
        spa_t *spa = dmu_tx_pool(tx)->dp_spa;
        objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset;
+       uint64_t dsobj = ds->ds_object;
 
        VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET);
 
        VERIFY0(zap_remove(mos, dsobj, spa_feature_table[f].fi_guid, tx));
        spa_feature_decr(spa, f, tx);
+       ds->ds_feature[f] = NULL;
+}
+
+void
+dsl_dataset_deactivate_feature(dsl_dataset_t *ds, spa_feature_t f, dmu_tx_t *tx)
+{
+       unload_zfeature(ds, f);
+       dsl_dataset_deactivate_feature_impl(ds, f, tx);
 }
 
 uint64_t
@@ -875,8 +1112,10 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
                    (DS_FLAG_INCONSISTENT | DS_FLAG_CI_DATASET);
 
                for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
-                       if (origin->ds_feature_inuse[f])
-                               dsl_dataset_activate_feature(dsobj, f, tx);
+                       if (zfeature_active(f, origin->ds_feature[f])) {
+                               dsl_dataset_activate_feature(dsobj, f,
+                                   origin->ds_feature[f], tx);
+                       }
                }
 
                dmu_buf_will_dirty(origin->ds_dbuf, tx);
@@ -1096,20 +1335,19 @@ dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
 
        dp = ds->ds_dir->dd_pool;
        if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
+               objset_t *os = ds->ds_objset;
+
                /* up the hold count until we can be written out */
                dmu_buf_add_ref(ds->ds_dbuf, ds);
-       }
-}
 
-boolean_t
-dsl_dataset_is_dirty(dsl_dataset_t *ds)
-{
-       for (int t = 0; t < TXG_SIZE; t++) {
-               if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,
-                   ds, t))
-                       return (B_TRUE);
+               /* if this dataset is encrypted, grab a reference to the DCK */
+               if (ds->ds_dir->dd_crypto_obj != 0 &&
+                   !os->os_raw_receive &&
+                   !os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
+                       ASSERT3P(ds->ds_key_mapping, !=, NULL);
+                       key_mapping_add_ref(ds->ds_key_mapping, ds);
+               }
        }
-       return (B_FALSE);
 }
 
 static int
@@ -1407,8 +1645,10 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
        dmu_buf_rele(dbuf, FTAG);
 
        for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
-               if (ds->ds_feature_inuse[f])
-                       dsl_dataset_activate_feature(dsobj, f, tx);
+               if (zfeature_active(f, ds->ds_feature[f])) {
+                       dsl_dataset_activate_feature(dsobj, f,
+                           ds->ds_feature[f], tx);
+               }
        }
 
        ASSERT3U(ds->ds_prev != 0, ==,
@@ -1457,10 +1697,51 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
        dsl_deadlist_add_key(&ds->ds_deadlist,
            dsl_dataset_phys(ds)->ds_prev_snap_txg, tx);
 
+       if (dsl_dataset_remap_deadlist_exists(ds)) {
+               uint64_t remap_deadlist_obj =
+                   dsl_dataset_get_remap_deadlist_object(ds);
+               /*
+                * Move the remap_deadlist to the snapshot.  The head
+                * will create a new remap deadlist on demand, from
+                * dsl_dataset_block_remapped().
+                */
+               dsl_dataset_unset_remap_deadlist_object(ds, tx);
+               dsl_deadlist_close(&ds->ds_remap_deadlist);
+
+               dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx);
+               VERIFY0(zap_add(mos, dsobj, DS_FIELD_REMAP_DEADLIST,
+                   sizeof (remap_deadlist_obj), 1, &remap_deadlist_obj, tx));
+       }
+
+       /*
+        * Create an ivset guid for this snapshot if the dataset is
+        * encrypted. This may be overridden by a raw receive. A
+        * previous implementation of this code did not have this
+        * field as part of the on-disk format for ZFS encryption
+        * (see errata #4). As part of the remediation for this
+        * issue, we ask the user to enable the bookmark_v2 feature
+        * which is now a dependency of the encryption feature. We
+        * use this as a heuristic to determine when the user has
+        * elected to correct any datasets created with the old code.
+        * As a result, we only do this step if the bookmark_v2
+        * feature is enabled, which limits the number of states a
+        * given pool / dataset can be in with regard to correcting
+        * the issue.
+        */
+       if (ds->ds_dir->dd_crypto_obj != 0 &&
+           spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARK_V2)) {
+               uint64_t ivset_guid = unique_create();
+
+               dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx);
+               VERIFY0(zap_add(mos, dsobj, DS_FIELD_IVSET_GUID,
+                   sizeof (ivset_guid), 1, &ivset_guid, tx));
+       }
+
        ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, <, tx->tx_txg);
        dsl_dataset_phys(ds)->ds_prev_snap_obj = dsobj;
        dsl_dataset_phys(ds)->ds_prev_snap_txg = crtxg;
        dsl_dataset_phys(ds)->ds_unique_bytes = 0;
+
        if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
                dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
 
@@ -1706,11 +1987,12 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
        dmu_objset_sync(ds->ds_objset, zio, tx);
 
        for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
-               if (ds->ds_feature_activation_needed[f]) {
-                       if (ds->ds_feature_inuse[f])
+               if (zfeature_active(f, ds->ds_feature_activation[f])) {
+                       if (zfeature_active(f, ds->ds_feature[f]))
                                continue;
-                       dsl_dataset_activate_feature(ds->ds_object, f, tx);
-                       ds->ds_feature_inuse[f] = B_TRUE;
+                       dsl_dataset_activate_feature(ds->ds_object, f,
+                           ds->ds_feature_activation[f], tx);
+                       ds->ds_feature[f] = ds->ds_feature_activation[f];
                }
        }
 }
@@ -1736,6 +2018,11 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)
                os->os_synced_dnodes = NULL;
        }
 
+       if (os->os_encrypted)
+               os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;
+       else
+               ASSERT0(os->os_next_write_raw[tx->tx_txg & TXG_MASK]);
+
        ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx)));
 
        dmu_buf_rele(ds->ds_dbuf, ds);
@@ -2541,7 +2828,7 @@ dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
        /* must have a most recent snapshot */
        if (dsl_dataset_phys(ds)->ds_prev_snap_txg < TXG_INITIAL) {
                dsl_dataset_rele(ds, FTAG);
-               return (SET_ERROR(EINVAL));
+               return (SET_ERROR(ESRCH));
        }
 
        /*
@@ -2561,11 +2848,46 @@ dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
         * the latest snapshot is it.
         */
        if (ddra->ddra_tosnap != NULL) {
-               char namebuf[ZFS_MAX_DATASET_NAME_LEN];
+               dsl_dataset_t *snapds;
+
+               /* Check if the target snapshot exists at all. */
+               error = dsl_dataset_hold(dp, ddra->ddra_tosnap, FTAG, &snapds);
+               if (error != 0) {
+                       /*
+                        * ESRCH is used to signal that the target snapshot does
+                        * not exist, while ENOENT is used to report that
+                        * the rolled back dataset does not exist.
+                        * ESRCH is also used to cover other cases where the
+                        * target snapshot is not related to the dataset being
+                        * rolled back such as being in a different pool.
+                        */
+                       if (error == ENOENT || error == EXDEV)
+                               error = SET_ERROR(ESRCH);
+                       dsl_dataset_rele(ds, FTAG);
+                       return (error);
+               }
+               ASSERT(snapds->ds_is_snapshot);
 
-               dsl_dataset_name(ds->ds_prev, namebuf);
-               if (strcmp(namebuf, ddra->ddra_tosnap) != 0)
-                       return (SET_ERROR(EXDEV));
+               /* Check if the snapshot is the latest snapshot indeed. */
+               if (snapds != ds->ds_prev) {
+                       /*
+                        * Distinguish between the case where the only problem
+                        * is intervening snapshots (EEXIST) vs the snapshot
+                        * not being a valid target for rollback (ESRCH).
+                        */
+                       if (snapds->ds_dir == ds->ds_dir ||
+                           (dsl_dir_is_clone(ds->ds_dir) &&
+                           dsl_dir_phys(ds->ds_dir)->dd_origin_obj ==
+                           snapds->ds_object)) {
+                               error = SET_ERROR(EEXIST);
+                       } else {
+                               error = SET_ERROR(ESRCH);
+                       }
+                       dsl_dataset_rele(snapds, FTAG);
+                       dsl_dataset_rele(ds, FTAG);
+                       return (error);
+               }
+               dsl_dataset_rele(snapds, FTAG);
        }
 
        /* must not have any bookmarks after the most recent snapshot */
@@ -2574,8 +2896,10 @@ dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
        nvlist_t *bookmarks = fnvlist_alloc();
        error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks);
        fnvlist_free(proprequest);
-       if (error != 0)
+       if (error != 0) {
+               dsl_dataset_rele(ds, FTAG);
                return (error);
+       }
        for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL);
            pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) {
                nvlist_t *valuenv =
@@ -2713,12 +3037,8 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
                return (err);
 
        hds = ddpa->ddpa_clone;
-       snap = list_head(&ddpa->shared_snaps);
-       origin_ds = snap->ds;
        max_snap_len = MAXNAMELEN - strlen(ddpa->ddpa_clonename) - 1;
 
-       snap = list_head(&ddpa->origin_snaps);
-
        if (dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE) {
                promote_rele(ddpa, FTAG);
                return (SET_ERROR(EXDEV));
@@ -2752,6 +3072,7 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
 
        /* compute origin's new unique space */
        snap = list_tail(&ddpa->clone_snaps);
+       ASSERT(snap != NULL);
        ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==,
            origin_ds->ds_object);
        dsl_deadlist_space_range(&snap->ds->ds_deadlist,
@@ -3331,6 +3652,41 @@ dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
        return (0);
 }
 
+/*
+ * Exchange the remap deadlists of "clone" and "origin".
+ *
+ * Both datasets are first stripped of whatever remap deadlist they have
+ * (the open deadlist is closed and the DS_FIELD_REMAP_DEADLIST entry is
+ * removed); only afterwards is each saved object number attached to the
+ * opposite dataset and opened there.  A side that had no remap deadlist
+ * (object number 0) hands nothing over, so its counterpart ends up without
+ * one.  Asserts that it runs in syncing context.
+ */
+static void
+dsl_dataset_swap_remap_deadlists(dsl_dataset_t *clone,
+    dsl_dataset_t *origin, dmu_tx_t *tx)
+{
+       dsl_pool_t *dp = dmu_tx_pool(tx);
+
+       ASSERT(dsl_pool_sync_context(dp));
+
+       /* Capture both object numbers before either entry is removed. */
+       uint64_t clone_old_obj = dsl_dataset_get_remap_deadlist_object(clone);
+       uint64_t origin_old_obj =
+           dsl_dataset_get_remap_deadlist_object(origin);
+       boolean_t clone_had_dl = (clone_old_obj != 0);
+       boolean_t origin_had_dl = (origin_old_obj != 0);
+
+       /* Step 1: detach the current remap deadlist from each side. */
+       if (clone_had_dl) {
+               dsl_deadlist_close(&clone->ds_remap_deadlist);
+               dsl_dataset_unset_remap_deadlist_object(clone, tx);
+       }
+       if (origin_had_dl) {
+               dsl_deadlist_close(&origin->ds_remap_deadlist);
+               dsl_dataset_unset_remap_deadlist_object(origin, tx);
+       }
+
+       /* Step 2: attach each saved object to the other dataset. */
+       if (clone_had_dl) {
+               dsl_dataset_set_remap_deadlist_object(origin,
+                   clone_old_obj, tx);
+               dsl_deadlist_open(&origin->ds_remap_deadlist,
+                   dp->dp_meta_objset, clone_old_obj);
+       }
+       if (origin_had_dl) {
+               dsl_dataset_set_remap_deadlist_object(clone,
+                   origin_old_obj, tx);
+               dsl_deadlist_open(&clone->ds_remap_deadlist,
+                   dp->dp_meta_objset, origin_old_obj);
+       }
+}
+
 void
 dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
     dsl_dataset_t *origin_head, dmu_tx_t *tx)
@@ -3354,31 +3710,31 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
        for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
                if (!(spa_feature_table[f].fi_flags &
                    ZFEATURE_FLAG_PER_DATASET)) {
-                       ASSERT(!clone->ds_feature_inuse[f]);
-                       ASSERT(!origin_head->ds_feature_inuse[f]);
+                       ASSERT(!dsl_dataset_feature_is_active(clone, f));
+                       ASSERT(!dsl_dataset_feature_is_active(origin_head, f));
                        continue;
                }
 
-               boolean_t clone_inuse = clone->ds_feature_inuse[f];
-               boolean_t origin_head_inuse = origin_head->ds_feature_inuse[f];
+               boolean_t clone_inuse = dsl_dataset_feature_is_active(clone, f);
+               void *clone_feature = clone->ds_feature[f];
+               boolean_t origin_head_inuse =
+                   dsl_dataset_feature_is_active(origin_head, f);
+               void *origin_head_feature = origin_head->ds_feature[f];
+
+               if (clone_inuse)
+                       dsl_dataset_deactivate_feature_impl(clone, f, tx);
+               if (origin_head_inuse)
+                       dsl_dataset_deactivate_feature_impl(origin_head, f, tx);
 
                if (clone_inuse) {
-                       dsl_dataset_deactivate_feature(clone->ds_object, f, tx);
-                       clone->ds_feature_inuse[f] = B_FALSE;
-               }
-               if (origin_head_inuse) {
-                       dsl_dataset_deactivate_feature(origin_head->ds_object,
-                           f, tx);
-                       origin_head->ds_feature_inuse[f] = B_FALSE;
-               }
-               if (clone_inuse) {
-                       dsl_dataset_activate_feature(origin_head->ds_object,
-                           f, tx);
-                       origin_head->ds_feature_inuse[f] = B_TRUE;
+                       dsl_dataset_activate_feature(origin_head->ds_object, f,
+                           clone_feature, tx);
+                       origin_head->ds_feature[f] = clone_feature;
                }
                if (origin_head_inuse) {
-                       dsl_dataset_activate_feature(clone->ds_object, f, tx);
-                       clone->ds_feature_inuse[f] = B_TRUE;
+                       dsl_dataset_activate_feature(clone->ds_object, f,
+                           origin_head_feature, tx);
+                       clone->ds_feature[f] = origin_head_feature;
                }
        }
 
@@ -3500,6 +3856,7 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
            dsl_dataset_phys(clone)->ds_deadlist_obj);
        dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset,
            dsl_dataset_phys(origin_head)->ds_deadlist_obj);
+       dsl_dataset_swap_remap_deadlists(clone, origin_head, tx);
 
        dsl_scan_ds_clone_swapped(origin_head, clone, tx);
 
@@ -3670,7 +4027,8 @@ dsl_dataset_set_refquota(const char *dsname, zprop_source_t source,
        ddsqra.ddsqra_value = refquota;
 
        return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check,
-           dsl_dataset_set_refquota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE));
+           dsl_dataset_set_refquota_sync, &ddsqra, 0,
+           ZFS_SPACE_CHECK_EXTRA_RESERVED));
 }
 
 static int
@@ -3785,8 +4143,8 @@ dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
        ddsqra.ddsqra_value = refreservation;
 
        return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check,
-           dsl_dataset_set_refreservation_sync, &ddsqra,
-           0, ZFS_SPACE_CHECK_NONE));
+           dsl_dataset_set_refreservation_sync, &ddsqra, 0,
+           ZFS_SPACE_CHECK_EXTRA_RESERVED));
 }
 
 /*
@@ -4008,7 +4366,94 @@ dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds)
            ds->ds_object, DS_FIELD_RESUME_TOGUID) == 0);
 }
 
-#if defined(_KERNEL) && defined(HAVE_SPL)
+/*
+ * Return the object number stored under DS_FIELD_REMAP_DEADLIST for "ds".
+ *
+ * Returns 0 when the dataset is not zapified or when the entry does not
+ * exist.  Any lookup error other than ENOENT trips a VERIFY.
+ */
+uint64_t
+dsl_dataset_get_remap_deadlist_object(dsl_dataset_t *ds)
+{
+       uint64_t dl_obj;
+
+       if (!dsl_dataset_is_zapified(ds))
+               return (0);
+
+       int rc = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset,
+           ds->ds_object, DS_FIELD_REMAP_DEADLIST, sizeof (dl_obj), 1,
+           &dl_obj);
+
+       if (rc == 0) {
+               /* A successful lookup is expected to yield a nonzero object. */
+               ASSERT(dl_obj != 0);
+               return (dl_obj);
+       }
+
+       /* The only tolerated failure is a missing entry. */
+       VERIFY3S(rc, ==, ENOENT);
+       return (0);
+}
+
+/*
+ * Report whether "ds" has a remap deadlist, judged by whether its in-core
+ * ds_remap_deadlist is open.  The EQUIV check additionally asserts that
+ * this agrees with the presence of a stored remap deadlist object.
+ */
+boolean_t
+dsl_dataset_remap_deadlist_exists(dsl_dataset_t *ds)
+{
+       boolean_t opened = dsl_deadlist_is_open(&ds->ds_remap_deadlist);
+
+       /* Keep the object lookup inside the assertion macro. */
+       EQUIV(opened, dsl_dataset_get_remap_deadlist_object(ds) != 0);
+       return (opened);
+}
+
+/*
+ * Record "obj" as the remap deadlist object of "ds": zapify the dataset,
+ * then add a DS_FIELD_REMAP_DEADLIST entry for the dataset object in the
+ * pool's meta objset.  "obj" is asserted to be nonzero and the add is
+ * verified to succeed.
+ */
+static void
+dsl_dataset_set_remap_deadlist_object(dsl_dataset_t *ds, uint64_t obj,
+    dmu_tx_t *tx)
+{
+       ASSERT(obj != 0);
+
+       dsl_dataset_zapify(ds, tx);
+
+       dsl_pool_t *dp = ds->ds_dir->dd_pool;
+       int rc = zap_add(dp->dp_meta_objset, ds->ds_object,
+           DS_FIELD_REMAP_DEADLIST, sizeof (obj), 1, &obj, tx);
+       VERIFY0(rc);
+}
+
+/*
+ * Remove the DS_FIELD_REMAP_DEADLIST entry of "ds" from the pool's meta
+ * objset.  The removal is verified to succeed.
+ */
+static void
+dsl_dataset_unset_remap_deadlist_object(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+       dsl_pool_t *dp = ds->ds_dir->dd_pool;
+       int rc;
+
+       rc = zap_remove(dp->dp_meta_objset, ds->ds_object,
+           DS_FIELD_REMAP_DEADLIST, tx);
+       VERIFY0(rc);
+}
+
+/*
+ * Tear down the remap deadlist of "ds": close the in-core deadlist, free
+ * its object, drop the dataset's DS_FIELD_REMAP_DEADLIST entry, and
+ * decrement the SPA_FEATURE_OBSOLETE_COUNTS refcount.  Asserts that "tx"
+ * is syncing and that the remap deadlist exists.
+ */
+void
+dsl_dataset_destroy_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+       spa_t *spa = ds->ds_dir->dd_pool->dp_spa;
+
+       ASSERT(dmu_tx_is_syncing(tx));
+       ASSERT(dsl_dataset_remap_deadlist_exists(ds));
+
+       /* Save the object number for the free that follows the close. */
+       uint64_t doomed_obj = ds->ds_remap_deadlist.dl_object;
+
+       dsl_deadlist_close(&ds->ds_remap_deadlist);
+       dsl_deadlist_free(spa_meta_objset(spa), doomed_obj, tx);
+       dsl_dataset_unset_remap_deadlist_object(ds, tx);
+       spa_feature_decr(spa, SPA_FEATURE_OBSOLETE_COUNTS, tx);
+}
+
+/*
+ * Give "ds" a remap deadlist: clone the dataset's regular deadlist
+ * (bounded by UINT64_MAX and the previous snapshot object), record the
+ * resulting object under DS_FIELD_REMAP_DEADLIST, open it as
+ * ds_remap_deadlist, and increment the SPA_FEATURE_OBSOLETE_COUNTS
+ * refcount.  Asserts that "tx" is syncing and that ds_remap_deadlist_lock
+ * is held.
+ */
+void
+dsl_dataset_create_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+       spa_t *spa = ds->ds_dir->dd_pool->dp_spa;
+
+       ASSERT(dmu_tx_is_syncing(tx));
+       ASSERT(MUTEX_HELD(&ds->ds_remap_deadlist_lock));
+       /*
+        * For now, remap deadlists are created only when there are
+        * indirect vdevs with referenced mappings.
+        */
+       ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REMOVAL));
+
+       uint64_t prev_snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
+       uint64_t new_obj = dsl_deadlist_clone(&ds->ds_deadlist, UINT64_MAX,
+           prev_snap_obj, tx);
+
+       dsl_dataset_set_remap_deadlist_object(ds, new_obj, tx);
+       dsl_deadlist_open(&ds->ds_remap_deadlist, spa_meta_objset(spa),
+           new_obj);
+       spa_feature_incr(spa, SPA_FEATURE_OBSOLETE_COUNTS, tx);
+}
+
+#if defined(_KERNEL)
 #if defined(_LP64)
 module_param(zfs_max_recordsize, int, 0644);
 MODULE_PARM_DESC(zfs_max_recordsize, "Max allowed record size");