Add zstd support to zfs

[mirror_zfs.git] / module / zfs / zfs_ioctl.c
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c

index 633d738aae1697a99d80bd7ac4c2c172d726d8e9..7f623bb046eafde801ede686647e6ebf41c23819 100644 (file)
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -27,16 +27,19 @@
   * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
   * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
   * Copyright (c) 2014, Joyent, Inc. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
   * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
   * Copyright (c) 2013 Steven Hartland. All rights reserved.
   * Copyright (c) 2014 Integros [integros.com]
   * Copyright 2016 Toomas Soome <tsoome@me.com>
   * Copyright (c) 2016 Actifio, Inc. All rights reserved.
- * Copyright (c) 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
- * Copyright (c) 2017 Datto Inc. All rights reserved.
+ * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
   * Copyright 2017 RackTop Systems.
   * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+ * Copyright (c) 2019 Datto Inc.
+ * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
   */
  
  /*
@@ -162,6 +165,7 @@
  #include <sys/cmn_err.h>
  #include <sys/stat.h>
  #include <sys/zfs_ioctl.h>
+#include <sys/zfs_quota.h>
  #include <sys/zfs_vfsops.h>
  #include <sys/zfs_znode.h>
  #include <sys/zap.h>
@@ -176,13 +180,13 @@
  #include <sys/dsl_deleg.h>
  #include <sys/dmu_objset.h>
  #include <sys/dmu_impl.h>
+#include <sys/dmu_redact.h>
  #include <sys/dmu_tx.h>
  #include <sys/sunddi.h>
  #include <sys/policy.h>
  #include <sys/zone.h>
  #include <sys/nvpair.h>
  #include <sys/pathname.h>
-#include <sys/sdt.h>
  #include <sys/fs/zfs.h>
  #include <sys/zfs_ctldir.h>
  #include <sys/zfs_dir.h>
@@ -191,8 +195,12 @@
  #include <sys/dsl_scan.h>
  #include <sys/fm/util.h>
  #include <sys/dsl_crypt.h>
+#include <sys/rrwlock.h>
+#include <sys/zfs_file.h>
  
+#include <sys/dmu_recv.h>
  #include <sys/dmu_send.h>
+#include <sys/dmu_recv.h>
  #include <sys/dsl_destroy.h>
  #include <sys/dsl_bookmark.h>
  #include <sys/dsl_userhold.h>
@@ -200,10 +208,9 @@
  #include <sys/zcp.h>
  #include <sys/zio_checksum.h>
  #include <sys/vdev_removal.h>
-#include <sys/zfs_sysfs.h>
-
-#include <linux/miscdevice.h>
-#include <linux/slab.h>
+#include <sys/vdev_impl.h>
+#include <sys/vdev_initialize.h>
+#include <sys/vdev_trim.h>
  
  #include "zfs_namecheck.h"
  #include "zfs_prop.h"
@@ -212,70 +219,24 @@
  
  #include <sys/lua/lua.h>
  #include <sys/lua/lauxlib.h>
+#include <sys/zfs_ioctl_impl.h>
+
+kmutex_t zfsdev_state_lock;
+zfsdev_state_t *zfsdev_state_list;
  
  /*
   * Limit maximum nvlist size.  We don't want users passing in insane values
   * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
+ * Defaults to 0=auto which is handled by platform code.
   */
-#define        MAX_NVLIST_SRC_SIZE     KMALLOC_MAX_SIZE
-
-kmutex_t zfsdev_state_lock;
-zfsdev_state_t *zfsdev_state_list;
-
-extern void zfs_init(void);
-extern void zfs_fini(void);
+unsigned long zfs_max_nvlist_src_size = 0;
  
  uint_t zfs_fsyncer_key;
-extern uint_t rrw_tsd_key;
-static uint_t zfs_allow_log_key;
-
-typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
-typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
-typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
-
-/*
- * IOC Keys are used to document and validate user->kernel interface inputs.
- * See zfs_keys_recv_new for an example declaration. Any key name that is not
- * listed will be rejected as input.
- *
- * The keyname 'optional' is always allowed, and must be an nvlist if present.
- * Arguments which older kernels can safely ignore can be placed under the
- * "optional" key.
- *
- * When adding new keys to an existing ioc for new functionality, consider:
- *     - adding an entry into zfs_sysfs.c zfs_features[] list
- *     - updating the libzfs_input_check.c test utility
- *
- * Note: in the ZK_WILDCARDLIST case, the name serves as documentation
- * for the expected name (bookmark, snapshot, property, etc) but there
- * is no validation in the preflight zfs_check_input_nvpairs() check.
- */
-typedef enum {
-       ZK_OPTIONAL = 1 << 0,           /* pair is optional */
-       ZK_WILDCARDLIST = 1 << 1,       /* one or more unspecified key names */
-} ioc_key_flag_t;
+uint_t zfs_allow_log_key;
  
  /* DATA_TYPE_ANY is used when zkey_type can vary. */
  #define        DATA_TYPE_ANY   DATA_TYPE_UNKNOWN
  
-typedef struct zfs_ioc_key {
-       const char      *zkey_name;
-       data_type_t     zkey_type;
-       ioc_key_flag_t  zkey_flags;
-} zfs_ioc_key_t;
-
-typedef enum {
-       NO_NAME,
-       POOL_NAME,
-       DATASET_NAME
-} zfs_ioc_namecheck_t;
-
-typedef enum {
-       POOL_CHECK_NONE         = 1 << 0,
-       POOL_CHECK_SUSPENDED    = 1 << 1,
-       POOL_CHECK_READONLY     = 1 << 2,
-} zfs_ioc_poolcheck_t;
-
  typedef struct zfs_ioc_vec {
         zfs_ioc_legacy_func_t   *zvec_legacy_func;
         zfs_ioc_func_t          *zvec_func;
@@ -342,23 +303,6 @@ history_str_get(zfs_cmd_t *zc)
         return (buf);
  }
  
-/*
- * Check to see if the named dataset is currently defined as bootable
- */
-static boolean_t
-zfs_is_bootfs(const char *name)
-{
-       objset_t *os;
-
-       if (dmu_objset_hold(name, FTAG, &os) == 0) {
-               boolean_t ret;
-               ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
-               dmu_objset_rele(os, FTAG);
-               return (ret);
-       }
-       return (B_FALSE);
-}
-
  /*
   * Return non-zero if the spa version is less than requested version.
   */
@@ -483,7 +427,8 @@ zfs_dozonecheck(const char *dataset, cred_t *cr)
  {
         uint64_t zoned;
  
-       if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
+       if (dsl_prop_get_integer(dataset, zfs_prop_to_name(ZFS_PROP_ZONED),
+           &zoned, NULL))
                 return (SET_ERROR(ENOENT));
  
         return (zfs_dozonecheck_impl(dataset, zoned, cr));
@@ -494,7 +439,7 @@ zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
  {
         uint64_t zoned;
  
-       if (dsl_prop_get_int_ds(ds, "zoned", &zoned))
+       if (dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned))
                 return (SET_ERROR(ENOENT));
  
         return (zfs_dozonecheck_impl(dataset, zoned, cr));
@@ -679,8 +624,8 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
                          * limit on things *under* (ie. contained by)
                          * the thing they own.
                          */
-                       if (dsl_prop_get_integer(dsname, "zoned", &zoned,
-                           setpoint))
+                       if (dsl_prop_get_integer(dsname,
+                           zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, setpoint))
                                 return (SET_ERROR(EPERM));
                         if (!zoned || strlen(dsname) <= strlen(setpoint))
                                 return (SET_ERROR(EPERM));
@@ -773,65 +718,16 @@ zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
             ZFS_DELEG_PERM_SEND, cr));
  }
  
-#ifdef HAVE_SMB_SHARE
-/* ARGSUSED */
  static int
-zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
-{
-       vnode_t *vp;
-       int error;
-
-       if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
-           NO_FOLLOW, NULL, &vp)) != 0)
-               return (error);
-
-       /* Now make sure mntpnt and dataset are ZFS */
-
-       if (vp->v_vfsp->vfs_fstype != zfsfstype ||
-           (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
-           zc->zc_name) != 0)) {
-               VN_RELE(vp);
-               return (SET_ERROR(EPERM));
-       }
-
-       VN_RELE(vp);
-       return (dsl_deleg_access(zc->zc_name,
-           ZFS_DELEG_PERM_SHARE, cr));
-}
-#endif /* HAVE_SMB_SHARE */
-
-int
  zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  {
-#ifdef HAVE_SMB_SHARE
-       if (!INGLOBALZONE(curproc))
-               return (SET_ERROR(EPERM));
-
-       if (secpolicy_nfs(cr) == 0) {
-               return (0);
-       } else {
-               return (zfs_secpolicy_deleg_share(zc, innvl, cr));
-       }
-#else
         return (SET_ERROR(ENOTSUP));
-#endif /* HAVE_SMB_SHARE */
  }
  
-int
+static int
  zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  {
-#ifdef HAVE_SMB_SHARE
-       if (!INGLOBALZONE(curproc))
-               return (SET_ERROR(EPERM));
-
-       if (secpolicy_smb(cr) == 0) {
-               return (0);
-       } else {
-               return (zfs_secpolicy_deleg_share(zc, innvl, cr));
-       }
-#else
         return (SET_ERROR(ENOTSUP));
-#endif /* HAVE_SMB_SHARE */
  }
  
  static int
@@ -1087,14 +983,6 @@ zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
         return (error);
  }
  
-/* ARGSUSED */
-static int
-zfs_secpolicy_remap(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
-{
-       return (zfs_secpolicy_write_perms(zc->zc_name,
-           ZFS_DELEG_PERM_REMAP, cr));
-}
-
  /* ARGSUSED */
  static int
  zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
@@ -1178,7 +1066,7 @@ zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
   * SYS_CONFIG privilege, which is not available in a local zone.
   */
  /* ARGSUSED */
-static int
+int
  zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  {
         if (secpolicy_sys_config(cr, B_FALSE) != 0)
@@ -1485,10 +1373,7 @@ getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
         mutex_enter(&os->os_user_ptr_lock);
         *zfvp = dmu_objset_get_user(os);
         /* bump s_active only when non-zero to prevent umount race */
-       if (*zfvp == NULL || (*zfvp)->z_sb == NULL ||
-           !atomic_inc_not_zero(&((*zfvp)->z_sb->s_active))) {
-               error = SET_ERROR(ESRCH);
-       }
+       error = zfs_vfs_ref(zfvp);
         mutex_exit(&os->os_user_ptr_lock);
         return (error);
  }
@@ -1542,8 +1427,8 @@ zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
  {
         rrm_exit(&zfsvfs->z_teardown_lock, tag);
  
-       if (zfsvfs->z_sb) {
-               deactivate_super(zfsvfs->z_sb);
+       if (zfs_vfs_held(zfsvfs)) {
+               zfs_vfs_rele(zfsvfs);
         } else {
                 dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
                 zfsvfs_free(zfsvfs);
@@ -1559,6 +1444,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
         nvlist_t *zplprops = NULL;
         dsl_crypto_params_t *dcp = NULL;
         char *spa_name = zc->zc_name;
+       boolean_t unload_wkey = B_TRUE;
  
         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
             zc->zc_iflags, &config)))
@@ -1586,11 +1472,8 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
                 (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
                 if (nvl) {
                         error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
-                       if (error != 0) {
-                               nvlist_free(config);
-                               nvlist_free(props);
-                               return (error);
-                       }
+                       if (error != 0)
+                               goto pool_props_bad;
                         (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
                 }
  
@@ -1598,11 +1481,8 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
                     &hidden_args);
                 error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
                     rootprops, hidden_args, &dcp);
-               if (error != 0) {
-                       nvlist_free(config);
-                       nvlist_free(props);
-                       return (error);
-               }
+               if (error != 0)
+                       goto pool_props_bad;
                 (void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
  
                 VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
@@ -1622,15 +1502,17 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
          * Set the remaining root properties
          */
         if (!error && (error = zfs_set_prop_nvlist(spa_name,
-           ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
+           ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) {
                 (void) spa_destroy(spa_name);
+               unload_wkey = B_FALSE; /* spa_destroy() unloads wrapping keys */
+       }
  
  pool_props_bad:
         nvlist_free(rootprops);
         nvlist_free(zplprops);
         nvlist_free(config);
         nvlist_free(props);
-       dsl_crypto_params_free(dcp, !!error);
+       dsl_crypto_params_free(dcp, unload_wkey && !!error);
  
         return (error);
  }
@@ -2042,8 +1924,9 @@ static int
  zfs_ioc_vdev_attach(zfs_cmd_t *zc)
  {
         spa_t *spa;
-       int replacing = zc->zc_cookie;
         nvlist_t *config;
+       int replacing = zc->zc_cookie;
+       int rebuild = zc->zc_simple;
         int error;
  
         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
@@ -2051,7 +1934,8 @@ zfs_ioc_vdev_attach(zfs_cmd_t *zc)
  
         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
             zc->zc_iflags, &config)) == 0) {
-               error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
+               error = spa_vdev_attach(spa, zc->zc_guid, config, replacing,
+                   rebuild);
                 nvlist_free(config);
         }
  
@@ -2159,7 +2043,7 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
                  * which we aren't supposed to do with a
                  * DS_MODE_USER hold, because it could be
                  * inconsistent.  So this is a bit of a workaround...
-                * XXX reading with out owning
+                * XXX reading without owning
                  */
                 if (!zc->zc_objset_stats.dds_inconsistent &&
                     dmu_objset_type(os) == DMU_OST_ZVOL) {
@@ -2361,7 +2245,8 @@ top:
   * inputs:
   * zc_name             name of filesystem
   * zc_cookie           zap cursor
- * zc_nvlist_dst_size  size of buffer for property nvlist
+ * zc_nvlist_src       iteration range nvlist
+ * zc_nvlist_src_size  size of iteration range nvlist
   *
   * outputs:
   * zc_name             name of next snapshot
@@ -2372,12 +2257,27 @@ top:
  static int
  zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
  {
-       objset_t *os;
         int error;
+       objset_t *os, *ossnap;
+       dsl_dataset_t *ds;
+       uint64_t min_txg = 0, max_txg = 0;
+
+       if (zc->zc_nvlist_src_size != 0) {
+               nvlist_t *props = NULL;
+               error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
+                   zc->zc_iflags, &props);
+               if (error != 0)
+                       return (error);
+               (void) nvlist_lookup_uint64(props, SNAP_ITER_MIN_TXG,
+                   &min_txg);
+               (void) nvlist_lookup_uint64(props, SNAP_ITER_MAX_TXG,
+                   &max_txg);
+               nvlist_free(props);
+       }
  
         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
         if (error != 0) {
-               return (error == ENOENT ? ESRCH : error);
+               return (error == ENOENT ? SET_ERROR(ESRCH) : error);
         }
  
         /*
@@ -2390,26 +2290,52 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
                 return (SET_ERROR(ESRCH));
         }
  
-       error = dmu_snapshot_list_next(os,
-           sizeof (zc->zc_name) - strlen(zc->zc_name),
-           zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
-           NULL);
+       while (error == 0) {
+               if (issig(JUSTLOOKING) && issig(FORREAL)) {
+                       error = SET_ERROR(EINTR);
+                       break;
+               }
  
-       if (error == 0 && !zc->zc_simple) {
-               dsl_dataset_t *ds;
-               dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
+               error = dmu_snapshot_list_next(os,
+                   sizeof (zc->zc_name) - strlen(zc->zc_name),
+                   zc->zc_name + strlen(zc->zc_name), &zc->zc_obj,
+                   &zc->zc_cookie, NULL);
+               if (error == ENOENT) {
+                       error = SET_ERROR(ESRCH);
+                       break;
+               } else if (error != 0) {
+                       break;
+               }
  
-               error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
-               if (error == 0) {
-                       objset_t *ossnap;
+               error = dsl_dataset_hold_obj(dmu_objset_pool(os), zc->zc_obj,
+                   FTAG, &ds);
+               if (error != 0)
+                       break;
+
+               if ((min_txg != 0 && dsl_get_creationtxg(ds) < min_txg) ||
+                   (max_txg != 0 && dsl_get_creationtxg(ds) > max_txg)) {
+                       dsl_dataset_rele(ds, FTAG);
+                       /* undo snapshot name append */
+                       *(strchr(zc->zc_name, '@') + 1) = '\0';
+                       /* skip snapshot */
+                       continue;
+               }
+
+               if (zc->zc_simple) {
+                       dsl_dataset_rele(ds, FTAG);
+                       break;
+               }
  
-                       error = dmu_objset_from_ds(ds, &ossnap);
-                       if (error == 0)
-                               error = zfs_ioc_objset_stats_impl(zc, ossnap);
+               if ((error = dmu_objset_from_ds(ds, &ossnap)) != 0) {
+                       dsl_dataset_rele(ds, FTAG);
+                       break;
+               }
+               if ((error = zfs_ioc_objset_stats_impl(zc, ossnap)) != 0) {
                         dsl_dataset_rele(ds, FTAG);
+                       break;
                 }
-       } else if (error == ENOENT) {
-               error = SET_ERROR(ESRCH);
+               dsl_dataset_rele(ds, FTAG);
+               break;
         }
  
         dmu_objset_rele(os, FTAG);
@@ -2540,6 +2466,15 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
         case ZFS_PROP_REFRESERVATION:
                 err = dsl_dataset_set_refreservation(dsname, source, intval);
                 break;
+       case ZFS_PROP_COMPRESSION:
+               err = dsl_dataset_set_compression(dsname, source, intval);
+               /*
+                * Set err to -1 to force the zfs_set_prop_nvlist code down the
+                * default path to set the value in the nvlist.
+                */
+               if (err == 0)
+                       err = -1;
+               break;
         case ZFS_PROP_VOLSIZE:
                 err = zvol_set_volsize(dsname, intval);
                 break;
@@ -2563,7 +2498,8 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
                         zfs_cmd_t *zc;
  
                         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
-                       (void) strcpy(zc->zc_name, dsname);
+                       (void) strlcpy(zc->zc_name, dsname,
+                           sizeof (zc->zc_name));
                         (void) zfs_ioc_userspace_upgrade(zc);
                         (void) zfs_ioc_id_quota_upgrade(zc);
                         kmem_free(zc, sizeof (zfs_cmd_t));
@@ -2650,7 +2586,8 @@ retry:
                                 case PROP_TYPE_INDEX:
                                         if (zfs_prop_index_to_string(prop,
                                             intval, &unused) != 0)
-                                               err = SET_ERROR(EINVAL);
+                                               err =
+                                                   SET_ERROR(ZFS_ERR_BADPROP);
                                         break;
                                 default:
                                         cmn_err(CE_PANIC,
@@ -2750,10 +2687,9 @@ retry:
   * Check that all the properties are valid user properties.
   */
  static int
-zfs_check_userprops(const char *fsname, nvlist_t *nvl)
+zfs_check_userprops(nvlist_t *nvl)
  {
         nvpair_t *pair = NULL;
-       int error = 0;
  
         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
                 const char *propname = nvpair_name(pair);
@@ -2762,10 +2698,6 @@ zfs_check_userprops(const char *fsname, nvlist_t *nvl)
                     nvpair_type(pair) != DATA_TYPE_STRING)
                         return (SET_ERROR(EINVAL));
  
-               if ((error = zfs_secpolicy_write_perms(fsname,
-                   ZFS_DELEG_PERM_USERPROP, CRED())))
-                       return (error);
-
                 if (strlen(propname) >= ZAP_MAXNAMELEN)
                         return (SET_ERROR(ENAMETOOLONG));
  
@@ -3128,8 +3060,9 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
  
         ASSERT(zplprops != NULL);
  
+       /* parent dataset must be a filesystem */
         if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
-               return (SET_ERROR(EINVAL));
+               return (SET_ERROR(ZFS_ERR_WRONG_PARENT));
  
         /*
          * Pull out creator prop choices, if any.
@@ -3208,15 +3141,11 @@ zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
         uint64_t zplver = ZPL_VERSION;
         objset_t *os = NULL;
         char parentname[ZFS_MAX_DATASET_NAME_LEN];
-       char *cp;
         spa_t *spa;
         uint64_t spa_vers;
         int error;
  
-       (void) strlcpy(parentname, dataset, sizeof (parentname));
-       cp = strrchr(parentname, '/');
-       ASSERT(cp != NULL);
-       cp[0] = '\0';
+       zfs_get_parent(dataset, parentname, sizeof (parentname));
  
         if ((error = spa_open(dataset, &spa, FTAG)) != 0)
                 return (error);
@@ -3380,8 +3309,9 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
  
                         /*
                          * Volumes will return EBUSY and cannot be destroyed
-                        * until all asynchronous minor handling has completed.
-                        * Wait for the spa_zvol_taskq to drain then retry.
+                        * until all asynchronous minor handling (e.g. from
+                        * setting the volmode property) has completed. Wait for
+                        * the spa_zvol_taskq to drain then retry.
                          */
                         error2 = dsl_destroy_head(fsname);
                         while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) {
@@ -3453,11 +3383,8 @@ static const zfs_ioc_key_t zfs_keys_remap[] = {
  static int
  zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
  {
-       if (strchr(fsname, '@') ||
-           strchr(fsname, '%'))
-               return (SET_ERROR(EINVAL));
-
-       return (dmu_objset_remap_indirects(fsname));
+       /* This IOCTL is no longer supported. */
+       return (0);
  }
  
  /*
@@ -3482,19 +3409,18 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
         nvpair_t *pair;
  
         (void) nvlist_lookup_nvlist(innvl, "props", &props);
-       if ((error = zfs_check_userprops(poolname, props)) != 0)
-               return (error);
-
         if (!nvlist_empty(props) &&
             zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
                 return (SET_ERROR(ENOTSUP));
+       if ((error = zfs_check_userprops(props)) != 0)
+               return (error);
  
         snaps = fnvlist_lookup_nvlist(innvl, "snaps");
         poollen = strlen(poolname);
         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
             pair = nvlist_next_nvpair(snaps, pair)) {
                 const char *name = nvpair_name(pair);
-               const char *cp = strchr(name, '@');
+               char *cp = strchr(name, '@');
  
                 /*
                  * The snap name must contain an @, and the part after it must
@@ -3511,6 +3437,18 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
                     (name[poollen] != '/' && name[poollen] != '@'))
                         return (SET_ERROR(EXDEV));
  
+               /*
+                * Check for permission to set the properties on the fs.
+                */
+               if (!nvlist_empty(props)) {
+                       *cp = '\0';
+                       error = zfs_secpolicy_write_perms(name,
+                           ZFS_DELEG_PERM_USERPROP, CRED());
+                       *cp = '@';
+                       if (error != 0)
+                               return (error);
+               }
+
                 /* This must be the only snap of this fs. */
                 for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
                     pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
@@ -3554,7 +3492,7 @@ zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
                 return (SET_ERROR(EINVAL));
         (void) tsd_set(zfs_allow_log_key, NULL);
         error = spa_open(poolname, &spa, FTAG);
-       strfree(poolname);
+       kmem_strfree(poolname);
         if (error != 0)
                 return (error);
  
@@ -3570,6 +3508,58 @@ zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
         return (error);
  }
  
+/*
+ * This ioctl is used to set the bootenv configuration on the current
+ * pool. This configuration is stored in the second padding area of the label,
+ * and it is used by the GRUB bootloader used on Linux to store the contents
+ * of the grubenv file.  The file is stored as raw ASCII, and is protected by
+ * an embedded checksum.  By default, GRUB will check if the boot filesystem
+ * supports storing the environment data in a special location, and if so,
+ * will invoke filesystem specific logic to retrieve it. This can be overridden
+ * by a variable, should the user so desire.
+ */
+static const zfs_ioc_key_t zfs_keys_set_bootenv[] = {
+       {"envmap",      DATA_TYPE_STRING,       0},
+};
+
+/*
+ * innvl: {
+ *     "envmap" -> string passed to vdev_label_write_bootenv()
+ * }
+ *
+ * outnvl is not used.
+ *
+ * Opens the pool called 'name', writes the envmap string through the pool's
+ * root vdev while holding the vdev state (SCL_ALL), and returns the result
+ * of that write.  A failure to open the pool is returned directly.
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       char *envmap;
+       int error;
+       spa_t *spa;
+
+       envmap = fnvlist_lookup_string(innvl, "envmap");
+       if ((error = spa_open(name, &spa, FTAG)) != 0)
+               return (error);
+       spa_vdev_state_enter(spa, SCL_ALL);
+       error = vdev_label_write_bootenv(spa->spa_root_vdev, envmap);
+       /* The write result is what gets reported; the exit status is dropped. */
+       (void) spa_vdev_state_exit(spa, NULL, 0);
+       spa_close(spa, FTAG);
+       return (error);
+}
+
+static const zfs_ioc_key_t zfs_keys_get_bootenv[] = {
+       /* no nvl keys */
+};
+
+/*
+ * innvl is not used.
+ *
+ * Opens the pool called 'name' and has vdev_label_read_bootenv() fill
+ * outnvl from the pool's root vdev while the vdev state is held (SCL_ALL).
+ * Returns the spa_open() error if the pool cannot be opened, otherwise the
+ * result of the read.
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       spa_t *pool;
+       int rc = spa_open(name, &pool, FTAG);
+
+       if (rc != 0)
+               return (rc);
+
+       spa_vdev_state_enter(pool, SCL_ALL);
+       rc = vdev_label_read_bootenv(pool->spa_root_vdev, outnvl);
+       (void) spa_vdev_state_exit(pool, NULL, 0);
+       spa_close(pool, FTAG);
+
+       return (rc);
+}
+
  /*
   * The dp_config_rwlock must not be held when calling this, because the
   * unmount may need to write out data.
@@ -3633,34 +3623,53 @@ zfs_destroy_unmount_origin(const char *fsname)
   */
  static const zfs_ioc_key_t zfs_keys_destroy_snaps[] = {
         {"snaps",       DATA_TYPE_NVLIST,       0},
-       {"defer",       DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
+       {"defer",       DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
  };
  
  /* ARGSUSED */
  static int
  zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
  {
+       int poollen;
         nvlist_t *snaps;
         nvpair_t *pair;
         boolean_t defer;
+       spa_t *spa;
  
         snaps = fnvlist_lookup_nvlist(innvl, "snaps");
         defer = nvlist_exists(innvl, "defer");
  
+       poollen = strlen(poolname);
         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
             pair = nvlist_next_nvpair(snaps, pair)) {
+               const char *name = nvpair_name(pair);
+
+               /*
+                * The snap must be in the specified pool to prevent the
+                * invalid removal of zvol minors below.
+                */
+               if (strncmp(name, poolname, poollen) != 0 ||
+                   (name[poollen] != '/' && name[poollen] != '@'))
+                       return (SET_ERROR(EXDEV));
+
                 zfs_unmount_snap(nvpair_name(pair));
+               if (spa_open(name, &spa, FTAG) == 0) {
+                       zvol_remove_minors(spa, name, B_TRUE);
+                       spa_close(spa, FTAG);
+               }
         }
  
         return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
  }
  
  /*
- * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
- * All bookmarks must be in the same pool.
+ * Create bookmarks. The bookmark names are of the form <fs>#<bmark>.
+ * All bookmarks and snapshots must be in the same pool.
+ * dsl_bookmark_create_nvl_validate describes the nvlist schema in more detail.
   *
   * innvl: {
- *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
+ *     new_bookmark1 -> existing_snapshot,
+ *     new_bookmark2 -> existing_bookmark,
   * }
   *
   * outnvl: bookmark -> error code (int32)
@@ -3674,25 +3683,6 @@ static const zfs_ioc_key_t zfs_keys_bookmark[] = {
  static int
  zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
  {
-       for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
-           pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
-               char *snap_name;
-
-               /*
-                * Verify the snapshot argument.
-                */
-               if (nvpair_value_string(pair, &snap_name) != 0)
-                       return (SET_ERROR(EINVAL));
-
-
-               /* Verify that the keys (bookmarks) are unique */
-               for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
-                   pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
-                       if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
-                               return (SET_ERROR(EINVAL));
-               }
-       }
-
         return (dsl_bookmark_create(innvl, outnvl));
  }
  
@@ -3717,6 +3707,37 @@ zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
         return (dsl_get_bookmarks(fsname, innvl, outnvl));
  }
  
+/*
+ * innvl is not used.
+ *
+ * outnvl: {
+ *     property 1, property 2, ...
+ * }
+ *
+ */
+static const zfs_ioc_key_t zfs_keys_get_bookmark_props[] = {
+       /* no nvl keys */
+};
+
+/*
+ * Split 'bookmark' ("<fs>#<bmark>") at the '#' and hand both halves to
+ * dsl_get_bookmark_props(), which fills outnvl.
+ *
+ * Returns EINVAL if the name contains no '#', ENAMETOOLONG if the part
+ * before the '#' does not fit in a ZFS_MAX_DATASET_NAME_LEN buffer, and
+ * otherwise whatever dsl_get_bookmark_props() returns.
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_get_bookmark_props(const char *bookmark, nvlist_t *innvl,
+    nvlist_t *outnvl)
+{
+       char fsname[ZFS_MAX_DATASET_NAME_LEN];
+       char *bmname;
+       size_t fslen;
+
+       bmname = strchr(bookmark, '#');
+       if (bmname == NULL)
+               return (SET_ERROR(EINVAL));
+
+       /*
+        * Copy only the text before the '#'.  Checking the length first
+        * avoids searching a truncated copy for a '#' that is no longer
+        * there, which would dereference NULL.
+        */
+       fslen = bmname - bookmark;
+       if (fslen >= sizeof (fsname))
+               return (SET_ERROR(ENAMETOOLONG));
+       (void) strlcpy(fsname, bookmark, fslen + 1);
+
+       /* Step past the '#' to the bookmark's short name. */
+       bmname++;
+
+       return (dsl_get_bookmark_props(fsname, bmname, outnvl));
+}
+
  /*
   * innvl: {
   *     bookmark name 1, bookmark name 2
@@ -3791,9 +3812,9 @@ zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
         nvarg = fnvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST);
  
         if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
             nvarg, outnvl));
@@ -3833,7 +3854,6 @@ zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
  /*
   * inputs:
   * zc_name             name of dataset to destroy
- * zc_objset_type      type of objset
   * zc_defer_destroy    mark for deferred destroy
   *
   * outputs:            none
@@ -3841,9 +3861,17 @@ zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
  static int
  zfs_ioc_destroy(zfs_cmd_t *zc)
  {
+       objset_t *os;
+       dmu_objset_type_t ost;
         int err;
  
-       if (zc->zc_objset_type == DMU_OST_ZFS)
+       err = dmu_objset_hold(zc->zc_name, FTAG, &os);
+       if (err != 0)
+               return (err);
+       ost = dmu_objset_type(os);
+       dmu_objset_rele(os, FTAG);
+
+       if (ost == DMU_OST_ZFS)
                 zfs_unmount_snap(zc->zc_name);
  
         if (strchr(zc->zc_name, '@')) {
@@ -3884,37 +3912,323 @@ zfs_ioc_destroy(zfs_cmd_t *zc)
  }
  
  /*
- * fsname is name of dataset to rollback (to most recent snapshot)
- *
- * innvl may contain name of expected target snapshot
+ * innvl: {
+ *     "initialize_command" -> POOL_INITIALIZE_{CANCEL|START|SUSPEND} (uint64)
+ *     "initialize_vdevs": { -> guids to initialize (nvlist)
+ *         "vdev_path_1": vdev_guid_1, (uint64),
+ *         "vdev_path_2": vdev_guid_2, (uint64),
+ *         ...
+ *     },
+ * }
   *
- * outnvl: "target" -> name of most recent snapshot
+ * outnvl: {
+ *     "initialize_vdevs": { -> initialization errors (nvlist)
+ *         "vdev_path_1": errno, see function body for possible errnos (uint64)
+ *         "vdev_path_2": errno, ... (uint64)
+ *         ...
+ *     }
   * }
+ *
+ * EINVAL is returned for an unknown command or if any of the provided vdev
+ * guids have been specified with a type other than uint64.
   */
-static const zfs_ioc_key_t zfs_keys_rollback[] = {
-       {"target",      DATA_TYPE_STRING,       ZK_OPTIONAL},
+static const zfs_ioc_key_t zfs_keys_pool_initialize[] = {
+       {ZPOOL_INITIALIZE_COMMAND,      DATA_TYPE_UINT64,       0},
+       {ZPOOL_INITIALIZE_VDEVS,        DATA_TYPE_NVLIST,       0}
  };
  
-/* ARGSUSED */
  static int
-zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
+zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
  {
-       zfsvfs_t *zfsvfs;
-       zvol_state_t *zv;
-       char *target = NULL;
-       int error;
+       uint64_t cmd_type;
+       if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
+           &cmd_type) != 0) {
+               return (SET_ERROR(EINVAL));
+       }
  
-       (void) nvlist_lookup_string(innvl, "target", &target);
-       if (target != NULL) {
-               const char *cp = strchr(target, '@');
+       if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
+           cmd_type == POOL_INITIALIZE_START ||
+           cmd_type == POOL_INITIALIZE_SUSPEND)) {
+               return (SET_ERROR(EINVAL));
+       }
  
-               /*
-                * The snap name must contain an @, and the part after it must
-                * contain only valid characters.
-                */
-               if (cp == NULL ||
-                   zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
-                       return (SET_ERROR(EINVAL));
+       nvlist_t *vdev_guids;
+       if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
+           &vdev_guids) != 0) {
+               return (SET_ERROR(EINVAL));
+       }
+
+       for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
+           pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
+               uint64_t vdev_guid;
+               if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
+                       return (SET_ERROR(EINVAL));
+               }
+       }
+
+       spa_t *spa;
+       int error = spa_open(poolname, &spa, FTAG);
+       if (error != 0)
+               return (error);
+
+       nvlist_t *vdev_errlist = fnvlist_alloc();
+       int total_errors = spa_vdev_initialize(spa, vdev_guids, cmd_type,
+           vdev_errlist);
+
+       if (fnvlist_size(vdev_errlist) > 0) {
+               fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
+                   vdev_errlist);
+       }
+       fnvlist_free(vdev_errlist);
+
+       spa_close(spa, FTAG);
+       return (total_errors > 0 ? EINVAL : 0);
+}
+
+/*
+ * innvl: {
+ *     "trim_command" -> POOL_TRIM_{CANCEL|START|SUSPEND} (uint64)
+ *     "trim_vdevs": { -> guids to TRIM (nvlist)
+ *         "vdev_path_1": vdev_guid_1, (uint64),
+ *         "vdev_path_2": vdev_guid_2, (uint64),
+ *         ...
+ *     },
+ *     "trim_rate" -> Target TRIM rate in bytes/sec.
+ *     "trim_secure" -> Set to request a secure TRIM.
+ * }
+ *
+ * outnvl: {
+ *     "trim_vdevs": { -> TRIM errors (nvlist)
+ *         "vdev_path_1": errno, see function body for possible errnos (uint64)
+ *         "vdev_path_2": errno, ... (uint64)
+ *         ...
+ *     }
+ * }
+ *
+ * EINVAL is returned for an unknown command or if any of the provided vdev
+ * guids have been specified with a type other than uint64.
+ */
+static const zfs_ioc_key_t zfs_keys_pool_trim[] = {
+       {ZPOOL_TRIM_COMMAND,    DATA_TYPE_UINT64,               0},
+       {ZPOOL_TRIM_VDEVS,      DATA_TYPE_NVLIST,               0},
+       {ZPOOL_TRIM_RATE,       DATA_TYPE_UINT64,               ZK_OPTIONAL},
+       {ZPOOL_TRIM_SECURE,     DATA_TYPE_BOOLEAN_VALUE,        ZK_OPTIONAL},
+};
+
+/*
+ * Validate a TRIM request, pass it to spa_vdev_trim() and hand the per-vdev
+ * error list back through outnvl.
+ *
+ * Returns EINVAL for a malformed request or when spa_vdev_trim() reports at
+ * least one error, the spa_open() error if the pool cannot be opened, and 0
+ * otherwise.
+ */
+static int
+zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       uint64_t command, trim_rate;
+       boolean_t trim_secure;
+       nvlist_t *guids, *errlist;
+       nvpair_t *elem = NULL;
+       spa_t *spa;
+       int nerrors, error;
+
+       if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_COMMAND, &command) != 0)
+               return (SET_ERROR(EINVAL));
+
+       /* Only the three known TRIM commands are accepted. */
+       switch (command) {
+       case POOL_TRIM_CANCEL:
+       case POOL_TRIM_START:
+       case POOL_TRIM_SUSPEND:
+               break;
+       default:
+               return (SET_ERROR(EINVAL));
+       }
+
+       if (nvlist_lookup_nvlist(innvl, ZPOOL_TRIM_VDEVS, &guids) != 0)
+               return (SET_ERROR(EINVAL));
+
+       /* Every entry in the vdev list must carry a uint64 guid. */
+       while ((elem = nvlist_next_nvpair(guids, elem)) != NULL) {
+               uint64_t guid;
+
+               if (nvpair_value_uint64(elem, &guid) != 0)
+                       return (SET_ERROR(EINVAL));
+       }
+
+       /* Optional, defaults to maximum rate when not provided */
+       if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_RATE, &trim_rate) != 0)
+               trim_rate = 0;
+
+       /* Optional, defaults to standard TRIM when not provided */
+       if (nvlist_lookup_boolean_value(innvl, ZPOOL_TRIM_SECURE,
+           &trim_secure) != 0)
+               trim_secure = B_FALSE;
+
+       error = spa_open(poolname, &spa, FTAG);
+       if (error != 0)
+               return (error);
+
+       errlist = fnvlist_alloc();
+       nerrors = spa_vdev_trim(spa, guids, command, trim_rate,
+           !!zfs_trim_metaslab_skip, trim_secure, errlist);
+
+       if (fnvlist_size(errlist) > 0)
+               fnvlist_add_nvlist(outnvl, ZPOOL_TRIM_VDEVS, errlist);
+       fnvlist_free(errlist);
+       spa_close(spa, FTAG);
+
+       if (nerrors > 0)
+               return (EINVAL);
+       return (0);
+}
+
+/*
+ * This ioctl waits for activity of a particular type to complete. If there is
+ * no activity of that type in progress, it returns immediately, and the
+ * returned value "waited" is false. If there is activity in progress, and no
+ * tag is passed in, the ioctl blocks until all activity of that type is
+ * complete, and then returns with "waited" set to true.
+ *
+ * If a tag is provided, it identifies a particular instance of an activity to
+ * wait for. Currently, this is only valid for use with 'initialize', because
+ * that is the only activity for which there can be multiple instances running
+ * concurrently. In the case of 'initialize', the tag corresponds to the guid of
+ * the vdev on which to wait.
+ *
+ * If a thread waiting in the ioctl receives a signal, the call will return
+ * immediately, and the return value will be EINTR.
+ *
+ * innvl: {
+ *     "wait_activity" -> int32_t
+ *     (optional) "wait_tag" -> uint64_t
+ * }
+ *
+ * outnvl: "waited" -> boolean_t
+ */
+static const zfs_ioc_key_t zfs_keys_pool_wait[] = {
+       {ZPOOL_WAIT_ACTIVITY,   DATA_TYPE_INT32,                0},
+       {ZPOOL_WAIT_TAG,        DATA_TYPE_UINT64,               ZK_OPTIONAL},
+};
+
+/*
+ * Wait for pool activity.  With a "wait_tag" in innvl the wait is delegated
+ * to spa_wait_tag(), otherwise to spa_wait().  On success the "waited"
+ * result is added to outnvl; on failure the error is returned and outnvl is
+ * left untouched.  Returns EINVAL when "wait_activity" is missing.
+ */
+static int
+zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       int32_t activity;
+       uint64_t tag;
+       boolean_t waited;
+       int error;
+
+       /* Use SET_ERROR() like the other handlers in this file. */
+       if (nvlist_lookup_int32(innvl, ZPOOL_WAIT_ACTIVITY, &activity) != 0)
+               return (SET_ERROR(EINVAL));
+
+       if (nvlist_lookup_uint64(innvl, ZPOOL_WAIT_TAG, &tag) == 0)
+               error = spa_wait_tag(name, activity, tag, &waited);
+       else
+               error = spa_wait(name, activity, &waited);
+
+       if (error == 0)
+               fnvlist_add_boolean_value(outnvl, ZPOOL_WAIT_WAITED, waited);
+
+       return (error);
+}
+
+/*
+ * This ioctl waits for activity of a particular type to complete. If there is
+ * no activity of that type in progress, it returns immediately, and the
+ * returned value "waited" is false. If there is activity in progress, and no
+ * tag is passed in, the ioctl blocks until all activity of that type is
+ * complete, and then returns with "waited" set to true.
+ *
+ * If a thread waiting in the ioctl receives a signal, the call will return
+ * immediately, and the return value will be EINTR.
+ *
+ * innvl: {
+ *     "wait_activity" -> int32_t
+ * }
+ *
+ * outnvl: "waited" -> boolean_t
+ */
+static const zfs_ioc_key_t zfs_keys_fs_wait[] = {
+       {ZFS_WAIT_ACTIVITY,     DATA_TYPE_INT32,                0},
+};
+
+/*
+ * Wait for dataset activity on the dataset called 'name'.  On success the
+ * "waited" result is added to outnvl.  Returns EINVAL for a missing or
+ * out-of-range activity, or the error from dsl_pool_hold(),
+ * dsl_dataset_hold() or dsl_dir_wait().
+ */
+static int
+zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       int32_t activity;
+       boolean_t waited = B_FALSE;
+       int error;
+       dsl_pool_t *dp;
+       dsl_dir_t *dd;
+       dsl_dataset_t *ds;
+
+       if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0)
+               return (SET_ERROR(EINVAL));
+
+       /* Reject activity values outside [0, ZFS_WAIT_NUM_ACTIVITIES). */
+       if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0)
+               return (SET_ERROR(EINVAL));
+
+       if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
+               return (error);
+
+       if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
+               dsl_pool_rele(dp, FTAG);
+               return (error);
+       }
+
+       /*
+        * Register as a waiter.  dd_activity_lock is taken here and is not
+        * released by this function until after dsl_dir_wait() returns.
+        * NOTE(review): presumably dsl_dir_wait() drops the lock while it
+        * sleeps -- confirm against its implementation.
+        */
+       dd = ds->ds_dir;
+       mutex_enter(&dd->dd_activity_lock);
+       dd->dd_activity_waiters++;
+
+       /*
+        * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t
+        * aren't evicted while we're waiting. Normally this is prevented by
+        * holding the pool, but we can't do that while we're waiting since
+        * that would prevent TXGs from syncing out. Some of the functionality
+        * of long-holds (e.g. preventing deletion) is unnecessary for this
+        * case, since we would cancel the waiters before proceeding with a
+        * deletion. An alternative mechanism for keeping the dataset around
+        * could be developed but this is simpler.
+        */
+       dsl_dataset_long_hold(ds, FTAG);
+       dsl_pool_rele(dp, FTAG);
+
+       error = dsl_dir_wait(dd, ds, activity, &waited);
+
+       /*
+        * Deregister; the last waiter to leave signals dd_activity_cv.
+        * NOTE(review): presumably this wakes a thread waiting for the
+        * waiter count to drain -- confirm against the cv's other users.
+        */
+       dsl_dataset_long_rele(ds, FTAG);
+       dd->dd_activity_waiters--;
+       if (dd->dd_activity_waiters == 0)
+               cv_signal(&dd->dd_activity_cv);
+       mutex_exit(&dd->dd_activity_lock);
+
+       dsl_dataset_rele(ds, FTAG);
+
+       /* "waited" is only reported when the wait itself succeeded. */
+       if (error == 0)
+               fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited);
+
+       return (error);
+}
+
+/*
+ * fsname is name of dataset to rollback (to most recent snapshot)
+ *
+ * innvl may contain name of expected target snapshot
+ *
+ * outnvl: {
+ *     "target" -> name of most recent snapshot
+ * }
+ */
+static const zfs_ioc_key_t zfs_keys_rollback[] = {
+       {"target",      DATA_TYPE_STRING,       ZK_OPTIONAL},
+};
+
+/* ARGSUSED */
+static int
+zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       zfsvfs_t *zfsvfs;
+       zvol_state_handle_t *zv;
+       char *target = NULL;
+       int error;
+
+       (void) nvlist_lookup_string(innvl, "target", &target);
+       if (target != NULL) {
+               const char *cp = strchr(target, '@');
+
+               /*
+                * The snap name must contain an @, and the part after it must
+                * contain only valid characters.
+                */
+               if (cp == NULL ||
+                   zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
+                       return (SET_ERROR(EINVAL));
         }
  
         if (getzfsvfs(fsname, &zfsvfs) == 0) {
@@ -3930,7 +4244,7 @@ zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
                         resume_err = zfs_resume_fs(zfsvfs, ds);
                         error = error ? error : resume_err;
                 }
-               deactivate_super(zfsvfs->z_sb);
+               zfs_vfs_rele(zfsvfs);
         } else if ((zv = zvol_suspend(fsname)) != NULL) {
                 error = dsl_dataset_rollback(fsname, target, zvol_tag(zv),
                     outnvl);
@@ -3949,11 +4263,45 @@ recursive_unmount(const char *fsname, void *arg)
  
         fullname = kmem_asprintf("%s@%s", fsname, snapname);
         zfs_unmount_snap(fullname);
-       strfree(fullname);
+       kmem_strfree(fullname);
  
         return (0);
  }
  
+/*
+ *
+ * snapname is the snapshot to redact.
+ * innvl: {
+ *     "bookname" -> (string)
+ *         shortname of the redaction bookmark to generate
+ *     "snapnv" -> (nvlist, values ignored)
+ *         snapshots to redact snapname with respect to
+ * }
+ *
+ * outnvl is unused
+ */
+
+static const zfs_ioc_key_t zfs_keys_redact[] = {
+       {"bookname",            DATA_TYPE_STRING,       0},
+       {"snapnv",              DATA_TYPE_NVLIST,       0},
+};
+
+/*
+ * Look up the "snapnv" list and the "bookname" string in innvl and pass
+ * them, together with snapname, to dmu_redact_snap().
+ *
+ * Returns EINVAL when either key is missing, ENXIO when "snapnv" is empty,
+ * and otherwise whatever dmu_redact_snap() returns.  outnvl is not used.
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_redact(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       nvlist_t *redactnvl = NULL;
+       char *redactbook = NULL;
+
+       if (nvlist_lookup_nvlist(innvl, "snapnv", &redactnvl) != 0)
+               return (SET_ERROR(EINVAL));
+       if (fnvlist_num_pairs(redactnvl) == 0)
+               return (SET_ERROR(ENXIO));
+       if (nvlist_lookup_string(innvl, "bookname", &redactbook) != 0)
+               return (SET_ERROR(EINVAL));
+
+       return (dmu_redact_snap(snapname, redactnvl, redactbook));
+}
+
  /*
   * inputs:
   * zc_name     old name of dataset
@@ -3965,8 +4313,11 @@ recursive_unmount(const char *fsname, void *arg)
  static int
  zfs_ioc_rename(zfs_cmd_t *zc)
  {
+       objset_t *os;
+       dmu_objset_type_t ost;
         boolean_t recursive = zc->zc_cookie & 1;
         char *at;
+       int err;
  
         /* "zfs rename" from and to ...%recv datasets should both fail */
         zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
@@ -3976,6 +4327,12 @@ zfs_ioc_rename(zfs_cmd_t *zc)
             strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
                 return (SET_ERROR(EINVAL));
  
+       err = dmu_objset_hold(zc->zc_name, FTAG, &os);
+       if (err != 0)
+               return (err);
+       ost = dmu_objset_type(os);
+       dmu_objset_rele(os, FTAG);
+
         at = strchr(zc->zc_name, '@');
         if (at != NULL) {
                 /* snaps must be in same fs */
@@ -3984,7 +4341,7 @@ zfs_ioc_rename(zfs_cmd_t *zc)
                 if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
                         return (SET_ERROR(EXDEV));
                 *at = '\0';
-               if (zc->zc_objset_type == DMU_OST_ZFS) {
+               if (ost == DMU_OST_ZFS) {
                         error = dmu_objset_find(zc->zc_name,
                             recursive_unmount, at + 1,
                             recursive ? DS_FIND_CHILDREN : 0);
@@ -4009,7 +4366,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
         const char *propname = nvpair_name(pair);
         boolean_t issnap = (strchr(dsname, '@') != NULL);
         zfs_prop_t prop = zfs_name_to_prop(propname);
-       uint64_t intval;
+       uint64_t intval, compval;
         int err;
  
         if (prop == ZPROP_INVAL) {
@@ -4091,19 +4448,20 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                  * we'll catch them later.
                  */
                 if (nvpair_value_uint64(pair, &intval) == 0) {
-                       if (intval >= ZIO_COMPRESS_GZIP_1 &&
-                           intval <= ZIO_COMPRESS_GZIP_9 &&
+                       compval = ZIO_COMPRESS_ALGO(intval);
+                       if (compval >= ZIO_COMPRESS_GZIP_1 &&
+                           compval <= ZIO_COMPRESS_GZIP_9 &&
                             zfs_earlier_version(dsname,
                             SPA_VERSION_GZIP_COMPRESSION)) {
                                 return (SET_ERROR(ENOTSUP));
                         }
  
-                       if (intval == ZIO_COMPRESS_ZLE &&
+                       if (compval == ZIO_COMPRESS_ZLE &&
                             zfs_earlier_version(dsname,
                             SPA_VERSION_ZLE_COMPRESSION))
                                 return (SET_ERROR(ENOTSUP));
  
-                       if (intval == ZIO_COMPRESS_LZ4) {
+                       if (compval == ZIO_COMPRESS_LZ4) {
                                 spa_t *spa;
  
                                 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
@@ -4117,16 +4475,18 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                                 spa_close(spa, FTAG);
                         }
  
-                       /*
-                        * If this is a bootable dataset then
-                        * verify that the compression algorithm
-                        * is supported for booting. We must return
-                        * something other than ENOTSUP since it
-                        * implies a downrev pool version.
-                        */
-                       if (zfs_is_bootfs(dsname) &&
-                           !BOOTFS_COMPRESS_VALID(intval)) {
-                               return (SET_ERROR(ERANGE));
+                       if (compval == ZIO_COMPRESS_ZSTD) {
+                               spa_t *spa;
+
+                               if ((err = spa_open(dsname, &spa, FTAG)) != 0)
+                                       return (err);
+
+                               if (!spa_feature_is_enabled(spa,
+                                   SPA_FEATURE_ZSTD_COMPRESS)) {
+                                       spa_close(spa, FTAG);
+                                       return (SET_ERROR(ENOTSUP));
+                               }
+                               spa_close(spa, FTAG);
                         }
                 }
                 break;
@@ -4169,16 +4529,6 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                     intval != ZFS_DNSIZE_LEGACY) {
                         spa_t *spa;
  
-                       /*
-                        * If this is a bootable dataset then
-                        * we don't allow large (>512B) dnodes,
-                        * because GRUB doesn't support them.
-                        */
-                       if (zfs_is_bootfs(dsname) &&
-                           intval != ZFS_DNSIZE_LEGACY) {
-                               return (SET_ERROR(EDOM));
-                       }
-
                         if ((err = spa_open(dsname, &spa, FTAG)) != 0)
                                 return (err);
  
@@ -4191,6 +4541,15 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                 }
                 break;
  
+       case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
+               /*
+                * This property could require the allocation classes
+                * feature to be active for setting, however we allow
+                * it so that tests of settable properties succeed.
+                * The CLI will issue a warning in this case.
+                */
+               break;
+
         case ZFS_PROP_SHARESMB:
                 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
                         return (SET_ERROR(ENOTSUP));
@@ -4427,7 +4786,16 @@ extract_delay_props(nvlist_t *props)
         return (delayprops);
  }
  
-#ifdef DEBUG
+/*
+ * Free the string passed in 'arg' with kmem_strfree(); a NULL argument is
+ * ignored.
+ */
+static void
+zfs_allow_log_destroy(void *arg)
+{
+       if (arg == NULL)
+               return;
+
+       kmem_strfree((char *)arg);
+}
+
+#ifdef ZFS_DEBUG
  static boolean_t zfs_ioc_recv_inject_err;
  #endif
  
@@ -4438,33 +4806,37 @@ static boolean_t zfs_ioc_recv_inject_err;
  static int
  zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
      nvlist_t *localprops, nvlist_t *hidden_args, boolean_t force,
-    boolean_t resumable, int input_fd, dmu_replay_record_t *begin_record,
-    int cleanup_fd, uint64_t *read_bytes, uint64_t *errflags,
-    uint64_t *action_handle, nvlist_t **errors)
+    boolean_t resumable, int input_fd,
+    dmu_replay_record_t *begin_record, uint64_t *read_bytes,
+    uint64_t *errflags, nvlist_t **errors)
  {
         dmu_recv_cookie_t drc;
         int error = 0;
         int props_error = 0;
-       offset_t off;
+       offset_t off, noff;
         nvlist_t *local_delayprops = NULL;
         nvlist_t *recv_delayprops = NULL;
         nvlist_t *origprops = NULL; /* existing properties */
         nvlist_t *origrecvd = NULL; /* existing received properties */
         boolean_t first_recvd_props = B_FALSE;
-       file_t *input_fp;
+       boolean_t tofs_was_redacted;
+       zfs_file_t *input_fp;
  
         *read_bytes = 0;
         *errflags = 0;
         *errors = fnvlist_alloc();
+       off = 0;
  
-       input_fp = getf(input_fd);
-       if (input_fp == NULL)
-               return (SET_ERROR(EBADF));
+       if ((error = zfs_file_get(input_fd, &input_fp)))
+               return (error);
  
+       noff = off = zfs_file_off(input_fp);
         error = dmu_recv_begin(tofs, tosnap, begin_record, force,
-           resumable, localprops, hidden_args, origin, &drc);
+           resumable, localprops, hidden_args, origin, &drc, input_fp,
+           &off);
         if (error != 0)
                 goto out;
+       tofs_was_redacted = dsl_get_redacted(drc.drc_ds);
  
         /*
          * Set properties before we receive the stream so that they are applied
@@ -4565,18 +4937,19 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
                 nvlist_free(xprops);
         }
  
-       off = input_fp->f_offset;
-       error = dmu_recv_stream(&drc, input_fp->f_vnode, &off, cleanup_fd,
-           action_handle);
+       error = dmu_recv_stream(&drc, &off);
  
         if (error == 0) {
                 zfsvfs_t *zfsvfs = NULL;
-               zvol_state_t *zv = NULL;
+               zvol_state_handle_t *zv = NULL;
  
                 if (getzfsvfs(tofs, &zfsvfs) == 0) {
                         /* online recv */
                         dsl_dataset_t *ds;
                         int end_err;
+                       boolean_t stream_is_redacted = DMU_GET_FEATUREFLAGS(
+                           begin_record->drr_u.drr_begin.
+                           drr_versioninfo) & DMU_BACKUP_FEATURE_REDACTED;
  
                         ds = dmu_objset_ds(zfsvfs->z_os);
                         error = zfs_suspend_fs(zfsvfs);
@@ -4585,10 +4958,19 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
                          * likely also fail, and clean up after itself.
                          */
                         end_err = dmu_recv_end(&drc, zfsvfs);
-                       if (error == 0)
+                       /*
+                        * If the dataset was not redacted, but we received a
+                        * redacted stream onto it, we need to unmount the
+                        * dataset.  Otherwise, resume the filesystem.
+                        */
+                       if (error == 0 && !drc.drc_newfs &&
+                           stream_is_redacted && !tofs_was_redacted) {
+                               error = zfs_end_fs(zfsvfs, ds);
+                       } else if (error == 0) {
                                 error = zfs_resume_fs(zfsvfs, ds);
+                       }
                         error = error ? error : end_err;
-                       deactivate_super(zfsvfs->z_sb);
+                       zfs_vfs_rele(zfsvfs);
                 } else if ((zv = zvol_suspend(tofs)) != NULL) {
                         error = dmu_recv_end(&drc, zvol_tag(zv));
                         zvol_resume(zv);
@@ -4624,12 +5006,9 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
                 ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0);
                 nvlist_free(local_delayprops);
         }
+       *read_bytes = off - noff;
  
-       *read_bytes = off - input_fp->f_offset;
-       if (VOP_SEEK(input_fp->f_vnode, input_fp->f_offset, &off, NULL) == 0)
-               input_fp->f_offset = off;
-
-#ifdef DEBUG
+#ifdef ZFS_DEBUG
         if (zfs_ioc_recv_inject_err) {
                 zfs_ioc_recv_inject_err = B_FALSE;
                 error = 1;
@@ -4729,7 +5108,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
                 nvlist_free(inheritprops);
         }
  out:
-       releasef(input_fd);
+       zfs_file_put(input_fd);
         nvlist_free(origrecvd);
         nvlist_free(origprops);
  
@@ -4750,13 +5129,10 @@ out:
   * zc_cookie           file descriptor to recv from
   * zc_begin_record     the BEGIN record of the stream (not byteswapped)
   * zc_guid             force flag
- * zc_cleanup_fd       cleanup-on-exit file descriptor
- * zc_action_handle    handle for this guid/ds mapping (or zero on first call)
   *
   * outputs:
   * zc_cookie           number of bytes read
   * zc_obj              zprop_errflags_t
- * zc_action_handle    handle for this guid/ds mapping
   * zc_nvlist_dst{_size} error for each unapplied received property
   */
  static int
@@ -4799,8 +5175,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
  
         error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
             NULL, zc->zc_guid, B_FALSE, zc->zc_cookie, &begin_record,
-           zc->zc_cleanup_fd, &zc->zc_cookie, &zc->zc_obj,
-           &zc->zc_action_handle, &errors);
+           &zc->zc_cookie, &zc->zc_obj, &errors);
         nvlist_free(recvdprops);
         nvlist_free(localprops);
  
@@ -4833,15 +5208,14 @@ zfs_ioc_recv(zfs_cmd_t *zc)
   *     "input_fd" -> file descriptor to read stream from (int32)
   *     (optional) "force" -> force flag (value ignored)
   *     (optional) "resumable" -> resumable flag (value ignored)
- *     (optional) "cleanup_fd" -> cleanup-on-exit file descriptor
- *     (optional) "action_handle" -> handle for this guid/ds mapping
+ *     (optional) "cleanup_fd" -> unused
+ *     (optional) "action_handle" -> unused
   *     (optional) "hidden_args" -> { "wkeydata" -> value }
   * }
   *
   * outnvl: {
   *     "read_bytes" -> number of bytes read
   *     "error_flags" -> zprop_errflags_t
- *     "action_handle" -> handle for this guid/ds mapping
   *     "errors" -> error for each unapplied received property (nvlist)
   * }
   */
@@ -4874,11 +5248,9 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
         char tofs[ZFS_MAX_DATASET_NAME_LEN];
         boolean_t force;
         boolean_t resumable;
-       uint64_t action_handle = 0;
         uint64_t read_bytes = 0;
         uint64_t errflags = 0;
         int input_fd = -1;
-       int cleanup_fd = -1;
         int error;
  
         snapname = fnvlist_lookup_string(innvl, "snapname");
@@ -4888,7 +5260,7 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
             strchr(snapname, '%'))
                 return (SET_ERROR(EINVAL));
  
-       (void) strcpy(tofs, snapname);
+       (void) strlcpy(tofs, snapname, sizeof (tofs));
         tosnap = strchr(tofs, '@');
         *tosnap++ = '\0';
  
@@ -4906,14 +5278,6 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
         force = nvlist_exists(innvl, "force");
         resumable = nvlist_exists(innvl, "resumable");
  
-       error = nvlist_lookup_int32(innvl, "cleanup_fd", &cleanup_fd);
-       if (error && error != ENOENT)
-               return (error);
-
-       error = nvlist_lookup_uint64(innvl, "action_handle", &action_handle);
-       if (error && error != ENOENT)
-               return (error);
-
         /* we still use "props" here for backwards compatibility */
         error = nvlist_lookup_nvlist(innvl, "props", &recvprops);
         if (error && error != ENOENT)
@@ -4928,12 +5292,11 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
                 return (error);
  
         error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops,
-           hidden_args, force, resumable, input_fd, begin_record, cleanup_fd,
-           &read_bytes, &errflags, &action_handle, &errors);
+           hidden_args, force, resumable, input_fd, begin_record,
+           &read_bytes, &errflags, &errors);
  
         fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
         fnvlist_add_uint64(outnvl, "error_flags", errflags);
-       fnvlist_add_uint64(outnvl, "action_handle", action_handle);
         fnvlist_add_nvlist(outnvl, "errors", errors);
  
         nvlist_free(errors);
@@ -4943,6 +5306,51 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
         return (error);
  }
  
+/*
+ * Argument bundle handed from dump_bytes() to dump_bytes_cb(): describes one
+ * write and carries its result back.
+ */
+typedef struct dump_bytes_io {
+       zfs_file_t      *dbi_fp;        /* file passed to zfs_file_write() */
+       caddr_t         dbi_buf;        /* data to write */
+       int             dbi_len;        /* number of bytes in dbi_buf */
+       int             dbi_err;        /* zfs_file_write() return value */
+} dump_bytes_io_t;
+
+/*
+ * Perform the write described by the dump_bytes_io_t in 'arg' and store the
+ * zfs_file_write() result in its dbi_err field.
+ */
+static void
+dump_bytes_cb(void *arg)
+{
+       dump_bytes_io_t *io = arg;
+
+       io->dbi_err = zfs_file_write(io->dbi_fp, io->dbi_buf, io->dbi_len,
+           NULL);
+}
+
+/*
+ * Write 'len' bytes from 'buf' to the file passed in 'arg' and return the
+ * error code recorded by dump_bytes_cb().
+ */
+static int
+dump_bytes(objset_t *os, void *buf, int len, void *arg)
+{
+       dump_bytes_io_t io = {
+               .dbi_fp = arg,
+               .dbi_buf = buf,
+               .dbi_len = len,
+       };
+
+#if !defined(HAVE_LARGE_STACKS)
+       /*
+        * The write is performed in a taskq to ensure that there is always
+        * enough stack space to write safely to the target filesystem.
+        * The ZIO_TYPE_FREE threads are used because there can be a lot of
+        * them and they are used in vdev_file.c for a similar purpose.
+        */
+       spa_taskq_dispatch_sync(dmu_objset_spa(os), ZIO_TYPE_FREE,
+           ZIO_TASKQ_ISSUE, dump_bytes_cb, &io, TQ_SLEEP);
+#else
+       dump_bytes_cb(&io);
+#endif /* !HAVE_LARGE_STACKS */
+
+       return (io.dbi_err);
+}
+
  /*
   * inputs:
   * zc_name     name of snapshot to send
@@ -4970,6 +5378,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
         boolean_t large_block_ok = (zc->zc_flags & 0x2);
         boolean_t compressok = (zc->zc_flags & 0x4);
         boolean_t rawok = (zc->zc_flags & 0x8);
+       boolean_t savedok = (zc->zc_flags & 0x10);
  
         if (zc->zc_obj != 0) {
                 dsl_pool_t *dp;
@@ -5018,44 +5427,48 @@ zfs_ioc_send(zfs_cmd_t *zc)
                         }
                 }
  
-               error = dmu_send_estimate(tosnap, fromsnap, compressok || rawok,
-                   &zc->zc_objset_type);
+               error = dmu_send_estimate_fast(tosnap, fromsnap, NULL,
+                   compressok || rawok, savedok, &zc->zc_objset_type);
  
                 if (fromsnap != NULL)
                         dsl_dataset_rele(fromsnap, FTAG);
                 dsl_dataset_rele(tosnap, FTAG);
                 dsl_pool_rele(dp, FTAG);
         } else {
-               file_t *fp = getf(zc->zc_cookie);
-               if (fp == NULL)
-                       return (SET_ERROR(EBADF));
+               zfs_file_t *fp;
+               dmu_send_outparams_t out = {0};
+
+               if ((error = zfs_file_get(zc->zc_cookie, &fp)))
+                       return (error);
  
-               off = fp->f_offset;
+               off = zfs_file_off(fp);
+               out.dso_outfunc = dump_bytes;
+               out.dso_arg = fp;
+               out.dso_dryrun = B_FALSE;
                 error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
-                   zc->zc_fromobj, embedok, large_block_ok, compressok, rawok,
-                   zc->zc_cookie, fp->f_vnode, &off);
+                   zc->zc_fromobj, embedok, large_block_ok, compressok,
+                   rawok, savedok, zc->zc_cookie, &off, &out);
  
-               if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
-                       fp->f_offset = off;
-               releasef(zc->zc_cookie);
+               zfs_file_put(zc->zc_cookie);
         }
         return (error);
  }
  
  /*
   * inputs:
- * zc_name     name of snapshot on which to report progress
- * zc_cookie   file descriptor of send stream
+ * zc_name             name of snapshot on which to report progress
+ * zc_cookie           file descriptor of send stream
   *
   * outputs:
- * zc_cookie   number of bytes written in send stream thus far
+ * zc_cookie           number of bytes written in send stream thus far
+ * zc_objset_type      logical size of data traversed by send thus far
   */
  static int
  zfs_ioc_send_progress(zfs_cmd_t *zc)
  {
         dsl_pool_t *dp;
         dsl_dataset_t *ds;
-       dmu_sendarg_t *dsp = NULL;
+       dmu_sendstatus_t *dsp = NULL;
         int error;
  
         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
@@ -5079,15 +5492,19 @@ zfs_ioc_send_progress(zfs_cmd_t *zc)
  
         for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
             dsp = list_next(&ds->ds_sendstreams, dsp)) {
-               if (dsp->dsa_outfd == zc->zc_cookie &&
-                   dsp->dsa_proc->group_leader == curproc->group_leader)
+               if (dsp->dss_outfd == zc->zc_cookie &&
+                   zfs_proc_is_caller(dsp->dss_proc))
                         break;
         }
  
-       if (dsp != NULL)
-               zc->zc_cookie = *(dsp->dsa_off);
-       else
+       if (dsp != NULL) {
+               zc->zc_cookie = atomic_cas_64((volatile uint64_t *)dsp->dss_off,
+                   0, 0);
+               /* This is the closest thing we have to atomic_read_64. */
+               zc->zc_objset_type = atomic_cas_64(&dsp->dss_blocks, 0, 0);
+       } else {
                 error = SET_ERROR(ENOENT);
+       }
  
         mutex_exit(&ds->ds_sendstream_lock);
         dsl_dataset_rele(ds, FTAG);
@@ -5201,6 +5618,13 @@ zfs_ioc_clear(zfs_cmd_t *zc)
         if (error != 0)
                 return (error);
  
+       /*
+        * If multihost is enabled, resuming I/O is unsafe as another
+        * host may have imported the pool.
+        */
+       if (spa_multihost(spa) && spa_suspended(spa))
+               return (SET_ERROR(EINVAL));
+
         spa_vdev_state_enter(spa, SCL_NONE);
  
         if (zc->zc_guid == 0) {
@@ -5208,9 +5632,10 @@ zfs_ioc_clear(zfs_cmd_t *zc)
         } else {
                 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
                 if (vd == NULL) {
-                       (void) spa_vdev_state_exit(spa, NULL, ENODEV);
+                       error = SET_ERROR(ENODEV);
+                       (void) spa_vdev_state_exit(spa, NULL, error);
                         spa_close(spa, FTAG);
-                       return (SET_ERROR(ENODEV));
+                       return (error);
                 }
         }
  
@@ -5241,7 +5666,7 @@ zfs_ioc_clear(zfs_cmd_t *zc)
   * outnvl is unused
   */
  static const zfs_ioc_key_t zfs_keys_pool_reopen[] = {
-       {"scrub_restart",       DATA_TYPE_BOOLEAN_VALUE,        0},
+       {"scrub_restart",       DATA_TYPE_BOOLEAN_VALUE,        ZK_OPTIONAL},
  };
  
  /* ARGSUSED */
@@ -5250,11 +5675,13 @@ zfs_ioc_pool_reopen(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
  {
         spa_t *spa;
         int error;
-       boolean_t scrub_restart = B_TRUE;
+       boolean_t rc, scrub_restart = B_TRUE;
  
         if (innvl) {
-               scrub_restart = fnvlist_lookup_boolean_value(innvl,
-                   "scrub_restart");
+               error = nvlist_lookup_boolean_value(innvl,
+                   "scrub_restart", &rc);
+               if (error == 0)
+                       scrub_restart = rc;
         }
  
         error = spa_open(pool, &spa, FTAG);
@@ -5447,7 +5874,7 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
                 }
                 if (error == 0)
                         error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
-               deactivate_super(zfsvfs->z_sb);
+               zfs_vfs_rele(zfsvfs);
         } else {
                 /* XXX kind of reading contents without owning */
                 error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
@@ -5568,8 +5995,8 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
         if (error == 0)
                 (void) strlcpy(zc->zc_value, snap_name,
                     sizeof (zc->zc_value));
-       strfree(snap_name);
-       strfree(hold_name);
+       kmem_strfree(snap_name);
+       kmem_strfree(hold_name);
         zfs_onexit_fd_rele(zc->zc_cleanup_fd);
         return (error);
  }
@@ -5586,178 +6013,25 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
  static int
  zfs_ioc_diff(zfs_cmd_t *zc)
  {
-       file_t *fp;
+       zfs_file_t *fp;
         offset_t off;
         int error;
  
-       fp = getf(zc->zc_cookie);
-       if (fp == NULL)
-               return (SET_ERROR(EBADF));
-
-       off = fp->f_offset;
+       if ((error = zfs_file_get(zc->zc_cookie, &fp)))
+               return (error);
  
-       error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
+       off = zfs_file_off(fp);
+       error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
  
-       if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
-               fp->f_offset = off;
-       releasef(zc->zc_cookie);
+       zfs_file_put(zc->zc_cookie);
  
         return (error);
  }
  
-/*
- * Remove all ACL files in shares dir
- */
-#ifdef HAVE_SMB_SHARE
  static int
-zfs_smb_acl_purge(znode_t *dzp)
+zfs_ioc_smb_acl(zfs_cmd_t *zc)
  {
-       zap_cursor_t    zc;
-       zap_attribute_t zap;
-       zfsvfs_t *zfsvfs = ZTOZSB(dzp);
-       int error;
-
-       for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
-           (error = zap_cursor_retrieve(&zc, &zap)) == 0;
-           zap_cursor_advance(&zc)) {
-               if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
-                   NULL, 0)) != 0)
-                       break;
-       }
-       zap_cursor_fini(&zc);
-       return (error);
-}
-#endif /* HAVE_SMB_SHARE */
-
-static int
-zfs_ioc_smb_acl(zfs_cmd_t *zc)
-{
-#ifdef HAVE_SMB_SHARE
-       vnode_t *vp;
-       znode_t *dzp;
-       vnode_t *resourcevp = NULL;
-       znode_t *sharedir;
-       zfsvfs_t *zfsvfs;
-       nvlist_t *nvlist;
-       char *src, *target;
-       vattr_t vattr;
-       vsecattr_t vsec;
-       int error = 0;
-
-       if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
-           NO_FOLLOW, NULL, &vp)) != 0)
-               return (error);
-
-       /* Now make sure mntpnt and dataset are ZFS */
-
-       if (vp->v_vfsp->vfs_fstype != zfsfstype ||
-           (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
-           zc->zc_name) != 0)) {
-               VN_RELE(vp);
-               return (SET_ERROR(EINVAL));
-       }
-
-       dzp = VTOZ(vp);
-       zfsvfs = ZTOZSB(dzp);
-       ZFS_ENTER(zfsvfs);
-
-       /*
-        * Create share dir if its missing.
-        */
-       mutex_enter(&zfsvfs->z_lock);
-       if (zfsvfs->z_shares_dir == 0) {
-               dmu_tx_t *tx;
-
-               tx = dmu_tx_create(zfsvfs->z_os);
-               dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
-                   ZFS_SHARES_DIR);
-               dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
-               error = dmu_tx_assign(tx, TXG_WAIT);
-               if (error != 0) {
-                       dmu_tx_abort(tx);
-               } else {
-                       error = zfs_create_share_dir(zfsvfs, tx);
-                       dmu_tx_commit(tx);
-               }
-               if (error != 0) {
-                       mutex_exit(&zfsvfs->z_lock);
-                       VN_RELE(vp);
-                       ZFS_EXIT(zfsvfs);
-                       return (error);
-               }
-       }
-       mutex_exit(&zfsvfs->z_lock);
-
-       ASSERT(zfsvfs->z_shares_dir);
-       if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
-               VN_RELE(vp);
-               ZFS_EXIT(zfsvfs);
-               return (error);
-       }
-
-       switch (zc->zc_cookie) {
-       case ZFS_SMB_ACL_ADD:
-               vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
-               vattr.va_mode = S_IFREG|0777;
-               vattr.va_uid = 0;
-               vattr.va_gid = 0;
-
-               vsec.vsa_mask = VSA_ACE;
-               vsec.vsa_aclentp = &full_access;
-               vsec.vsa_aclentsz = sizeof (full_access);
-               vsec.vsa_aclcnt = 1;
-
-               error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
-                   &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
-               if (resourcevp)
-                       VN_RELE(resourcevp);
-               break;
-
-       case ZFS_SMB_ACL_REMOVE:
-               error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
-                   NULL, 0);
-               break;
-
-       case ZFS_SMB_ACL_RENAME:
-               if ((error = get_nvlist(zc->zc_nvlist_src,
-                   zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
-                       VN_RELE(vp);
-                       VN_RELE(ZTOV(sharedir));
-                       ZFS_EXIT(zfsvfs);
-                       return (error);
-               }
-               if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
-                   nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
-                   &target)) {
-                       VN_RELE(vp);
-                       VN_RELE(ZTOV(sharedir));
-                       ZFS_EXIT(zfsvfs);
-                       nvlist_free(nvlist);
-                       return (error);
-               }
-               error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
-                   kcred, NULL, 0);
-               nvlist_free(nvlist);
-               break;
-
-       case ZFS_SMB_ACL_PURGE:
-               error = zfs_smb_acl_purge(sharedir);
-               break;
-
-       default:
-               error = SET_ERROR(EINVAL);
-               break;
-       }
-
-       VN_RELE(vp);
-       VN_RELE(ZTOV(sharedir));
-
-       ZFS_EXIT(zfsvfs);
-
-       return (error);
-#else
-       return (SET_ERROR(ENOTSUP));
-#endif /* HAVE_SMB_SHARE */
+       return (SET_ERROR(ENOTSUP));
  }
  
  /*
@@ -5804,13 +6078,13 @@ zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
         if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
                 error = zfs_onexit_fd_hold(cleanup_fd, &minor);
                 if (error != 0)
-                       return (error);
+                       return (SET_ERROR(error));
         }
  
         error = dsl_dataset_user_hold(holds, minor, errlist);
         if (minor != 0)
                 zfs_onexit_fd_rele(cleanup_fd);
-       return (error);
+       return (SET_ERROR(error));
  }
  
  /*
@@ -5940,8 +6214,8 @@ zfs_ioc_events_seek(zfs_cmd_t *zc)
  
  /*
   * inputs:
- * zc_name             name of new filesystem or snapshot
- * zc_value            full name of old snapshot
+ * zc_name             name of later filesystem or snapshot
+ * zc_value            full name of old snapshot or bookmark
   *
   * outputs:
   * zc_cookie           space in bytes
@@ -5953,7 +6227,7 @@ zfs_ioc_space_written(zfs_cmd_t *zc)
  {
         int error;
         dsl_pool_t *dp;
-       dsl_dataset_t *new, *old;
+       dsl_dataset_t *new;
  
         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
         if (error != 0)
@@ -5963,16 +6237,26 @@ zfs_ioc_space_written(zfs_cmd_t *zc)
                 dsl_pool_rele(dp, FTAG);
                 return (error);
         }
-       error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
-       if (error != 0) {
-               dsl_dataset_rele(new, FTAG);
-               dsl_pool_rele(dp, FTAG);
-               return (error);
-       }
+       if (strchr(zc->zc_value, '#') != NULL) {
+               zfs_bookmark_phys_t bmp;
+               error = dsl_bookmark_lookup(dp, zc->zc_value,
+                   new, &bmp);
+               if (error == 0) {
+                       error = dsl_dataset_space_written_bookmark(&bmp, new,
+                           &zc->zc_cookie,
+                           &zc->zc_objset_type, &zc->zc_perm_action);
+               }
+       } else {
+               dsl_dataset_t *old;
+               error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
  
-       error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
-           &zc->zc_objset_type, &zc->zc_perm_action);
-       dsl_dataset_rele(old, FTAG);
+               if (error == 0) {
+                       error = dsl_dataset_space_written(old, new,
+                           &zc->zc_cookie,
+                           &zc->zc_objset_type, &zc->zc_perm_action);
+                       dsl_dataset_rele(old, FTAG);
+               }
+       }
         dsl_dataset_rele(new, FTAG);
         dsl_pool_rele(dp, FTAG);
         return (error);
@@ -6050,8 +6334,13 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
   *         presence indicates compressed DRR_WRITE records are permitted
   *     (optional) "rawok" -> (value ignored)
   *         presence indicates raw encrypted records should be used.
+ *     (optional) "savedok" -> (value ignored)
+ *         presence indicates we should send a partially received snapshot
   *     (optional) "resume_object" and "resume_offset" -> (uint64)
   *         if present, resume send stream from specified object and offset.
+ *     (optional) "redactbook" -> (string)
+ *         if present, use this bookmark's redaction list to generate a redacted
+ *         send stream
   * }
   *
   * outnvl is unused
@@ -6063,8 +6352,10 @@ static const zfs_ioc_key_t zfs_keys_send_new[] = {
         {"embedok",             DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
         {"compressok",          DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
         {"rawok",               DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
+       {"savedok",             DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
         {"resume_object",       DATA_TYPE_UINT64,       ZK_OPTIONAL},
         {"resume_offset",       DATA_TYPE_UINT64,       ZK_OPTIONAL},
+       {"redactbook",          DATA_TYPE_STRING,       ZK_OPTIONAL},
  };
  
  /* ARGSUSED */
@@ -6075,13 +6366,15 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
         offset_t off;
         char *fromname = NULL;
         int fd;
-       file_t *fp;
+       zfs_file_t *fp;
         boolean_t largeblockok;
         boolean_t embedok;
         boolean_t compressok;
         boolean_t rawok;
+       boolean_t savedok;
         uint64_t resumeobj = 0;
         uint64_t resumeoff = 0;
+       char *redactbook = NULL;
  
         fd = fnvlist_lookup_int32(innvl, "fd");
  
@@ -6091,24 +6384,39 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
         embedok = nvlist_exists(innvl, "embedok");
         compressok = nvlist_exists(innvl, "compressok");
         rawok = nvlist_exists(innvl, "rawok");
+       savedok = nvlist_exists(innvl, "savedok");
  
         (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
         (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
  
-       if ((fp = getf(fd)) == NULL)
-               return (SET_ERROR(EBADF));
+       (void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
  
-       off = fp->f_offset;
-       error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
-           rawok, fd, resumeobj, resumeoff, fp->f_vnode, &off);
+       if ((error = zfs_file_get(fd, &fp)))
+               return (error);
+
+       off = zfs_file_off(fp);
  
-       if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
-               fp->f_offset = off;
+       dmu_send_outparams_t out = {0};
+       out.dso_outfunc = dump_bytes;
+       out.dso_arg = fp;
+       out.dso_dryrun = B_FALSE;
+       error = dmu_send(snapname, fromname, embedok, largeblockok,
+           compressok, rawok, savedok, resumeobj, resumeoff,
+           redactbook, fd, &off, &out);
  
-       releasef(fd);
+       zfs_file_put(fd);
         return (error);
  }
  
+/* ARGSUSED */
+static int
+send_space_sum(objset_t *os, void *buf, int len, void *arg)
+{
+       uint64_t *size = arg;
+       *size += len;
+       return (0);
+}
+
  /*
   * Determine approximately how large a zfs send stream will be -- the number
   * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
@@ -6124,6 +6432,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
   *         presence indicates compressed DRR_WRITE records are permitted
   *     (optional) "rawok" -> (value ignored)
   *         presence indicates raw encrypted records should be used.
+ *     (optional) "fd" -> file descriptor to use as a cookie for progress
+ *         tracking (int32)
   * }
   *
   * outnvl: {
@@ -6137,6 +6447,11 @@ static const zfs_ioc_key_t zfs_keys_send_space[] = {
         {"embedok",             DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
         {"compressok",          DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
         {"rawok",               DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
+       {"fd",                  DATA_TYPE_INT32,        ZK_OPTIONAL},
+       {"redactbook",          DATA_TYPE_STRING,       ZK_OPTIONAL},
+       {"resume_object",       DATA_TYPE_UINT64,       ZK_OPTIONAL},
+       {"resume_offset",       DATA_TYPE_UINT64,       ZK_OPTIONAL},
+       {"bytes",               DATA_TYPE_UINT64,       ZK_OPTIONAL},
  };
  
  static int
@@ -6144,11 +6459,22 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
  {
         dsl_pool_t *dp;
         dsl_dataset_t *tosnap;
+       dsl_dataset_t *fromsnap = NULL;
         int error;
-       char *fromname;
+       char *fromname = NULL;
+       char *redactlist_book = NULL;
+       boolean_t largeblockok;
+       boolean_t embedok;
         boolean_t compressok;
         boolean_t rawok;
-       uint64_t space;
+       boolean_t savedok;
+       uint64_t space = 0;
+       boolean_t full_estimate = B_FALSE;
+       uint64_t resumeobj = 0;
+       uint64_t resumeoff = 0;
+       uint64_t resume_bytes = 0;
+       int32_t fd = -1;
+       zfs_bookmark_phys_t zbm = {0};
  
         error = dsl_pool_hold(snapname, FTAG, &dp);
         if (error != 0)
@@ -6159,61 +6485,102 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
                 dsl_pool_rele(dp, FTAG);
                 return (error);
         }
+       (void) nvlist_lookup_int32(innvl, "fd", &fd);
  
+       largeblockok = nvlist_exists(innvl, "largeblockok");
+       embedok = nvlist_exists(innvl, "embedok");
         compressok = nvlist_exists(innvl, "compressok");
         rawok = nvlist_exists(innvl, "rawok");
+       savedok = nvlist_exists(innvl, "savedok");
+       boolean_t from = (nvlist_lookup_string(innvl, "from", &fromname) == 0);
+       boolean_t altbook = (nvlist_lookup_string(innvl, "redactbook",
+           &redactlist_book) == 0);
+
+       (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
+       (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
+       (void) nvlist_lookup_uint64(innvl, "bytes", &resume_bytes);
+
+       if (altbook) {
+               full_estimate = B_TRUE;
+       } else if (from) {
+               if (strchr(fromname, '#')) {
+                       error = dsl_bookmark_lookup(dp, fromname, tosnap, &zbm);
  
-       error = nvlist_lookup_string(innvl, "from", &fromname);
-       if (error == 0) {
-               if (strchr(fromname, '@') != NULL) {
                         /*
-                        * If from is a snapshot, hold it and use the more
-                        * efficient dmu_send_estimate to estimate send space
-                        * size using deadlists.
+                        * dsl_bookmark_lookup() will fail with EXDEV if
+                        * the from-bookmark and tosnap are at the same txg.
+                        * However, it's valid to do a send (and therefore,
+                        * a send estimate) from and to the same time point,
+                        * if the bookmark is redacted (the incremental send
+                        * can change what's redacted on the target).  In
+                        * this case, dsl_bookmark_lookup() fills in zbm
+                        * but returns EXDEV.  Ignore this error.
                          */
-                       dsl_dataset_t *fromsnap;
+                       if (error == EXDEV && zbm.zbm_redaction_obj != 0 &&
+                           zbm.zbm_guid ==
+                           dsl_dataset_phys(tosnap)->ds_guid)
+                               error = 0;
+
+                       if (error != 0) {
+                               dsl_dataset_rele(tosnap, FTAG);
+                               dsl_pool_rele(dp, FTAG);
+                               return (error);
+                       }
+                       if (zbm.zbm_redaction_obj != 0 || !(zbm.zbm_flags &
+                           ZBM_FLAG_HAS_FBN)) {
+                               full_estimate = B_TRUE;
+                       }
+               } else if (strchr(fromname, '@')) {
                         error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
-                       if (error != 0)
-                               goto out;
-                       error = dmu_send_estimate(tosnap, fromsnap,
-                           compressok || rawok, &space);
-                       dsl_dataset_rele(fromsnap, FTAG);
-               } else if (strchr(fromname, '#') != NULL) {
-                       /*
-                        * If from is a bookmark, fetch the creation TXG of the
-                        * snapshot it was created from and use that to find
-                        * blocks that were born after it.
-                        */
-                       zfs_bookmark_phys_t frombm;
+                       if (error != 0) {
+                               dsl_dataset_rele(tosnap, FTAG);
+                               dsl_pool_rele(dp, FTAG);
+                               return (error);
+                       }
  
-                       error = dsl_bookmark_lookup(dp, fromname, tosnap,
-                           &frombm);
-                       if (error != 0)
-                               goto out;
-                       error = dmu_send_estimate_from_txg(tosnap,
-                           frombm.zbm_creation_txg, compressok || rawok,
-                           &space);
+                       if (!dsl_dataset_is_before(tosnap, fromsnap, 0)) {
+                               full_estimate = B_TRUE;
+                               dsl_dataset_rele(fromsnap, FTAG);
+                       }
                 } else {
                         /*
                          * from is not properly formatted as a snapshot or
                          * bookmark
                          */
-                       error = SET_ERROR(EINVAL);
-                       goto out;
+                       dsl_dataset_rele(tosnap, FTAG);
+                       dsl_pool_rele(dp, FTAG);
+                       return (SET_ERROR(EINVAL));
                 }
-       } else {
+       }
+
+       if (full_estimate) {
+               dmu_send_outparams_t out = {0};
+               offset_t off = 0;
+               out.dso_outfunc = send_space_sum;
+               out.dso_arg = &space;
+               out.dso_dryrun = B_TRUE;
                 /*
-                * If estimating the size of a full send, use dmu_send_estimate.
+                * We have to release these holds so dmu_send can take them.  It
+                * will do all the error checking we need.
                  */
-               error = dmu_send_estimate(tosnap, NULL, compressok || rawok,
-                   &space);
+               dsl_dataset_rele(tosnap, FTAG);
+               dsl_pool_rele(dp, FTAG);
+               error = dmu_send(snapname, fromname, embedok, largeblockok,
+                   compressok, rawok, savedok, resumeobj, resumeoff,
+                   redactlist_book, fd, &off, &out);
+       } else {
+               error = dmu_send_estimate_fast(tosnap, fromsnap,
+                   (from && strchr(fromname, '#') != NULL ? &zbm : NULL),
+                   compressok || rawok, savedok, &space);
+               space -= resume_bytes;
+               if (fromsnap != NULL)
+                       dsl_dataset_rele(fromsnap, FTAG);
+               dsl_dataset_rele(tosnap, FTAG);
+               dsl_pool_rele(dp, FTAG);
         }
  
         fnvlist_add_uint64(outnvl, "space", space);
  
-out:
-       dsl_dataset_rele(tosnap, FTAG);
-       dsl_pool_rele(dp, FTAG);
         return (error);
  }
  
@@ -6415,7 +6782,7 @@ zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
   * See the block comment at the beginning of this file for details on
   * each argument to this function.
   */
-static void
+void
  zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
      zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
      zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
@@ -6451,7 +6818,7 @@ zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
             POOL_NAME, log_history, pool_check);
  }
  
-static void
+void
  zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
      zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
  {
@@ -6536,7 +6903,7 @@ zfs_ioctl_init(void)
             zfs_keys_clone, ARRAY_SIZE(zfs_keys_clone));
  
         zfs_ioctl_register("remap", ZFS_IOC_REMAP,
-           zfs_ioc_remap, zfs_secpolicy_remap, DATASET_NAME,
+           zfs_ioc_remap, zfs_secpolicy_none, DATASET_NAME,
             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
             zfs_keys_remap, ARRAY_SIZE(zfs_keys_remap));
  
@@ -6574,6 +6941,11 @@ zfs_ioctl_init(void)
             POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
             zfs_keys_get_bookmarks, ARRAY_SIZE(zfs_keys_get_bookmarks));
  
+       zfs_ioctl_register("get_bookmark_props", ZFS_IOC_GET_BOOKMARK_PROPS,
+           zfs_ioc_get_bookmark_props, zfs_secpolicy_read, ENTITY_NAME,
+           POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_get_bookmark_props,
+           ARRAY_SIZE(zfs_keys_get_bookmark_props));
+
         zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
             zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
             POOL_NAME,
@@ -6613,6 +6985,11 @@ zfs_ioctl_init(void)
             B_TRUE, zfs_keys_channel_program,
             ARRAY_SIZE(zfs_keys_channel_program));
  
+       zfs_ioctl_register("redact", ZFS_IOC_REDACT,
+           zfs_ioc_redact, zfs_secpolicy_config, DATASET_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
+           zfs_keys_redact, ARRAY_SIZE(zfs_keys_redact));
+
         zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
             zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
@@ -6625,6 +7002,36 @@ zfs_ioctl_init(void)
             zfs_keys_pool_discard_checkpoint,
             ARRAY_SIZE(zfs_keys_pool_discard_checkpoint));
  
+       zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
+           zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
+           zfs_keys_pool_initialize, ARRAY_SIZE(zfs_keys_pool_initialize));
+
+       zfs_ioctl_register("trim", ZFS_IOC_POOL_TRIM,
+           zfs_ioc_pool_trim, zfs_secpolicy_config, POOL_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
+           zfs_keys_pool_trim, ARRAY_SIZE(zfs_keys_pool_trim));
+
+       zfs_ioctl_register("wait", ZFS_IOC_WAIT,
+           zfs_ioc_wait, zfs_secpolicy_none, POOL_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
+           zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));
+
+       zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
+           zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
+           zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));
+
+       zfs_ioctl_register("set_bootenv", ZFS_IOC_SET_BOOTENV,
+           zfs_ioc_set_bootenv, zfs_secpolicy_config, POOL_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
+           zfs_keys_set_bootenv, ARRAY_SIZE(zfs_keys_set_bootenv));
+
+       zfs_ioctl_register("get_bootenv", ZFS_IOC_GET_BOOTENV,
+           zfs_ioc_get_bootenv, zfs_secpolicy_none, POOL_NAME,
+           POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE,
+           zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv));
+
         /* IOCTLS that use the legacy function signature */
  
         zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
@@ -6756,15 +7163,14 @@ zfs_ioctl_init(void)
             zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
  
-       /*
-        * ZoL functions
-        */
         zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
             zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
         zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
             zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
         zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek,
             zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
+
+       zfs_ioctl_init_os();
  }
  
  /*
@@ -6828,7 +7234,7 @@ zfs_check_input_nvpairs(nvlist_t *innvl, const zfs_ioc_vec_t *vec)
                         continue;
  
                 if (nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) {
-                       /* at least one non-optionial key is expected here */
+                       /* at least one non-optional key is expected here */
                         if (!required_keys_found)
                                 return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
                         continue;
@@ -6841,14 +7247,15 @@ zfs_check_input_nvpairs(nvlist_t *innvl, const zfs_ioc_vec_t *vec)
         return (0);
  }
  
-int
+static int
  pool_status_check(const char *name, zfs_ioc_namecheck_t type,
      zfs_ioc_poolcheck_t check)
  {
         spa_t *spa;
         int error;
  
-       ASSERT(type == POOL_NAME || type == DATASET_NAME);
+       ASSERT(type == POOL_NAME || type == DATASET_NAME ||
+           type == ENTITY_NAME);
  
         if (check & POOL_CHECK_NONE)
                 return (0);
@@ -6864,6 +7271,41 @@ pool_status_check(const char *name, zfs_ioc_namecheck_t type,
         return (error);
  }
  
+int
+zfsdev_getminor(int fd, minor_t *minorp)
+{
+       zfsdev_state_t *zs, *fpd;
+       zfs_file_t *fp;
+       int rc;
+
+       ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
+
+       if ((rc = zfs_file_get(fd, &fp)))
+               return (rc);
+
+       fpd = zfs_file_private(fp);
+       if (fpd == NULL)
+               return (SET_ERROR(EBADF));
+
+       mutex_enter(&zfsdev_state_lock);
+
+       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
+
+               if (zs->zs_minor == -1)
+                       continue;
+
+               if (fpd == zs) {
+                       *minorp = fpd->zs_minor;
+                       mutex_exit(&zfsdev_state_lock);
+                       return (0);
+               }
+       }
+
+       mutex_exit(&zfsdev_state_lock);
+
+       return (SET_ERROR(EBADF));
+}
+
  static void *
  zfsdev_get_state_impl(minor_t minor, enum zfsdev_state_type which)
  {
@@ -6896,37 +7338,6 @@ zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
         return (ptr);
  }
  
-int
-zfsdev_getminor(struct file *filp, minor_t *minorp)
-{
-       zfsdev_state_t *zs, *fpd;
-
-       ASSERT(filp != NULL);
-       ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
-
-       fpd = filp->private_data;
-       if (fpd == NULL)
-               return (SET_ERROR(EBADF));
-
-       mutex_enter(&zfsdev_state_lock);
-
-       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
-
-               if (zs->zs_minor == -1)
-                       continue;
-
-               if (fpd == zs) {
-                       *minorp = fpd->zs_minor;
-                       mutex_exit(&zfsdev_state_lock);
-                       return (0);
-               }
-       }
-
-       mutex_exit(&zfsdev_state_lock);
-
-       return (SET_ERROR(EBADF));
-}
-
  /*
   * Find a free minor number.  The zfsdev_state_list is expected to
   * be short since it is only a list of currently open file handles.
@@ -6951,111 +7362,22 @@ zfsdev_minor_alloc(void)
         return (0);
  }
  
-static int
-zfsdev_state_init(struct file *filp)
+long
+zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag)
  {
-       zfsdev_state_t *zs, *zsprev = NULL;
-       minor_t minor;
-       boolean_t newzs = B_FALSE;
-
-       ASSERT(MUTEX_HELD(&zfsdev_state_lock));
-
-       minor = zfsdev_minor_alloc();
-       if (minor == 0)
-               return (SET_ERROR(ENXIO));
-
-       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
-               if (zs->zs_minor == -1)
-                       break;
-               zsprev = zs;
-       }
-
-       if (!zs) {
-               zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
-               newzs = B_TRUE;
-       }
-
-       zs->zs_file = filp;
-       filp->private_data = zs;
-
-       zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
-       zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);
-
-
-       /*
-        * In order to provide for lock-free concurrent read access
-        * to the minor list in zfsdev_get_state_impl(), new entries
-        * must be completely written before linking them into the
-        * list whereas existing entries are already linked; the last
-        * operation must be updating zs_minor (from -1 to the new
-        * value).
-        */
-       if (newzs) {
-               zs->zs_minor = minor;
-               smp_wmb();
-               zsprev->zs_next = zs;
-       } else {
-               smp_wmb();
-               zs->zs_minor = minor;
-       }
-
-       return (0);
-}
-
-static int
-zfsdev_state_destroy(struct file *filp)
-{
-       zfsdev_state_t *zs;
-
-       ASSERT(MUTEX_HELD(&zfsdev_state_lock));
-       ASSERT(filp->private_data != NULL);
-
-       zs = filp->private_data;
-       zs->zs_minor = -1;
-       zfs_onexit_destroy(zs->zs_onexit);
-       zfs_zevent_destroy(zs->zs_zevent);
-
-       return (0);
-}
-
-static int
-zfsdev_open(struct inode *ino, struct file *filp)
-{
-       int error;
-
-       mutex_enter(&zfsdev_state_lock);
-       error = zfsdev_state_init(filp);
-       mutex_exit(&zfsdev_state_lock);
-
-       return (-error);
-}
-
-static int
-zfsdev_release(struct inode *ino, struct file *filp)
-{
-       int error;
-
-       mutex_enter(&zfsdev_state_lock);
-       error = zfsdev_state_destroy(filp);
-       mutex_exit(&zfsdev_state_lock);
-
-       return (-error);
-}
-
-static long
-zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
-{
-       zfs_cmd_t *zc;
-       uint_t vecnum;
-       int error, rc, flag = 0;
+       int error, cmd;
         const zfs_ioc_vec_t *vec;
         char *saved_poolname = NULL;
+       uint64_t max_nvlist_src_size;
+       size_t saved_poolname_len = 0;
         nvlist_t *innvl = NULL;
         fstrans_cookie_t cookie;
  
-       vecnum = cmd - ZFS_IOC_FIRST;
+       cmd = vecnum;
+       error = 0;
         if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
-               return (-SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
+               return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
+
         vec = &zfs_ioc_vec[vecnum];
  
         /*
@@ -7063,18 +7385,11 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
          * a normal or legacy handler are registered.
          */
         if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL)
-               return (-SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
-
-       zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
-
-       error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
-       if (error != 0) {
-               error = SET_ERROR(EFAULT);
-               goto out;
-       }
+               return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
  
         zc->zc_iflags = flag & FKIOCTL;
-       if (zc->zc_nvlist_src_size > MAX_NVLIST_SRC_SIZE) {
+       max_nvlist_src_size = zfs_max_nvlist_src_size_os();
+       if (zc->zc_nvlist_src_size > max_nvlist_src_size) {
                 /*
                  * Make sure the user doesn't pass in an insane value for
                  * zc_nvlist_src_size.  We have to check, since we will end
@@ -7119,10 +7434,18 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
                             vec->zvec_namecheck, vec->zvec_pool_check);
                 break;
  
+       case ENTITY_NAME:
+               if (entity_namecheck(zc->zc_name, NULL, NULL) != 0) {
+                       error = SET_ERROR(EINVAL);
+               } else {
+                       error = pool_status_check(zc->zc_name,
+                           vec->zvec_namecheck, vec->zvec_pool_check);
+               }
+               break;
+
         case NO_NAME:
                 break;
         }
-
         /*
          * Ensure that all input pairs are valid before we pass them down
          * to the lower layers.
@@ -7147,13 +7470,15 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
                 goto out;
  
         /* legacy ioctls can modify zc_name */
-       saved_poolname = strdup(zc->zc_name);
-       if (saved_poolname == NULL) {
-               error = SET_ERROR(ENOMEM);
-               goto out;
-       } else {
-               saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
-       }
+       /*
+        * Can't use kmem_strdup() as we might truncate the string and
+        * kmem_strfree() would then free with incorrect size.
+        */
+       saved_poolname_len = strlen(zc->zc_name) + 1;
+       saved_poolname = kmem_alloc(saved_poolname_len, KM_SLEEP);
+
+       strlcpy(saved_poolname, zc->zc_name, saved_poolname_len);
+       saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
  
         if (vec->zvec_func != NULL) {
                 nvlist_t *outnvl;
@@ -7226,167 +7551,69 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
  
  out:
         nvlist_free(innvl);
-       rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
-       if (error == 0 && rc != 0)
-               error = SET_ERROR(EFAULT);
         if (error == 0 && vec->zvec_allow_log) {
                 char *s = tsd_get(zfs_allow_log_key);
                 if (s != NULL)
-                       strfree(s);
-               (void) tsd_set(zfs_allow_log_key, saved_poolname);
-       } else {
-               if (saved_poolname != NULL)
-                       strfree(saved_poolname);
-       }
-
-       kmem_free(zc, sizeof (zfs_cmd_t));
-       return (-error);
-}
-
-#ifdef CONFIG_COMPAT
-static long
-zfsdev_compat_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
-{
-       return (zfsdev_ioctl(filp, cmd, arg));
-}
-#else
-#define        zfsdev_compat_ioctl     NULL
-#endif
-
-static const struct file_operations zfsdev_fops = {
-       .open           = zfsdev_open,
-       .release        = zfsdev_release,
-       .unlocked_ioctl = zfsdev_ioctl,
-       .compat_ioctl   = zfsdev_compat_ioctl,
-       .owner          = THIS_MODULE,
-};
-
-static struct miscdevice zfs_misc = {
-       .minor          = ZFS_MINOR,
-       .name           = ZFS_DRIVER,
-       .fops           = &zfsdev_fops,
-};
-
-MODULE_ALIAS_MISCDEV(ZFS_MINOR);
-MODULE_ALIAS("devname:zfs");
-
-static int
-zfs_attach(void)
-{
-       int error;
-
-       mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
-       zfsdev_state_list = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
-       zfsdev_state_list->zs_minor = -1;
-
-       error = misc_register(&zfs_misc);
-       if (error == -EBUSY) {
-               /*
-                * Fallback to dynamic minor allocation in the event of a
-                * collision with a reserved minor in linux/miscdevice.h.
-                * In this case the kernel modules must be manually loaded.
-                */
-               printk(KERN_INFO "ZFS: misc_register() with static minor %d "
-                   "failed %d, retrying with MISC_DYNAMIC_MINOR\n",
-                   ZFS_MINOR, error);
-
-               zfs_misc.minor = MISC_DYNAMIC_MINOR;
-               error = misc_register(&zfs_misc);
+                       kmem_strfree(s);
+               (void) tsd_set(zfs_allow_log_key, kmem_strdup(saved_poolname));
         }
-
-       if (error)
-               printk(KERN_INFO "ZFS: misc_register() failed %d\n", error);
+       if (saved_poolname != NULL)
+               kmem_free(saved_poolname, saved_poolname_len);
  
         return (error);
  }
  
-static void
-zfs_detach(void)
-{
-       zfsdev_state_t *zs, *zsprev = NULL;
-
-       misc_deregister(&zfs_misc);
-       mutex_destroy(&zfsdev_state_lock);
-
-       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
-               if (zsprev)
-                       kmem_free(zsprev, sizeof (zfsdev_state_t));
-               zsprev = zs;
-       }
-       if (zsprev)
-               kmem_free(zsprev, sizeof (zfsdev_state_t));
-}
-
-static void
-zfs_allow_log_destroy(void *arg)
-{
-       char *poolname = arg;
-
-       if (poolname != NULL)
-               strfree(poolname);
-}
-
-#ifdef DEBUG
-#define        ZFS_DEBUG_STR   " (DEBUG mode)"
-#else
-#define        ZFS_DEBUG_STR   ""
-#endif
-
-static int __init
-_init(void)
+int
+zfs_kmod_init(void)
  {
         int error;
  
-       error = -vn_set_pwd("/");
-       if (error) {
-               printk(KERN_NOTICE
-                   "ZFS: Warning unable to set pwd to '/': %d\n", error);
-               return (error);
-       }
-
-       if ((error = -zvol_init()) != 0)
+       if ((error = zvol_init()) != 0)
                 return (error);
  
-       spa_init(FREAD | FWRITE);
+       spa_init(SPA_MODE_READ | SPA_MODE_WRITE);
         zfs_init();
  
         zfs_ioctl_init();
-       zfs_sysfs_init();
  
-       if ((error = zfs_attach()) != 0)
+       mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
+       zfsdev_state_list = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
+       zfsdev_state_list->zs_minor = -1;
+
+       if ((error = zfsdev_attach()) != 0)
                 goto out;
  
         tsd_create(&zfs_fsyncer_key, NULL);
         tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
         tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
  
-       printk(KERN_NOTICE "ZFS: Loaded module v%s-%s%s, "
-           "ZFS pool version %s, ZFS filesystem version %s\n",
-           ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR,
-           SPA_VERSION_STRING, ZPL_VERSION_STRING);
-#ifndef CONFIG_FS_POSIX_ACL
-       printk(KERN_NOTICE "ZFS: Posix ACLs disabled by kernel\n");
-#endif /* CONFIG_FS_POSIX_ACL */
-
         return (0);
-
  out:
-       zfs_sysfs_fini();
         zfs_fini();
         spa_fini();
-       (void) zvol_fini();
-       printk(KERN_NOTICE "ZFS: Failed to Load ZFS Filesystem v%s-%s%s"
-           ", rc = %d\n", ZFS_META_VERSION, ZFS_META_RELEASE,
-           ZFS_DEBUG_STR, error);
+       zvol_fini();
  
         return (error);
  }
  
-static void __exit
-_fini(void)
+void
+zfs_kmod_fini(void)
  {
-       zfs_detach();
-       zfs_sysfs_fini();
+       zfsdev_state_t *zs, *zsnext = NULL;
+
+       zfsdev_detach();
+
+       mutex_destroy(&zfsdev_state_lock);
+
+       for (zs = zfsdev_state_list; zs != NULL; zs = zsnext) {
+               zsnext = zs->zs_next;
+               if (zs->zs_onexit)
+                       zfs_onexit_destroy(zs->zs_onexit);
+               if (zs->zs_zevent)
+                       zfs_zevent_destroy(zs->zs_zevent);
+               kmem_free(zs, sizeof (zfsdev_state_t));
+       }
+
         zfs_fini();
         spa_fini();
         zvol_fini();
@@ -7394,17 +7621,9 @@ _fini(void)
         tsd_destroy(&zfs_fsyncer_key);
         tsd_destroy(&rrw_tsd_key);
         tsd_destroy(&zfs_allow_log_key);
-
-       printk(KERN_NOTICE "ZFS: Unloaded module v%s-%s%s\n",
-           ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
  }
  
-#if defined(_KERNEL)
-module_init(_init);
-module_exit(_fini);
-
-MODULE_DESCRIPTION("ZFS");
-MODULE_AUTHOR(ZFS_META_AUTHOR);
-MODULE_LICENSE(ZFS_META_LICENSE);
-MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
-#endif
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, ULONG, ZMOD_RW,
+    "Maximum size in bytes allowed for src nvlist passed with ZFS ioctls");
+/* END CSTYLED */