OpenZFS 7614, 9064 - zfs device evacuation/removal

[mirror_zfs.git] / module / zfs / zfs_ioctl.c
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index cd7697058983a2349d1c26482ed494cdfd7dfc18..b1ac149b38af200071f6e9663fc6c31921cf76ec 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -22,14 +22,21 @@
  /*
   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   * Portions Copyright 2011 Martin Matuska
+ * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
   * Portions Copyright 2012 Pawel Jakub Dawidek <pawel@dawidek.net>
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright (c) 201i3 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
+ * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
   * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
   * Copyright (c) 2013 Steven Hartland. All rights reserved.
- * Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2016 Toomas Soome <tsoome@me.com>
+ * Copyright (c) 2016 Actifio, Inc. All rights reserved.
+ * Copyright (c) 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+ * Copyright (c) 2017 Datto Inc. All rights reserved.
+ * Copyright 2017 RackTop Systems.
+ * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
   */
  
  /*
@@ -152,6 +159,7 @@
  #include <sys/spa.h>
  #include <sys/spa_impl.h>
  #include <sys/vdev.h>
+#include <sys/vdev_impl.h>
  #include <sys/priv_impl.h>
  #include <sys/dmu.h>
  #include <sys/dsl_dir.h>
@@ -178,20 +186,34 @@
  #include <sys/dsl_scan.h>
  #include <sharefs/share.h>
  #include <sys/fm/util.h>
+#include <sys/dsl_crypt.h>
  
  #include <sys/dmu_send.h>
  #include <sys/dsl_destroy.h>
  #include <sys/dsl_bookmark.h>
  #include <sys/dsl_userhold.h>
  #include <sys/zfeature.h>
+#include <sys/zcp.h>
+#include <sys/zio_checksum.h>
+#include <sys/vdev_removal.h>
  
  #include <linux/miscdevice.h>
+#include <linux/slab.h>
  
  #include "zfs_namecheck.h"
  #include "zfs_prop.h"
  #include "zfs_deleg.h"
  #include "zfs_comutil.h"
  
+#include <sys/lua/lua.h>
+#include <sys/lua/lauxlib.h>
+
+/*
+ * Limit maximum nvlist size.  We don't want users passing in insane values
+ * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
+ */
+#define        MAX_NVLIST_SRC_SIZE     KMALLOC_MAX_SIZE
+
  kmutex_t zfsdev_state_lock;
  zfsdev_state_t *zfsdev_state_list;
  
@@ -235,9 +257,18 @@ static const char *userquota_perms[] = {
         ZFS_DELEG_PERM_USERQUOTA,
         ZFS_DELEG_PERM_GROUPUSED,
         ZFS_DELEG_PERM_GROUPQUOTA,
+       ZFS_DELEG_PERM_USEROBJUSED,
+       ZFS_DELEG_PERM_USEROBJQUOTA,
+       ZFS_DELEG_PERM_GROUPOBJUSED,
+       ZFS_DELEG_PERM_GROUPOBJQUOTA,
+       ZFS_DELEG_PERM_PROJECTUSED,
+       ZFS_DELEG_PERM_PROJECTQUOTA,
+       ZFS_DELEG_PERM_PROJECTOBJUSED,
+       ZFS_DELEG_PERM_PROJECTOBJQUOTA,
  };
  
  static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
+static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc);
  static int zfs_check_settable(const char *name, nvpair_t *property,
      cred_t *cr);
  static int zfs_check_clearable(char *dataset, nvlist_t *props,
@@ -247,55 +278,6 @@ static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
  int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
  static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
  
-#if defined(HAVE_DECLARE_EVENT_CLASS)
-void
-__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
-{
-       const char *newfile;
-       size_t size = 4096;
-       char *buf = kmem_alloc(size, KM_SLEEP);
-       char *nl;
-       va_list adx;
-
-       /*
-        * Get rid of annoying prefix to filename.
-        */
-       newfile = strrchr(file, '/');
-       if (newfile != NULL) {
-               newfile = newfile + 1; /* Get rid of leading / */
-       } else {
-               newfile = file;
-       }
-
-       va_start(adx, fmt);
-       (void) vsnprintf(buf, size, fmt, adx);
-       va_end(adx);
-
-       /*
-        * Get rid of trailing newline.
-        */
-       nl = strrchr(buf, '\n');
-       if (nl != NULL)
-               *nl = '\0';
-
-       /*
-        * To get this data enable the zfs__dprintf trace point as shown:
-        *
-        * # Enable zfs__dprintf tracepoint, clear the tracepoint ring buffer
-        * $ echo 1 > /sys/module/zfs/parameters/zfs_flags
-        * $ echo 1 > /sys/kernel/debug/tracing/events/zfs/enable
-        * $ echo 0 > /sys/kernel/debug/tracing/trace
-        *
-        * # Dump the ring buffer.
-        * $ cat /sys/kernel/debug/tracing/trace
-        */
-       DTRACE_PROBE4(zfs__dprintf,
-           char *, newfile, char *, func, int, line, char *, buf);
-
-       kmem_free(buf, size);
-}
-#endif /* HAVE_DECLARE_EVENT_CLASS */
-
  static void
  history_str_free(char *buf)
  {
@@ -502,6 +484,14 @@ zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
         dsl_dataset_t *ds;
         dsl_pool_t *dp;
  
+       /*
+        * First do a quick check for root in the global zone, which
+        * is allowed to do all write_perms.  This ensures that zfs_ioc_*
+        * will get to handle nonexistent datasets.
+        */
+       if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
+               return (0);
+
         error = dsl_pool_hold(name, FTAG, &dp);
         if (error != 0)
                 return (error);
@@ -575,7 +565,7 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
         /*
          * If the existing dataset label is nondefault, check if the
          * dataset is mounted (label cannot be changed while mounted).
-        * Get the zfs_sb_t; if there isn't one, then the dataset isn't
+        * Get the zfsvfs_t; if there isn't one, then the dataset isn't
          * mounted (or isn't a dataset, doesn't exist, ...).
          */
         if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
@@ -586,12 +576,12 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
                  * Try to own the dataset; abort if there is any error,
                  * (e.g., already mounted, in use, or other error).
                  */
-               error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
+               error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
                     setsl_tag, &os);
                 if (error != 0)
                         return (SET_ERROR(EPERM));
  
-               dmu_objset_disown(os, setsl_tag);
+               dmu_objset_disown(os, B_TRUE, setsl_tag);
  
                 if (new_default) {
                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
@@ -616,7 +606,7 @@ out_check:
                 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
         return (0);
  #else
-       return (ENOTSUP);
+       return (SET_ERROR(ENOTSUP));
  #endif /* HAVE_MLSLABEL */
  }
  
@@ -641,12 +631,14 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
                 break;
  
         case ZFS_PROP_QUOTA:
+       case ZFS_PROP_FILESYSTEM_LIMIT:
+       case ZFS_PROP_SNAPSHOT_LIMIT:
                 if (!INGLOBALZONE(curproc)) {
                         uint64_t zoned;
-                       char setpoint[MAXNAMELEN];
+                       char setpoint[ZFS_MAX_DATASET_NAME_LEN];
                         /*
                          * Unprivileged users are allowed to modify the
-                        * quota on things *under* (ie. contained by)
+                        * limit on things *under* (ie. contained by)
                          * the thing they own.
                          */
                         if (dsl_prop_get_integer(dsname, "zoned", &zoned,
@@ -885,7 +877,7 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  int
  zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
  {
-       char    parentname[MAXNAMELEN];
+       char    parentname[ZFS_MAX_DATASET_NAME_LEN];
         int     error;
  
         if ((error = zfs_secpolicy_write_perms(from,
@@ -938,13 +930,13 @@ zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
  
         if (error == 0) {
-               char parentname[MAXNAMELEN];
+               char parentname[ZFS_MAX_DATASET_NAME_LEN];
                 dsl_dataset_t *origin = NULL;
                 dsl_dir_t *dd;
                 dd = clone->ds_dir;
  
                 error = dsl_dataset_hold_obj(dd->dd_pool,
-                   dd->dd_phys->dd_origin_obj, FTAG, &origin);
+                   dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
                 if (error != 0) {
                         dsl_dataset_rele(clone, FTAG);
                         dsl_pool_rele(dp, FTAG);
@@ -984,6 +976,13 @@ zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
             ZFS_DELEG_PERM_CREATE, cr));
  }
  
+/* ARGSUSED */
+static int
+zfs_secpolicy_recv_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+       return (zfs_secpolicy_recv(zc, innvl, cr));
+}
+
  int
  zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
  {
@@ -1030,9 +1029,8 @@ static int
  zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  {
         int error = 0;
-       nvpair_t *pair;
  
-       for (pair = nvlist_next_nvpair(innvl, NULL);
+       for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
             pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
                 char *name = nvpair_name(pair);
                 char *hashp = strchr(name, '#');
@@ -1051,6 +1049,14 @@ zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
         return (error);
  }
  
+/* ARGSUSED */
+static int
+zfs_secpolicy_remap(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+       return (zfs_secpolicy_write_perms(zc->zc_name,
+           ZFS_DELEG_PERM_REMAP, cr));
+}
+
  /* ARGSUSED */
  static int
  zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
@@ -1108,7 +1114,7 @@ zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  static int
  zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  {
-       char    parentname[MAXNAMELEN];
+       char    parentname[ZFS_MAX_DATASET_NAME_LEN];
         int     error;
         char    *origin;
  
@@ -1202,13 +1208,19 @@ zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
                  * themself, allow it.
                  */
                 if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
-                   zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
+                   zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
+                   zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
+                   zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) {
                         if (zc->zc_guid == crgetuid(cr))
                                 return (0);
-               } else {
+               } else if (zc->zc_objset_type == ZFS_PROP_GROUPUSED ||
+                   zc->zc_objset_type == ZFS_PROP_GROUPQUOTA ||
+                   zc->zc_objset_type == ZFS_PROP_GROUPOBJUSED ||
+                   zc->zc_objset_type == ZFS_PROP_GROUPOBJQUOTA) {
                         if (groupmember(zc->zc_guid, cr))
                                 return (0);
                 }
+               /* else is for project quota/used */
         }
  
         return (zfs_secpolicy_write_perms(zc->zc_name,
@@ -1251,7 +1263,7 @@ zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  
         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
             pair = nvlist_next_nvpair(holds, pair)) {
-               char fsname[MAXNAMELEN];
+               char fsname[ZFS_MAX_DATASET_NAME_LEN];
                 error = dmu_fsname(nvpair_name(pair), fsname);
                 if (error != 0)
                         return (error);
@@ -1272,7 +1284,7 @@ zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  
         for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
             pair = nvlist_next_nvpair(innvl, pair)) {
-               char fsname[MAXNAMELEN];
+               char fsname[ZFS_MAX_DATASET_NAME_LEN];
                 error = dmu_fsname(nvpair_name(pair), fsname);
                 if (error != 0)
                         return (error);
@@ -1311,6 +1323,28 @@ zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
         return (error);
  }
  
+/*
+ * Require ZFS_DELEG_PERM_LOAD_KEY on zc->zc_name.  innvl is not examined.
+ */
+/* ARGSUSED */
+static int
+zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+       return (zfs_secpolicy_write_perms(zc->zc_name,
+           ZFS_DELEG_PERM_LOAD_KEY, cr));
+}
+
+/*
+ * Require ZFS_DELEG_PERM_CHANGE_KEY on zc->zc_name.  innvl is not examined.
+ */
+/* ARGSUSED */
+static int
+zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+       return (zfs_secpolicy_write_perms(zc->zc_name,
+           ZFS_DELEG_PERM_CHANGE_KEY, cr));
+}
+
  /*
   * Returns the nvlist as specified by the user in the zfs_cmd_t.
   */
@@ -1332,7 +1358,7 @@ get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
         if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
             iflag)) != 0) {
                 vmem_free(packed, size);
-               return (error);
+               return (SET_ERROR(EFAULT));
         }
  
         if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
@@ -1409,55 +1435,63 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
         return (error);
  }
  
-static int
-get_zfs_sb(const char *dsname, zfs_sb_t **zsbp)
+int
+getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
  {
-       objset_t *os;
-       int error;
-
-       error = dmu_objset_hold(dsname, FTAG, &os);
-       if (error != 0)
-               return (error);
+       int error = 0;
         if (dmu_objset_type(os) != DMU_OST_ZFS) {
-               dmu_objset_rele(os, FTAG);
                 return (SET_ERROR(EINVAL));
         }
  
         mutex_enter(&os->os_user_ptr_lock);
-       *zsbp = dmu_objset_get_user(os);
-       if (*zsbp && (*zsbp)->z_sb) {
-               atomic_inc(&((*zsbp)->z_sb->s_active));
-       } else {
+       *zfvp = dmu_objset_get_user(os);
+       /* bump s_active only when non-zero to prevent umount race */
+       if (*zfvp == NULL || (*zfvp)->z_sb == NULL ||
+           !atomic_inc_not_zero(&((*zfvp)->z_sb->s_active))) {
                 error = SET_ERROR(ESRCH);
         }
         mutex_exit(&os->os_user_ptr_lock);
+       return (error);
+}
+
+int
+getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
+{
+       objset_t *os;
+       int error;
+
+       error = dmu_objset_hold(dsname, FTAG, &os);
+       if (error != 0)
+               return (error);
+
+       error = getzfsvfs_impl(os, zfvp);
         dmu_objset_rele(os, FTAG);
         return (error);
  }
  
  /*
- * Find a zfs_sb_t for a mounted filesystem, or create our own, in which
+ * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
   * case its z_sb will be NULL, and it will be opened as the owner.
   * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
   * which prevents all inode ops from running.
   */
  static int
-zfs_sb_hold(const char *name, void *tag, zfs_sb_t **zsbp, boolean_t writer)
+zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
  {
         int error = 0;
  
-       if (get_zfs_sb(name, zsbp) != 0)
-               error = zfs_sb_create(name, zsbp);
+       if (getzfsvfs(name, zfvp) != 0)
+               error = zfsvfs_create(name, B_FALSE, zfvp);
         if (error == 0) {
-               rrw_enter(&(*zsbp)->z_teardown_lock, (writer) ? RW_WRITER :
+               rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
                     RW_READER, tag);
-               if ((*zsbp)->z_unmounted) {
+               if ((*zfvp)->z_unmounted) {
                         /*
                          * XXX we could probably try again, since the unmounting
                          * thread should be just about to disassociate the
-                        * objset from the zsb.
+                        * objset from the zfsvfs.
                          */
-                       rrw_exit(&(*zsbp)->z_teardown_lock, tag);
+                       rrm_exit(&(*zfvp)->z_teardown_lock, tag);
                         return (SET_ERROR(EBUSY));
                 }
         }
@@ -1465,15 +1499,15 @@ zfs_sb_hold(const char *name, void *tag, zfs_sb_t **zsbp, boolean_t writer)
  }
  
  static void
-zfs_sb_rele(zfs_sb_t *zsb, void *tag)
+zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
  {
-       rrw_exit(&zsb->z_teardown_lock, tag);
+       rrm_exit(&zfsvfs->z_teardown_lock, tag);
  
-       if (zsb->z_sb) {
-               deactivate_super(zsb->z_sb);
+       if (zfsvfs->z_sb) {
+               deactivate_super(zfsvfs->z_sb);
         } else {
-               dmu_objset_disown(zsb->z_os, zsb);
-               zfs_sb_free(zsb);
+               dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
+               zfsvfs_free(zfsvfs);
         }
  }
  
@@ -1484,6 +1518,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
         nvlist_t *config, *props = NULL;
         nvlist_t *rootprops = NULL;
         nvlist_t *zplprops = NULL;
+       dsl_crypto_params_t *dcp = NULL;
  
         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
             zc->zc_iflags, &config)))
@@ -1498,6 +1533,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
  
         if (props) {
                 nvlist_t *nvl = NULL;
+               nvlist_t *hidden_args = NULL;
                 uint64_t version = SPA_VERSION;
  
                 (void) nvlist_lookup_uint64(props,
@@ -1516,6 +1552,20 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
                         }
                         (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
                 }
+
+               (void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
+                   &hidden_args);
+               error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
+                   rootprops, hidden_args, &dcp);
+               if (error != 0) {
+                       /* returning early, so drop rootprops here too */
+                       nvlist_free(rootprops);
+                       nvlist_free(config);
+                       nvlist_free(props);
+                       return (error);
+               }
+               (void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
+
                 VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
                 error = zfs_fill_zplprops_root(version, rootprops,
                     zplprops, NULL);
@@ -1523,7 +1571,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
                         goto pool_props_bad;
         }
  
-       error = spa_create(zc->zc_name, config, props, zplprops);
+       error = spa_create(zc->zc_name, config, props, zplprops, dcp);
  
         /*
          * Set the remaining root properties
@@ -1537,6 +1585,7 @@ pool_props_bad:
         nvlist_free(zplprops);
         nvlist_free(config);
         nvlist_free(props);
+       dsl_crypto_params_free(dcp, !!error);
  
         return (error);
  }
@@ -1547,8 +1596,7 @@ zfs_ioc_pool_destroy(zfs_cmd_t *zc)
         int error;
         zfs_log_history(zc);
         error = spa_destroy(zc->zc_name);
-       if (error == 0)
-               zvol_remove_minors(zc->zc_name);
+
         return (error);
  }
  
@@ -1584,9 +1632,7 @@ zfs_ioc_pool_import(zfs_cmd_t *zc)
         }
  
         nvlist_free(config);
-
-       if (props)
-               nvlist_free(props);
+       nvlist_free(props);
  
         return (error);
  }
@@ -1600,8 +1646,7 @@ zfs_ioc_pool_export(zfs_cmd_t *zc)
  
         zfs_log_history(zc);
         error = spa_export(zc->zc_name, NULL, force, hardforce);
-       if (error == 0)
-               zvol_remove_minors(zc->zc_name);
+
         return (error);
  }
  
@@ -1664,7 +1709,7 @@ zfs_ioc_pool_stats(zfs_cmd_t *zc)
  static int
  zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
  {
-       nvlist_t *tryconfig, *config;
+       nvlist_t *tryconfig, *config = NULL;
         int error;
  
         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
@@ -1688,6 +1733,7 @@ zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
   * inputs:
   * zc_name              name of the pool
   * zc_cookie            scan func (pool_scan_func_t)
+ * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
   */
  static int
  zfs_ioc_pool_scan(zfs_cmd_t *zc)
@@ -1695,10 +1741,15 @@ zfs_ioc_pool_scan(zfs_cmd_t *zc)
         spa_t *spa;
         int error;
  
+       if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
+               return (SET_ERROR(EINVAL));
+
         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
                 return (error);
  
-       if (zc->zc_cookie == POOL_SCAN_NONE)
+       if (zc->zc_flags == POOL_SCRUB_PAUSE)
+               error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
+       else if (zc->zc_cookie == POOL_SCAN_NONE)
                 error = spa_scan_stop(spa);
         else
                 error = spa_scan(spa, zc->zc_cookie);
@@ -1810,15 +1861,16 @@ zfs_ioc_obj_to_path(zfs_cmd_t *zc)
         int error;
  
         /* XXX reading from objset not owned */
-       if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
+       if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
+           FTAG, &os)) != 0)
                 return (error);
         if (dmu_objset_type(os) != DMU_OST_ZFS) {
-               dmu_objset_rele(os, FTAG);
+               dmu_objset_rele_flags(os, B_TRUE, FTAG);
                 return (SET_ERROR(EINVAL));
         }
         error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
             sizeof (zc->zc_value));
-       dmu_objset_rele(os, FTAG);
+       dmu_objset_rele_flags(os, B_TRUE, FTAG);
  
         return (error);
  }
@@ -1839,15 +1891,16 @@ zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
         int error;
  
         /* XXX reading from objset not owned */
-       if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
+       if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
+           FTAG, &os)) != 0)
                 return (error);
         if (dmu_objset_type(os) != DMU_OST_ZFS) {
-               dmu_objset_rele(os, FTAG);
+               dmu_objset_rele_flags(os, B_TRUE, FTAG);
                 return (SET_ERROR(EINVAL));
         }
         error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
             sizeof (zc->zc_value));
-       dmu_objset_rele(os, FTAG);
+       dmu_objset_rele_flags(os, B_TRUE, FTAG);
  
         return (error);
  }
@@ -1876,8 +1929,8 @@ zfs_ioc_vdev_add(zfs_cmd_t *zc)
  /*
   * inputs:
   * zc_name             name of the pool
- * zc_nvlist_conf      nvlist of devices to remove
- * zc_cookie           to stop the remove?
+ * zc_guid             guid of vdev to remove
+ * zc_cookie           cancel removal
   */
  static int
  zfs_ioc_vdev_remove(zfs_cmd_t *zc)
@@ -1888,7 +1941,11 @@ zfs_ioc_vdev_remove(zfs_cmd_t *zc)
         error = spa_open(zc->zc_name, &spa, FTAG);
         if (error != 0)
                 return (error);
-       error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
+       if (zc->zc_cookie != 0) {
+               error = spa_vdev_remove_cancel(spa);
+       } else {
+               error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
+       }
         spa_close(spa, FTAG);
         return (error);
  }
@@ -1913,7 +1970,8 @@ zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
  
         case VDEV_STATE_FAULTED:
                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
-                   zc->zc_obj != VDEV_AUX_EXTERNAL)
+                   zc->zc_obj != VDEV_AUX_EXTERNAL &&
+                   zc->zc_obj != VDEV_AUX_EXTERNAL_PERSIST)
                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
  
                 error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
@@ -2061,8 +2119,10 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
                 if (!zc->zc_objset_stats.dds_inconsistent &&
                     dmu_objset_type(os) == DMU_OST_ZVOL) {
                         error = zvol_get_stats(os, nv);
-                       if (error == EIO)
+                       if (error == EIO) {
+                               nvlist_free(nv);
                                 return (error);
+                       }
                         VERIFY0(error);
                 }
                 if (error == 0)
@@ -2296,7 +2356,8 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
          * A dataset name of maximum length cannot have any snapshots,
          * so exit immediately.
          */
-       if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
+       if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
+           ZFS_MAX_DATASET_NAME_LEN) {
                 dmu_objset_rele(os, FTAG);
                 return (SET_ERROR(ESRCH));
         }
@@ -2341,7 +2402,7 @@ zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
         zfs_userquota_prop_t type;
         uint64_t rid;
         uint64_t quota;
-       zfs_sb_t *zsb;
+       zfsvfs_t *zfsvfs;
         int err;
  
         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
@@ -2366,10 +2427,10 @@ zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
         rid = valary[1];
         quota = valary[2];
  
-       err = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE);
+       err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
         if (err == 0) {
-               err = zfs_set_userquota(zsb, type, domain, rid, quota);
-               zfs_sb_rele(zsb, FTAG);
+               err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
+               zfsvfs_rele(zfsvfs, FTAG);
         }
  
         return (err);
@@ -2389,8 +2450,9 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
  {
         const char *propname = nvpair_name(pair);
         zfs_prop_t prop = zfs_name_to_prop(propname);
-       uint64_t intval;
-       int err;
+       uint64_t intval = 0;
+       char *strval = NULL;
+       int err = -1;
  
         if (prop == ZPROP_INVAL) {
                 if (zfs_prop_userquota(propname))
@@ -2405,10 +2467,12 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
                     &pair) == 0);
         }
  
-       if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
-               return (-1);
-
-       VERIFY(0 == nvpair_value_uint64(pair, &intval));
+       /* all special properties are numeric except for keylocation */
+       if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
+               strval = fnvpair_value_string(pair);
+       } else {
+               intval = fnvpair_value_uint64(pair);
+       }
  
         switch (prop) {
         case ZFS_PROP_QUOTA:
@@ -2417,6 +2481,31 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
         case ZFS_PROP_REFQUOTA:
                 err = dsl_dataset_set_refquota(dsname, source, intval);
                 break;
+       case ZFS_PROP_FILESYSTEM_LIMIT:
+       case ZFS_PROP_SNAPSHOT_LIMIT:
+               if (intval == UINT64_MAX) {
+                       /* clearing the limit, just do it */
+                       err = 0;
+               } else {
+                       err = dsl_dir_activate_fs_ss_limit(dsname);
+               }
+               /*
+                * Set err to -1 to force the zfs_set_prop_nvlist code down the
+                * default path to set the value in the nvlist.
+                */
+               if (err == 0)
+                       err = -1;
+               break;
+       case ZFS_PROP_KEYLOCATION:
+               err = dsl_crypto_can_set_keylocation(dsname, strval);
+
+               /*
+                * Set err to -1 to force the zfs_set_prop_nvlist code down the
+                * default path to set the value in the nvlist.
+                */
+               if (err == 0)
+                       err = -1;
+               break;
         case ZFS_PROP_RESERVATION:
                 err = dsl_dir_set_reservation(dsname, source, intval);
                 break;
@@ -2427,17 +2516,20 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
                 err = zvol_set_volsize(dsname, intval);
                 break;
         case ZFS_PROP_SNAPDEV:
-               err = zvol_set_snapdev(dsname, intval);
+               err = zvol_set_snapdev(dsname, source, intval);
+               break;
+       case ZFS_PROP_VOLMODE:
+               err = zvol_set_volmode(dsname, source, intval);
                 break;
         case ZFS_PROP_VERSION:
         {
-               zfs_sb_t *zsb;
+               zfsvfs_t *zfsvfs;
  
-               if ((err = zfs_sb_hold(dsname, FTAG, &zsb, B_TRUE)) != 0)
+               if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
                         break;
  
-               err = zfs_set_version(zsb, intval);
-               zfs_sb_rele(zsb, FTAG);
+               err = zfs_set_version(zfsvfs, intval);
+               zfsvfs_rele(zfsvfs, FTAG);
  
                 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
                         zfs_cmd_t *zc;
@@ -2445,6 +2537,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
                         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
                         (void) strcpy(zc->zc_name, dsname);
                         (void) zfs_ioc_userspace_upgrade(zc);
+                       (void) zfs_ioc_id_quota_upgrade(zc);
                         kmem_free(zc, sizeof (zfs_cmd_t));
                 }
                 break;
@@ -2496,7 +2589,11 @@ retry:
                 }
  
                 /* Validate value type */
-               if (err == 0 && prop == ZPROP_INVAL) {
+               if (err == 0 && source == ZPROP_SRC_INHERITED) {
+                       /* inherited properties are expected to be booleans */
+                       if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
+                               err = SET_ERROR(EINVAL);
+               } else if (err == 0 && prop == ZPROP_INVAL) {
                         if (zfs_prop_user(propname)) {
                                 if (nvpair_type(propval) != DATA_TYPE_STRING)
                                         err = SET_ERROR(EINVAL);
@@ -2541,7 +2638,11 @@ retry:
                         err = zfs_check_settable(dsname, pair, CRED());
  
                 if (err == 0) {
-                       err = zfs_prop_set_special(dsname, source, pair);
+                       if (source == ZPROP_SRC_INHERITED)
+                               err = -1; /* does not need special handling */
+                       else
+                               err = zfs_prop_set_special(dsname, source,
+                                   pair);
                         if (err == -1) {
                                 /*
                                  * For better performance we build up a list of
@@ -2593,6 +2694,9 @@ retry:
                                 strval = fnvpair_value_string(propval);
                                 err = dsl_prop_set_string(dsname, propname,
                                     source, strval);
+                       } else if (nvpair_type(propval) == DATA_TYPE_BOOLEAN) {
+                               err = dsl_prop_inherit(dsname, propname,
+                                   source);
                         } else {
                                 intval = fnvpair_value_uint64(propval);
                                 err = dsl_prop_set_int(dsname, propname, source,
@@ -2745,50 +2849,12 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc)
         zprop_source_t source = (received
             ? ZPROP_SRC_NONE            /* revert to received value, if any */
             : ZPROP_SRC_INHERITED);     /* explicitly inherit */
+       nvlist_t *dummy;
+       nvpair_t *pair;
+       zprop_type_t type;
+       int err;
  
-       if (received) {
-               nvlist_t *dummy;
-               nvpair_t *pair;
-               zprop_type_t type;
-               int err;
-
-               /*
-                * zfs_prop_set_special() expects properties in the form of an
-                * nvpair with type info.
-                */
-               if (prop == ZPROP_INVAL) {
-                       if (!zfs_prop_user(propname))
-                               return (SET_ERROR(EINVAL));
-
-                       type = PROP_TYPE_STRING;
-               } else if (prop == ZFS_PROP_VOLSIZE ||
-                   prop == ZFS_PROP_VERSION) {
-                       return (SET_ERROR(EINVAL));
-               } else {
-                       type = zfs_prop_get_type(prop);
-               }
-
-               VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
-               switch (type) {
-               case PROP_TYPE_STRING:
-                       VERIFY(0 == nvlist_add_string(dummy, propname, ""));
-                       break;
-               case PROP_TYPE_NUMBER:
-               case PROP_TYPE_INDEX:
-                       VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
-                       break;
-               default:
-                       nvlist_free(dummy);
-                       return (SET_ERROR(EINVAL));
-               }
-
-               pair = nvlist_next_nvpair(dummy, NULL);
-               err = zfs_prop_set_special(zc->zc_name, source, pair);
-               nvlist_free(dummy);
-               if (err != -1)
-                       return (err); /* special property already handled */
-       } else {
+       if (!received) {
                 /*
                  * Only check this in the non-received case. We want to allow
                  * 'inherit -S' to revert non-inheritable properties like quota
@@ -2799,8 +2865,49 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc)
                         return (SET_ERROR(EINVAL));
         }
  
-       /* property name has been validated by zfs_secpolicy_inherit_prop() */
-       return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
+       if (prop == ZPROP_INVAL) {
+               if (!zfs_prop_user(propname))
+                       return (SET_ERROR(EINVAL));
+
+               type = PROP_TYPE_STRING;
+       } else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) {
+               return (SET_ERROR(EINVAL));
+       } else {
+               type = zfs_prop_get_type(prop);
+       }
+
+       /*
+        * zfs_prop_set_special() expects properties in the form of an
+        * nvpair with type info.
+        */
+       dummy = fnvlist_alloc();
+
+       switch (type) {
+       case PROP_TYPE_STRING:
+               VERIFY(0 == nvlist_add_string(dummy, propname, ""));
+               break;
+       case PROP_TYPE_NUMBER:
+       case PROP_TYPE_INDEX:
+               VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
+               break;
+       default:
+               err = SET_ERROR(EINVAL);
+               goto errout;
+       }
+
+       pair = nvlist_next_nvpair(dummy, NULL);
+       if (pair == NULL) {
+               err = SET_ERROR(EINVAL);
+       } else {
+               err = zfs_prop_set_special(zc->zc_name, source, pair);
+               if (err == -1) /* property is not "special", needs handling */
+                       err = dsl_prop_inherit(zc->zc_name, zc->zc_value,
+                           source);
+       }
+
+errout:
+       nvlist_free(dummy);
+       return (err);
  }
  
  static int
@@ -2826,7 +2933,7 @@ zfs_ioc_pool_set_props(zfs_cmd_t *zc)
                 mutex_enter(&spa_namespace_lock);
                 if ((spa = spa_lookup(zc->zc_name)) != NULL) {
                         spa_configfile_set(spa, props, B_FALSE);
-                       spa_config_sync(spa, B_FALSE, B_TRUE);
+                       spa_write_cachefile(spa, B_FALSE, B_TRUE);
                 }
                 mutex_exit(&spa_namespace_lock);
                 if (spa != NULL) {
@@ -2993,6 +3100,9 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
  
         ASSERT(zplprops != NULL);
  
+       if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
+               return (SET_ERROR(EINVAL));
+
         /*
          * Pull out creator prop choices, if any.
          */
@@ -3069,7 +3179,7 @@ zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
         boolean_t fuids_ok, sa_ok;
         uint64_t zplver = ZPL_VERSION;
         objset_t *os = NULL;
-       char parentname[MAXNAMELEN];
+       char parentname[ZFS_MAX_DATASET_NAME_LEN];
         char *cp;
         spa_t *spa;
         uint64_t spa_vers;
@@ -3124,6 +3234,8 @@ zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
   * innvl: {
   *     "type" -> dmu_objset_type_t (int32)
   *     (optional) "props" -> { prop -> value }
+ *     (optional) "hidden_args" -> { "wkeydata" -> value }
+ *         raw uint8_t array of encryption wrapping key data (32 bytes)
   * }
   *
   * outnvl: propname -> error code (int32)
@@ -3134,15 +3246,18 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
         int error = 0;
         zfs_creat_t zct = { 0 };
         nvlist_t *nvprops = NULL;
+       nvlist_t *hidden_args = NULL;
         void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
         int32_t type32;
         dmu_objset_type_t type;
         boolean_t is_insensitive = B_FALSE;
+       dsl_crypto_params_t *dcp = NULL;
  
         if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
                 return (SET_ERROR(EINVAL));
         type = type32;
         (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
+       (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
  
         switch (type) {
         case DMU_OST_ZFS:
@@ -3184,7 +3299,7 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
                         volblocksize = zfs_prop_default_numeric(
                             ZFS_PROP_VOLBLOCKSIZE);
  
-               if ((error = zvol_check_volblocksize(
+               if ((error = zvol_check_volblocksize(fsname,
                     volblocksize)) != 0 ||
                     (error = zvol_check_volsize(volsize,
                     volblocksize)) != 0)
@@ -3208,9 +3323,18 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
                 }
         }
  
+       error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
+           hidden_args, &dcp);
+       if (error != 0) {
+               nvlist_free(zct.zct_zplprops);
+               return (error);
+       }
+
         error = dmu_objset_create(fsname, type,
-           is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
+           is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
+
         nvlist_free(zct.zct_zplprops);
+       dsl_crypto_params_free(dcp, !!error);
  
         /*
          * It would be nice to do this atomically.
@@ -3218,15 +3342,26 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
         if (error == 0) {
                 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
                     nvprops, outnvl);
-               if (error != 0)
-                       (void) dsl_destroy_head(fsname);
-       }
-
-#ifdef _KERNEL
-       if (error == 0 && type == DMU_OST_ZVOL)
-               zvol_create_minors(fsname);
-#endif
+               if (error != 0) {
+                       spa_t *spa;
+                       int error2;
  
+                       /*
+                        * Volumes will return EBUSY and cannot be destroyed
+                        * until all asynchronous minor handling has completed.
+                        * Wait for the spa_zvol_taskq to drain then retry.
+                        */
+                       error2 = dsl_destroy_head(fsname);
+                       while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) {
+                               error2 = spa_open(fsname, &spa, FTAG);
+                               if (error2 == 0) {
+                                       taskq_wait(spa->spa_zvol_taskq);
+                                       spa_close(spa, FTAG);
+                               }
+                               error2 = dsl_destroy_head(fsname);
+                       }
+               }
+       }
         return (error);
  }
  
@@ -3234,6 +3369,8 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
   * innvl: {
   *     "origin" -> name of origin snapshot
   *     (optional) "props" -> { prop -> value }
+ *     (optional) "hidden_args" -> { "wkeydata" -> value }
+ *         raw uint8_t array of encryption wrapping key data (32 bytes)
   * }
   *
   * outputs:
@@ -3256,9 +3393,8 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
  
         if (dataset_namecheck(origin_name, NULL, NULL) != 0)
                 return (SET_ERROR(EINVAL));
+
         error = dmu_objset_clone(fsname, origin_name);
-       if (error != 0)
-               return (error);
  
         /*
          * It would be nice to do this atomically.
@@ -3269,13 +3405,18 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
                 if (error != 0)
                         (void) dsl_destroy_head(fsname);
         }
+       return (error);
+}
  
-#ifdef _KERNEL
-       if (error == 0)
-               zvol_create_minors(fsname);
-#endif
+/* ARGSUSED */
+static int
+zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       if (strchr(fsname, '@') ||
+           strchr(fsname, '%'))
+               return (SET_ERROR(EINVAL));
  
-       return (error);
+       return (dmu_objset_remap_indirects(fsname));
  }
  
  /*
@@ -3292,7 +3433,7 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
         nvlist_t *snaps;
         nvlist_t *props = NULL;
         int error, poollen;
-       nvpair_t *pair, *pair2;
+       nvpair_t *pair;
  
         (void) nvlist_lookup_nvlist(innvl, "props", &props);
         if ((error = zfs_check_userprops(poolname, props)) != 0)
@@ -3326,7 +3467,7 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
                         return (SET_ERROR(EXDEV));
  
                 /* This must be the only snap of this fs. */
-               for (pair2 = nvlist_next_nvpair(snaps, pair);
+               for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
                     pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
                         if (strncmp(name, nvpair_name(pair2), cp - name + 1)
                             == 0) {
@@ -3337,11 +3478,6 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
  
         error = dsl_dataset_snapshot(snaps, props, outnvl);
  
-#ifdef _KERNEL
-       if (error == 0)
-               zvol_create_minors(poolname);
-#endif
-
         return (error);
  }
  
@@ -3365,6 +3501,8 @@ zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
          * we clear the TSD here.
          */
         poolname = tsd_get(zfs_allow_log_key);
+       if (poolname == NULL)
+               return (SET_ERROR(EINVAL));
         (void) tsd_set(zfs_allow_log_key, NULL);
         error = spa_open(poolname, &spa, FTAG);
         strfree(poolname);
@@ -3393,46 +3531,24 @@ zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
   * This function is best-effort.  Callers must deal gracefully if it
   * remains mounted (or is remounted after this call).
   *
- * XXX: This function should detect a failure to unmount a snapdir of a dataset
- * and return the appropriate error code when it is mounted. Its Illumos and
- * FreeBSD counterparts do this. We do not do this on Linux because there is no
- * clear way to access the mount information that FreeBSD and Illumos use to
- * distinguish between things with mounted snapshot directories, and things
- * without mounted snapshot directories, which include zvols. Returning a
- * failure for the latter causes `zfs destroy` to fail on zvol snapshots.
+ * Returns nothing: a name without '@' is ignored, and the result of the
+ * forced unmount (zfsctl_snapshot_unmount() with MNT_FORCE) is discarded.
   */
-int
+void
  zfs_unmount_snap(const char *snapname)
  {
-       zfs_sb_t *zsb = NULL;
-       char *dsname;
-       char *fullname;
-       char *ptr;
-
-       if ((ptr = strchr(snapname, '@')) == NULL)
-               return (0);
-
-       dsname = kmem_alloc(ptr - snapname + 1, KM_SLEEP);
-       strlcpy(dsname, snapname, ptr - snapname + 1);
-       fullname = strdup(snapname);
-
-       if (zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE) == 0) {
-               ASSERT(!dsl_pool_config_held(dmu_objset_pool(zsb->z_os)));
-               (void) zfsctl_unmount_snapshot(zsb, fullname, MNT_FORCE);
-               zfs_sb_rele(zsb, FTAG);
-       }
-
-       kmem_free(dsname, ptr - snapname + 1);
-       strfree(fullname);
+       if (strchr(snapname, '@') == NULL)
+               return;
  
-       return (0);
+       (void) zfsctl_snapshot_unmount((char *)snapname, MNT_FORCE);
  }
  
  /* ARGSUSED */
  static int
  zfs_unmount_snap_cb(const char *snapname, void *arg)
  {
-       return (zfs_unmount_snap(snapname));
+       zfs_unmount_snap(snapname);
+       return (0);
  }
  
  /*
@@ -3452,10 +3568,10 @@ zfs_destroy_unmount_origin(const char *fsname)
                 return;
         ds = dmu_objset_ds(os);
         if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
-               char originname[MAXNAMELEN];
+               char originname[ZFS_MAX_DATASET_NAME_LEN];
                 dsl_dataset_name(ds->ds_prev, originname);
                 dmu_objset_rele(os, FTAG);
-               (void) zfs_unmount_snap(originname);
+               zfs_unmount_snap(originname);
         } else {
                 dmu_objset_rele(os, FTAG);
         }
@@ -3483,8 +3599,7 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
  
         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
             pair = nvlist_next_nvpair(snaps, pair)) {
-               (void) zfs_unmount_snap(nvpair_name(pair));
-               (void) zvol_remove_minor(nvpair_name(pair));
+               zfs_unmount_snap(nvpair_name(pair));
         }
  
         return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
@@ -3505,9 +3620,7 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
  static int
  zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
  {
-       nvpair_t *pair, *pair2;
-
-       for (pair = nvlist_next_nvpair(innvl, NULL);
+       for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
             pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
                 char *snap_name;
  
@@ -3519,7 +3632,7 @@ zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
  
  
                 /* Verify that the keys (bookmarks) are unique */
-               for (pair2 = nvlist_next_nvpair(innvl, pair);
+               for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
                     pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
                         if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
                                 return (SET_ERROR(EINVAL));
@@ -3559,10 +3672,9 @@ zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
      nvlist_t *outnvl)
  {
         int error, poollen;
-       nvpair_t *pair;
  
         poollen = strlen(poolname);
-       for (pair = nvlist_next_nvpair(innvl, NULL);
+       for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
             pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
                 const char *name = nvpair_name(pair);
                 const char *cp = strchr(name, '#');
@@ -3587,6 +3699,40 @@ zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
         return (error);
  }
  
+/*
+ * Run the channel program in innvl on "poolname" via zcp_eval(): program
+ * text and arglist are required, sync flag and instruction/memory limits
+ * optional.  EINVAL on a missing key or a limit of 0 or over its cap.
+ */
+static int
+zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
+    nvlist_t *outnvl)
+{
+       char *program;
+       uint64_t instrlimit, memlimit;
+       boolean_t sync_flag;
+       nvpair_t *nvarg = NULL;
+
+       if (0 != nvlist_lookup_string(innvl, ZCP_ARG_PROGRAM, &program))
+               return (SET_ERROR(EINVAL));
+       if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag))
+               sync_flag = B_TRUE;
+       if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit))
+               instrlimit = ZCP_DEFAULT_INSTRLIMIT;
+       if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit))
+               memlimit = ZCP_DEFAULT_MEMLIMIT;
+       if (0 != nvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST, &nvarg))
+               return (SET_ERROR(EINVAL));
+
+       if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
+               return (SET_ERROR(EINVAL));
+       if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
+               return (SET_ERROR(EINVAL));
+
+       return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
+           nvarg, outnvl));
+}
+
  /*
   * inputs:
   * zc_name             name of dataset to destroy
@@ -3600,48 +3746,96 @@ zfs_ioc_destroy(zfs_cmd_t *zc)
  {
         int err;
  
-       if (zc->zc_objset_type == DMU_OST_ZFS) {
-               err = zfs_unmount_snap(zc->zc_name);
-               if (err != 0)
-                       return (err);
-       }
+       if (zc->zc_objset_type == DMU_OST_ZFS)
+               zfs_unmount_snap(zc->zc_name);
  
-       if (strchr(zc->zc_name, '@'))
+       if (strchr(zc->zc_name, '@')) {
                 err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
-       else
+       } else {
                 err = dsl_destroy_head(zc->zc_name);
-       if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
-               (void) zvol_remove_minor(zc->zc_name);
+               if (err == EEXIST) {
+                       /*
+                        * It is possible that the given DS may have
+                        * hidden child (%recv) datasets - "leftovers"
+                        * resulting from the previously interrupted
+                        * 'zfs receive'.
+                        *
+                        * 6 extra bytes for /%recv
+                        */
+                       char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6];
+
+                       if (snprintf(namebuf, sizeof (namebuf), "%s/%s",
+                           zc->zc_name, recv_clone_name) >=
+                           sizeof (namebuf))
+                               return (SET_ERROR(EINVAL));
+
+                       /*
+                        * Try to remove the hidden child (%recv) and after
+                        * that try to remove the target dataset.
+                        * If the hidden child (%recv) does not exist
+                        * the original error (EEXIST) will be returned
+                        */
+                       err = dsl_destroy_head(namebuf);
+                       if (err == 0)
+                               err = dsl_destroy_head(zc->zc_name);
+                       else if (err == ENOENT)
+                               err = SET_ERROR(EEXIST);
+               }
+       }
+
         return (err);
  }
  
  /*
   * fsname is name of dataset to rollback (to most recent snapshot)
   *
- * innvl is not used.
+ * innvl may contain name of expected target snapshot
   *
   * outnvl: "target" -> name of most recent snapshot
   * }
   */
  /* ARGSUSED */
  static int
-zfs_ioc_rollback(const char *fsname, nvlist_t *args, nvlist_t *outnvl)
+zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
  {
-       zfs_sb_t *zsb;
+       zfsvfs_t *zfsvfs;
+       zvol_state_t *zv;
+       char *target = NULL;
         int error;
  
-       if (get_zfs_sb(fsname, &zsb) == 0) {
-               error = zfs_suspend_fs(zsb);
+       (void) nvlist_lookup_string(innvl, "target", &target);
+       if (target != NULL) {
+               const char *cp = strchr(target, '@');
+
+               /*
+                * The snap name must contain an @, and the part after it must
+                * contain only valid characters.
+                */
+               if (cp == NULL ||
+                   zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
+                       return (SET_ERROR(EINVAL));
+       }
+
+       if (getzfsvfs(fsname, &zfsvfs) == 0) {
+               dsl_dataset_t *ds;
+
+               ds = dmu_objset_ds(zfsvfs->z_os);
+               error = zfs_suspend_fs(zfsvfs);
                 if (error == 0) {
                         int resume_err;
  
-                       error = dsl_dataset_rollback(fsname, zsb, outnvl);
-                       resume_err = zfs_resume_fs(zsb, fsname);
+                       error = dsl_dataset_rollback(fsname, target, zfsvfs,
+                           outnvl);
+                       resume_err = zfs_resume_fs(zfsvfs, ds);
                         error = error ? error : resume_err;
                 }
-               deactivate_super(zsb->z_sb);
+               deactivate_super(zfsvfs->z_sb);
+       } else if ((zv = zvol_suspend(fsname)) != NULL) {
+               error = dsl_dataset_rollback(fsname, target, zvol_tag(zv),
+                   outnvl);
+               zvol_resume(zv);
         } else {
-               error = dsl_dataset_rollback(fsname, NULL, outnvl);
+               error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
         }
         return (error);
  }
@@ -3651,13 +3845,12 @@ recursive_unmount(const char *fsname, void *arg)
  {
         const char *snapname = arg;
         char *fullname;
-       int error;
  
         fullname = kmem_asprintf("%s@%s", fsname, snapname);
-       error = zfs_unmount_snap(fullname);
+       zfs_unmount_snap(fullname);
         strfree(fullname);
  
-       return (error);
+       return (0);
  }
  
  /*
@@ -3674,9 +3867,12 @@ zfs_ioc_rename(zfs_cmd_t *zc)
         boolean_t recursive = zc->zc_cookie & 1;
         char *at;
  
+       /* "zfs rename" from and to ...%recv datasets should both fail */
+       zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
         zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
-       if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
-           strchr(zc->zc_value, '%'))
+       if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
+           dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
+           strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
                 return (SET_ERROR(EINVAL));
  
         at = strchr(zc->zc_name, '@');
@@ -3729,15 +3925,35 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                             zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
                         const char *gq_prefix =
                             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
+                       const char *uiq_prefix =
+                           zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
+                       const char *giq_prefix =
+                           zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
+                       const char *pq_prefix =
+                           zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA];
+                       const char *piq_prefix = zfs_userquota_prop_prefixes[\
+                           ZFS_PROP_PROJECTOBJQUOTA];
  
                         if (strncmp(propname, uq_prefix,
                             strlen(uq_prefix)) == 0) {
                                 perm = ZFS_DELEG_PERM_USERQUOTA;
+                       } else if (strncmp(propname, uiq_prefix,
+                           strlen(uiq_prefix)) == 0) {
+                               perm = ZFS_DELEG_PERM_USEROBJQUOTA;
                         } else if (strncmp(propname, gq_prefix,
                             strlen(gq_prefix)) == 0) {
                                 perm = ZFS_DELEG_PERM_GROUPQUOTA;
+                       } else if (strncmp(propname, giq_prefix,
+                           strlen(giq_prefix)) == 0) {
+                               perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
+                       } else if (strncmp(propname, pq_prefix,
+                           strlen(pq_prefix)) == 0) {
+                               perm = ZFS_DELEG_PERM_PROJECTQUOTA;
+                       } else if (strncmp(propname, piq_prefix,
+                           strlen(piq_prefix)) == 0) {
+                               perm = ZFS_DELEG_PERM_PROJECTOBJQUOTA;
                         } else {
-                               /* USERUSED and GROUPUSED are read-only */
+                               /* {USER|GROUP|PROJECT}USED are read-only */
                                 return (SET_ERROR(EINVAL));
                         }
  
@@ -3773,8 +3989,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                  * the SPA supports it. We ignore any errors here since
                  * we'll catch them later.
                  */
-               if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
-                   nvpair_value_uint64(pair, &intval) == 0) {
+               if (nvpair_value_uint64(pair, &intval) == 0) {
                         if (intval >= ZIO_COMPRESS_GZIP_1 &&
                             intval <= ZIO_COMPRESS_GZIP_9 &&
                             zfs_earlier_version(dsname,
@@ -3820,9 +4035,59 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                         return (SET_ERROR(ENOTSUP));
                 break;
  
-       case ZFS_PROP_DEDUP:
-               if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
-                       return (SET_ERROR(ENOTSUP));
+       case ZFS_PROP_VOLBLOCKSIZE:
+       case ZFS_PROP_RECORDSIZE:
+               /* Record sizes above 128k need the feature to be enabled */
+               if (nvpair_value_uint64(pair, &intval) == 0 &&
+                   intval > SPA_OLD_MAXBLOCKSIZE) {
+                       spa_t *spa;
+
+                       /*
+                        * We don't allow setting the property above 1MB,
+                        * unless the tunable has been changed.
+                        */
+                       if (intval > zfs_max_recordsize ||
+                           intval > SPA_MAXBLOCKSIZE)
+                               return (SET_ERROR(ERANGE));
+
+                       if ((err = spa_open(dsname, &spa, FTAG)) != 0)
+                               return (err);
+
+                       if (!spa_feature_is_enabled(spa,
+                           SPA_FEATURE_LARGE_BLOCKS)) {
+                               spa_close(spa, FTAG);
+                               return (SET_ERROR(ENOTSUP));
+                       }
+                       spa_close(spa, FTAG);
+               }
+               break;
+
+       case ZFS_PROP_DNODESIZE:
+               /* Dnode sizes above 512 need the feature to be enabled */
+               if (nvpair_value_uint64(pair, &intval) == 0 &&
+                   intval != ZFS_DNSIZE_LEGACY) {
+                       spa_t *spa;
+
+                       /*
+                        * If this is a bootable dataset then
+                        * we don't allow large (>512B) dnodes,
+                        * because GRUB doesn't support them.
+                        */
+                       if (zfs_is_bootfs(dsname) &&
+                           intval != ZFS_DNSIZE_LEGACY) {
+                               return (SET_ERROR(EDOM));
+                       }
+
+                       if ((err = spa_open(dsname, &spa, FTAG)) != 0)
+                               return (err);
+
+                       if (!spa_feature_is_enabled(spa,
+                           SPA_FEATURE_LARGE_DNODE)) {
+                               spa_close(spa, FTAG);
+                               return (SET_ERROR(ENOTSUP));
+                       }
+                       spa_close(spa, FTAG);
+               }
                 break;
  
         case ZFS_PROP_SHARESMB:
@@ -3839,6 +4104,47 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                                 return (SET_ERROR(ENOTSUP));
                 }
                 break;
+       case ZFS_PROP_CHECKSUM:
+       case ZFS_PROP_DEDUP:
+       {
+               spa_feature_t feature;
+               spa_t *spa;
+               uint64_t intval;
+               int err;
+
+               /* dedup feature version checks */
+               if (prop == ZFS_PROP_DEDUP &&
+                   zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
+                       return (SET_ERROR(ENOTSUP));
+
+               if (nvpair_value_uint64(pair, &intval) != 0)
+                       return (SET_ERROR(EINVAL));
+
+               /* check prop value is enabled in features */
+               feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
+               if (feature == SPA_FEATURE_NONE)
+                       break;
+
+               if ((err = spa_open(dsname, &spa, FTAG)) != 0)
+                       return (err);
+               /*
+                * Salted checksums are not supported on root pools.
+                */
+               if (spa_bootfs(spa) != 0 &&
+                   intval < ZIO_CHECKSUM_FUNCTIONS &&
+                   (zio_checksum_table[intval].ci_flags &
+                   ZCHECKSUM_FLAG_SALTED)) {
+                       spa_close(spa, FTAG);
+                       return (SET_ERROR(ERANGE));
+               }
+               if (!spa_feature_is_enabled(spa, feature)) {
+                       spa_close(spa, FTAG);
+                       return (SET_ERROR(ENOTSUP));
+               }
+               spa_close(spa, FTAG);
+               break;
+       }
+
         default:
                 break;
         }
@@ -3874,12 +4180,13 @@ zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
         VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
  
         zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
-       (void) strcpy(zc->zc_name, dataset);
+       (void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name));
         pair = nvlist_next_nvpair(props, NULL);
         while (pair != NULL) {
                 next_pair = nvlist_next_nvpair(props, pair);
  
-               (void) strcpy(zc->zc_value, nvpair_name(pair));
+               (void) strlcpy(zc->zc_value, nvpair_name(pair),
+                   sizeof (zc->zc_value));
                 if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
                     (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
                         VERIFY(nvlist_remove_nvpair(props, pair) == 0);
@@ -3974,74 +4281,94 @@ next:
         }
  }
  
+/*
+ * Split out of 'props' the properties that must not be applied BEFORE a
+ * dataset has been received, and hand them back in a separate nvlist.
+ *
+ * refquota is the motivating case: in a replication stream an older/earlier
+ * snapshot may exceed the refquota.  We still want to receive that snapshot,
+ * but setting refquota pre-receipt sets the dsl's ACTUAL quota, so receiving
+ * the older/earlier snapshot would fail with EDQUOT.
+ *
+ * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
+ *
+ * Every matching pair is moved: it is added to the result and then removed
+ * from 'props'.  Returns NULL when nothing was extracted.
+ *
+ * libzfs will need to be judicious handling errors encountered by props
+ * extracted by this function.
+ */
+static nvlist_t *
+extract_delay_props(nvlist_t *props)
+{
+       /* Zero-terminated table of the properties to hold back. */
+       static const zfs_prop_t delayable[] = {
+               ZFS_PROP_REFQUOTA,
+               ZFS_PROP_KEYLOCATION,
+               0
+       };
+       nvlist_t *delayprops;
+       nvpair_t *cur, *prev;
+
+       VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
+       cur = nvlist_next_nvpair(props, NULL);
+       while (cur != NULL) {
+               const char *name = nvpair_name(cur);
+               boolean_t matched = B_FALSE;
+               int idx;
+
+               /*
+                * strcmp() is safe because zfs_prop_to_name() always returns
+                * a bounded string.
+                */
+               for (idx = 0; delayable[idx] != 0; idx++) {
+                       if (strcmp(zfs_prop_to_name(delayable[idx]),
+                           name) == 0) {
+                               matched = B_TRUE;
+                               break;
+                       }
+               }
+
+               if (matched) {
+                       /*
+                        * Step back one pair before removing the current one
+                        * so the walk can resume from a pair that is still
+                        * on the list (or from the head when prev is NULL).
+                        */
+                       prev = nvlist_prev_nvpair(props, cur);
+                       VERIFY(nvlist_add_nvpair(delayprops, cur) == 0);
+                       VERIFY(nvlist_remove_nvpair(props, cur) == 0);
+                       cur = prev;
+               }
+
+               cur = nvlist_next_nvpair(props, cur);
+       }
+
+       if (nvlist_empty(delayprops)) {
+               nvlist_free(delayprops);
+               return (NULL);
+       }
+       return (delayprops);
+}
+
  #ifdef DEBUG
  static boolean_t zfs_ioc_recv_inject_err;
  #endif
  
  /*
- * inputs:
- * zc_name             name of containing filesystem
- * zc_nvlist_src{_size}        nvlist of properties to apply
- * zc_value            name of snapshot to create
- * zc_string           name of clone origin (if DRR_FLAG_CLONE)
- * zc_cookie           file descriptor to recv from
- * zc_begin_record     the BEGIN record of the stream (not byteswapped)
- * zc_guid             force flag
- * zc_cleanup_fd       cleanup-on-exit file descriptor
- * zc_action_handle    handle for this guid/ds mapping (or zero on first call)
- *
- * outputs:
- * zc_cookie           number of bytes read
- * zc_nvlist_dst{_size} error for each unapplied received property
- * zc_obj              zprop_errflags_t
- * zc_action_handle    handle for this guid/ds mapping
+ * nvlist 'errors' is always allocated. It will contain descriptions of
+ * encountered errors, if any. It's the caller's responsibility to free.
   */
  static int
-zfs_ioc_recv(zfs_cmd_t *zc)
+zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
+    nvlist_t *localprops, boolean_t force, boolean_t resumable, int input_fd,
+    dmu_replay_record_t *begin_record, int cleanup_fd, uint64_t *read_bytes,
+    uint64_t *errflags, uint64_t *action_handle, nvlist_t **errors)
  {
-       file_t *fp;
         dmu_recv_cookie_t drc;
-       boolean_t force = (boolean_t)zc->zc_guid;
-       int fd;
         int error = 0;
         int props_error = 0;
-       nvlist_t *errors;
         offset_t off;
-       nvlist_t *props = NULL; /* sent properties */
+       nvlist_t *delayprops = NULL; /* sent properties applied post-receive */
         nvlist_t *origprops = NULL; /* existing properties */
-       char *origin = NULL;
-       char *tosnap;
-       char tofs[ZFS_MAXNAMELEN];
+       nvlist_t *origrecvd = NULL; /* existing received properties */
         boolean_t first_recvd_props = B_FALSE;
+       file_t *input_fp;
  
-       if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
-           strchr(zc->zc_value, '@') == NULL ||
-           strchr(zc->zc_value, '%'))
-               return (SET_ERROR(EINVAL));
+       *read_bytes = 0;
+       *errflags = 0;
+       *errors = fnvlist_alloc();
  
-       (void) strcpy(tofs, zc->zc_value);
-       tosnap = strchr(tofs, '@');
-       *tosnap++ = '\0';
-
-       if (zc->zc_nvlist_src != 0 &&
-           (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
-           zc->zc_iflags, &props)) != 0)
-               return (error);
-
-       fd = zc->zc_cookie;
-       fp = getf(fd);
-       if (fp == NULL) {
-               nvlist_free(props);
+       input_fp = getf(input_fd);
+       if (input_fp == NULL)
                 return (SET_ERROR(EBADF));
-       }
-
-       VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
-       if (zc->zc_string[0])
-               origin = zc->zc_string;
  
         error = dmu_recv_begin(tofs, tosnap,
-           &zc->zc_begin_record, force, origin, &drc);
+           begin_record, force, resumable, origin, &drc);
         if (error != 0)
                 goto out;
  
@@ -4050,7 +4377,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
          * to the new data. Note that we must call dmu_recv_stream() if
          * dmu_recv_begin() succeeds.
          */
-       if (props != NULL && !drc.drc_newfs) {
+       if (recvprops != NULL && !drc.drc_newfs) {
                 if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
                     SPA_VERSION_RECVD_PROPS &&
                     !dsl_prop_get_hasrecvd(tofs))
@@ -4061,7 +4388,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
                  * completely replace the existing received properties, so stash
                  * away the existing ones.
                  */
-               if (dsl_prop_get_received(tofs, &origprops) == 0) {
+               if (dsl_prop_get_received(tofs, &origrecvd) == 0) {
                         nvlist_t *errlist = NULL;
                         /*
                          * Don't bother writing a property if its value won't
@@ -4072,67 +4399,132 @@ zfs_ioc_recv(zfs_cmd_t *zc)
                          * regardless.
                          */
                         if (!first_recvd_props)
-                               props_reduce(props, origprops);
-                       if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
-                               (void) nvlist_merge(errors, errlist, 0);
+                               props_reduce(recvprops, origrecvd);
+                       if (zfs_check_clearable(tofs, origrecvd, &errlist) != 0)
+                               (void) nvlist_merge(*errors, errlist, 0);
                         nvlist_free(errlist);
  
-                       if (clear_received_props(tofs, origprops,
-                           first_recvd_props ? NULL : props) != 0)
-                               zc->zc_obj |= ZPROP_ERR_NOCLEAR;
+                       if (clear_received_props(tofs, origrecvd,
+                           first_recvd_props ? NULL : recvprops) != 0)
+                               *errflags |= ZPROP_ERR_NOCLEAR;
                 } else {
-                       zc->zc_obj |= ZPROP_ERR_NOCLEAR;
+                       *errflags |= ZPROP_ERR_NOCLEAR;
                 }
         }
  
-       if (props != NULL) {
+       /*
+        * Stash away existing properties so we can restore them on error unless
+        * we're doing the first receive after SPA_VERSION_RECVD_PROPS, in which
+        * case "origrecvd" will take care of that.
+        */
+       if (localprops != NULL && !drc.drc_newfs && !first_recvd_props) {
+               objset_t *os;
+               if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
+                       if (dsl_prop_get_all(os, &origprops) != 0) {
+                               *errflags |= ZPROP_ERR_NOCLEAR;
+                       }
+                       dmu_objset_rele(os, FTAG);
+               } else {
+                       *errflags |= ZPROP_ERR_NOCLEAR;
+               }
+       }
+
+       if (recvprops != NULL) {
                 props_error = dsl_prop_set_hasrecvd(tofs);
  
                 if (props_error == 0) {
+                       delayprops = extract_delay_props(recvprops);
                         (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
-                           props, errors);
+                           recvprops, *errors);
                 }
         }
  
-       if (zc->zc_nvlist_dst_size != 0 &&
-           (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
-           put_nvlist(zc, errors) != 0)) {
-               /*
-                * Caller made zc->zc_nvlist_dst less than the minimum expected
-                * size or supplied an invalid address.
-                */
-               props_error = SET_ERROR(EINVAL);
+       if (localprops != NULL) {
+               nvlist_t *oprops = fnvlist_alloc();
+               nvlist_t *xprops = fnvlist_alloc();
+               nvpair_t *nvp = NULL;
+
+               while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
+                       if (nvpair_type(nvp) == DATA_TYPE_BOOLEAN) {
+                               /* -x property */
+                               const char *name = nvpair_name(nvp);
+                               zfs_prop_t prop = zfs_name_to_prop(name);
+                               if (prop != ZPROP_INVAL) {
+                                       if (!zfs_prop_inheritable(prop))
+                                               continue;
+                               } else if (!zfs_prop_user(name))
+                                       continue;
+                               fnvlist_add_boolean(xprops, name);
+                       } else {
+                               /* -o property=value */
+                               fnvlist_add_nvpair(oprops, nvp);
+                       }
+               }
+               (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
+                   oprops, *errors);
+               (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
+                   xprops, *errors);
+
+               nvlist_free(oprops);
+               nvlist_free(xprops);
         }
  
-       off = fp->f_offset;
-       error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
-           &zc->zc_action_handle);
+       off = input_fp->f_offset;
+       error = dmu_recv_stream(&drc, input_fp->f_vnode, &off, cleanup_fd,
+           action_handle);
  
         if (error == 0) {
-               zfs_sb_t *zsb = NULL;
+               zfsvfs_t *zfsvfs = NULL;
+               zvol_state_t *zv = NULL;
  
-               if (get_zfs_sb(tofs, &zsb) == 0) {
+               if (getzfsvfs(tofs, &zfsvfs) == 0) {
                         /* online recv */
+                       dsl_dataset_t *ds;
                         int end_err;
  
-                       error = zfs_suspend_fs(zsb);
+                       ds = dmu_objset_ds(zfsvfs->z_os);
+                       error = zfs_suspend_fs(zfsvfs);
                         /*
                          * If the suspend fails, then the recv_end will
                          * likely also fail, and clean up after itself.
                          */
-                       end_err = dmu_recv_end(&drc, zsb);
+                       end_err = dmu_recv_end(&drc, zfsvfs);
                         if (error == 0)
-                               error = zfs_resume_fs(zsb, tofs);
+                               error = zfs_resume_fs(zfsvfs, ds);
                         error = error ? error : end_err;
-                       deactivate_super(zsb->z_sb);
+                       deactivate_super(zfsvfs->z_sb);
+               } else if ((zv = zvol_suspend(tofs)) != NULL) {
+                       error = dmu_recv_end(&drc, zvol_tag(zv));
+                       zvol_resume(zv);
                 } else {
                         error = dmu_recv_end(&drc, NULL);
                 }
+
+               /* Set delayed properties now, after we're done receiving. */
+               if (delayprops != NULL && error == 0) {
+                       (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
+                           delayprops, *errors);
+               }
         }
  
-       zc->zc_cookie = off - fp->f_offset;
-       if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
-               fp->f_offset = off;
+       if (delayprops != NULL) {
+               /*
+                * Merge delayed props back in with initial props, in case
+                * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
+                * we have to make sure clear_received_props() includes
+                * the delayed properties).
+                *
+                * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
+                * using ASSERT() will be just like a VERIFY.
+                */
+               ASSERT(nvlist_merge(recvprops, delayprops, 0) == 0);
+               nvlist_free(delayprops);
+       }
+
+
+       *read_bytes = off - input_fp->f_offset;
+       if (VOP_SEEK(input_fp->f_vnode, input_fp->f_offset, &off, NULL) == 0)
+               input_fp->f_offset = off;
  
  #ifdef DEBUG
         if (zfs_ioc_recv_inject_err) {
@@ -4141,53 +4533,102 @@ zfs_ioc_recv(zfs_cmd_t *zc)
         }
  #endif
  
-#ifdef _KERNEL
-       if (error == 0)
-               zvol_create_minors(tofs);
-#endif
-
         /*
          * On error, restore the original props.
          */
-       if (error != 0 && props != NULL && !drc.drc_newfs) {
-               if (clear_received_props(tofs, props, NULL) != 0) {
+       if (error != 0 && recvprops != NULL && !drc.drc_newfs) {
+               if (clear_received_props(tofs, recvprops, NULL) != 0) {
                         /*
                          * We failed to clear the received properties.
                          * Since we may have left a $recvd value on the
                          * system, we can't clear the $hasrecvd flag.
                          */
-                       zc->zc_obj |= ZPROP_ERR_NORESTORE;
+                       *errflags |= ZPROP_ERR_NORESTORE;
                 } else if (first_recvd_props) {
                         dsl_prop_unset_hasrecvd(tofs);
                 }
  
-               if (origprops == NULL && !drc.drc_newfs) {
+               if (origrecvd == NULL && !drc.drc_newfs) {
                         /* We failed to stash the original properties. */
-                       zc->zc_obj |= ZPROP_ERR_NORESTORE;
+                       *errflags |= ZPROP_ERR_NORESTORE;
                 }
  
                 /*
                  * dsl_props_set() will not convert RECEIVED to LOCAL on or
                  * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
-                * explictly if we're restoring local properties cleared in the
+                * explicitly if we're restoring local properties cleared in the
                  * first new-style receive.
                  */
-               if (origprops != NULL &&
+               if (origrecvd != NULL &&
                     zfs_set_prop_nvlist(tofs, (first_recvd_props ?
                     ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
-                   origprops, NULL) != 0) {
+                   origrecvd, NULL) != 0) {
                         /*
                          * We stashed the original properties but failed to
                          * restore them.
                          */
-                       zc->zc_obj |= ZPROP_ERR_NORESTORE;
+                       *errflags |= ZPROP_ERR_NORESTORE;
+               }
+       }
+       if (error != 0 && localprops != NULL && !drc.drc_newfs &&
+           !first_recvd_props) {
+               nvlist_t *setprops;
+               nvlist_t *inheritprops;
+               nvpair_t *nvp;
+
+               if (origprops == NULL) {
+                       /* We failed to stash the original properties. */
+                       *errflags |= ZPROP_ERR_NORESTORE;
+                       goto out;
+               }
+
+               /* Restore original props */
+               setprops = fnvlist_alloc();
+               inheritprops = fnvlist_alloc();
+               nvp = NULL;
+               while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
+                       const char *name = nvpair_name(nvp);
+                       const char *source;
+                       nvlist_t *attrs;
+
+                       if (!nvlist_exists(origprops, name)) {
+                               /*
+                                * Property was not present or was explicitly
+                                * inherited before the receive, restore this.
+                                */
+                               fnvlist_add_boolean(inheritprops, name);
+                               continue;
+                       }
+                       attrs = fnvlist_lookup_nvlist(origprops, name);
+                       source = fnvlist_lookup_string(attrs, ZPROP_SOURCE);
+
+                       /* Skip received properties */
+                       if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0)
+                               continue;
+
+                       if (strcmp(source, tofs) == 0) {
+                               /* Property was locally set */
+                               fnvlist_add_nvlist(setprops, name, attrs);
+                       } else {
+                               /* Property was implicitly inherited */
+                               fnvlist_add_boolean(inheritprops, name);
+                       }
                 }
+
+               if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, setprops,
+                   NULL) != 0)
+                       *errflags |= ZPROP_ERR_NORESTORE;
+               if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inheritprops,
+                   NULL) != 0)
+                       *errflags |= ZPROP_ERR_NORESTORE;
+
+               nvlist_free(setprops);
+               nvlist_free(inheritprops);
         }
  out:
-       nvlist_free(props);
+       releasef(input_fd);
+       nvlist_free(origrecvd);
         nvlist_free(origprops);
-       nvlist_free(errors);
-       releasef(fd);
  
         if (error == 0)
                 error = props_error;
@@ -4195,6 +4636,194 @@ out:
         return (error);
  }
  
+/*
+ * Legacy (zfs_cmd_t based) receive ioctl.  Unpacks the arguments from 'zc'
+ * and hands the actual work to zfs_ioc_recv_impl().
+ *
+ * inputs:
+ * zc_name             name of containing filesystem (unused)
+ * zc_nvlist_src{_size}        nvlist of properties to apply
+ * zc_nvlist_conf{_size}       nvlist of properties to exclude
+ *                     (DATA_TYPE_BOOLEAN) and override (everything else)
+ * zc_value            name of snapshot to create
+ * zc_string           name of clone origin (if DRR_FLAG_CLONE)
+ * zc_cookie           file descriptor to recv from
+ * zc_begin_record     the BEGIN record of the stream (not byteswapped)
+ * zc_guid             force flag
+ * zc_cleanup_fd       cleanup-on-exit file descriptor
+ * zc_action_handle    handle for this guid/ds mapping (or zero on first call)
+ *
+ * outputs:
+ * zc_cookie           number of bytes read
+ * zc_obj              zprop_errflags_t
+ * zc_action_handle    handle for this guid/ds mapping
+ * zc_nvlist_dst{_size} error for each unapplied received property
+ *
+ * Returns 0 on success or an errno value.  EINVAL is returned both for a
+ * malformed snapshot name and when the error nvlist cannot be copied out.
+ */
+static int
+zfs_ioc_recv(zfs_cmd_t *zc)
+{
+       dmu_replay_record_t begin_record;
+       nvlist_t *errors = NULL;
+       nvlist_t *recvdprops = NULL;
+       nvlist_t *localprops = NULL;
+       char *origin = NULL;
+       char *tosnap;
+       char tofs[ZFS_MAX_DATASET_NAME_LEN];
+       int error = 0;
+
+       /* The target must be a well-formed "fs@snap" name without '%'. */
+       if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
+           strchr(zc->zc_value, '@') == NULL ||
+           strchr(zc->zc_value, '%'))
+               return (SET_ERROR(EINVAL));
+
+       /* Split "fs@snap" in place: tofs -> "fs", tosnap -> "snap". */
+       (void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
+       tosnap = strchr(tofs, '@');
+       *tosnap++ = '\0';
+
+       if (zc->zc_nvlist_src != 0 &&
+           (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
+           zc->zc_iflags, &recvdprops)) != 0)
+               return (error);
+
+       if (zc->zc_nvlist_conf != 0 &&
+           (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
+           zc->zc_iflags, &localprops)) != 0) {
+               /*
+                * recvdprops may already have been unpacked above; release
+                * it so this early return does not leak it.
+                */
+               nvlist_free(recvdprops);
+               return (error);
+       }
+
+       if (zc->zc_string[0])
+               origin = zc->zc_string;
+
+       /* Wrap the bare BEGIN payload in a full replay record. */
+       begin_record.drr_type = DRR_BEGIN;
+       begin_record.drr_payloadlen = 0;
+       begin_record.drr_u.drr_begin = zc->zc_begin_record;
+
+       /* This legacy entry point never requests a resumable receive. */
+       error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
+           zc->zc_guid, B_FALSE, zc->zc_cookie, &begin_record,
+           zc->zc_cleanup_fd, &zc->zc_cookie, &zc->zc_obj,
+           &zc->zc_action_handle, &errors);
+       nvlist_free(recvdprops);
+       nvlist_free(localprops);
+
+       /*
+        * Now that all props, initial and delayed, are set, report the prop
+        * errors to the caller.
+        */
+       if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
+           (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
+           put_nvlist(zc, errors) != 0)) {
+               /*
+                * Caller made zc->zc_nvlist_dst less than the minimum expected
+                * size or supplied an invalid address.
+                */
+               error = SET_ERROR(EINVAL);
+       }
+
+       nvlist_free(errors);
+
+       return (error);
+}
+
+/*
+ * nvlist based receive ioctl.  Validates and unpacks 'innvl', hands the
+ * actual work to zfs_ioc_recv_impl() and reports the results in 'outnvl'.
+ *
+ * innvl: {
+ *     "snapname" -> full name of the snapshot to create
+ *     (optional) "props" -> received properties to set (nvlist)
+ *     (optional) "localprops" -> override and exclude properties (nvlist)
+ *     (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE)
+ *     "begin_record" -> non-byteswapped dmu_replay_record_t
+ *     "input_fd" -> file descriptor to read stream from (int32)
+ *     (optional) "force" -> force flag (value ignored)
+ *     (optional) "resumable" -> resumable flag (value ignored)
+ *     (optional) "cleanup_fd" -> cleanup-on-exit file descriptor
+ *     (optional) "action_handle" -> handle for this guid/ds mapping
+ * }
+ *
+ * outnvl: {
+ *     "read_bytes" -> number of bytes read
+ *     "error_flags" -> zprop_errflags_t
+ *     "action_handle" -> handle for this guid/ds mapping
+ *     "errors" -> error for each unapplied received property (nvlist)
+ * }
+ *
+ * Returns 0 on success or an errno value.  A missing or malformed mandatory
+ * entry yields EINVAL; a failed lookup of an optional entry (other than
+ * ENOENT) is returned as-is.  'fsname' is not referenced by this function.
+ */
+static int
+zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       dmu_replay_record_t *begin_record;
+       uint_t begin_record_size;
+       nvlist_t *errors = NULL;
+       nvlist_t *recvprops = NULL;
+       nvlist_t *localprops = NULL;
+       char *snapname = NULL;
+       char *origin = NULL;
+       char *tosnap;
+       char tofs[ZFS_MAX_DATASET_NAME_LEN];
+       boolean_t force;
+       boolean_t resumable;
+       uint64_t action_handle = 0;
+       uint64_t read_bytes = 0;
+       uint64_t errflags = 0;
+       int input_fd = -1;
+       int cleanup_fd = -1;
+       int error;
+
+       error = nvlist_lookup_string(innvl, "snapname", &snapname);
+       if (error != 0)
+               return (SET_ERROR(EINVAL));
+
+       /* The target must be a well-formed "fs@snap" name without '%'. */
+       if (dataset_namecheck(snapname, NULL, NULL) != 0 ||
+           strchr(snapname, '@') == NULL ||
+           strchr(snapname, '%'))
+               return (SET_ERROR(EINVAL));
+
+       /*
+        * Split "fs@snap" in place: tofs -> "fs", tosnap -> "snap".  Use a
+        * bounded copy, as zfs_ioc_recv() does, so the write into tofs can
+        * never run past the buffer.
+        */
+       (void) strlcpy(tofs, snapname, sizeof (tofs));
+       tosnap = strchr(tofs, '@');
+       *tosnap++ = '\0';
+
+       error = nvlist_lookup_string(innvl, "origin", &origin);
+       if (error && error != ENOENT)
+               return (error);
+
+       /* The BEGIN record must be exactly one dmu_replay_record_t. */
+       error = nvlist_lookup_byte_array(innvl, "begin_record",
+           (uchar_t **)&begin_record, &begin_record_size);
+       if (error != 0 || begin_record_size != sizeof (*begin_record))
+               return (SET_ERROR(EINVAL));
+
+       error = nvlist_lookup_int32(innvl, "input_fd", &input_fd);
+       if (error != 0)
+               return (SET_ERROR(EINVAL));
+
+       /* Only the presence of these keys matters, not their value. */
+       force = nvlist_exists(innvl, "force");
+       resumable = nvlist_exists(innvl, "resumable");
+
+       error = nvlist_lookup_int32(innvl, "cleanup_fd", &cleanup_fd);
+       if (error && error != ENOENT)
+               return (error);
+
+       error = nvlist_lookup_uint64(innvl, "action_handle", &action_handle);
+       if (error && error != ENOENT)
+               return (error);
+
+       /* we still use "props" here for backwards compatibility */
+       error = nvlist_lookup_nvlist(innvl, "props", &recvprops);
+       if (error && error != ENOENT)
+               return (error);
+
+       error = nvlist_lookup_nvlist(innvl, "localprops", &localprops);
+       if (error && error != ENOENT)
+               return (error);
+
+       error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops,
+           force, resumable, input_fd, begin_record, cleanup_fd, &read_bytes,
+           &errflags, &action_handle, &errors);
+
+       /* Results are reported even when the receive itself failed. */
+       fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
+       fnvlist_add_uint64(outnvl, "error_flags", errflags);
+       fnvlist_add_uint64(outnvl, "action_handle", action_handle);
+       fnvlist_add_nvlist(outnvl, "errors", errors);
+
+       /*
+        * NOTE(review): recvprops and localprops were obtained with
+        * nvlist_lookup_nvlist() on innvl rather than allocated here;
+        * confirm that releasing them at this point is intended.
+        */
+       nvlist_free(errors);
+       nvlist_free(recvprops);
+       nvlist_free(localprops);
+
+       return (error);
+}
+
  /*
   * inputs:
   * zc_name     name of snapshot to send
@@ -4204,10 +4833,13 @@ out:
   * zc_fromobj  objsetid of incremental fromsnap (may be zero)
   * zc_guid     if set, estimate size of stream only.  zc_cookie is ignored.
   *             output size in zc_objset_type.
- * zc_flags    if =1, WRITE_EMBEDDED records are permitted
+ * zc_flags    lzc_send_flags
   *
   * outputs:
   * zc_objset_type      estimated size, if zc_guid is set
+ *
+ * NOTE: This is no longer the preferred interface, any new functionality
+ *       should be added to zfs_ioc_send_new() instead.
   */
  static int
  zfs_ioc_send(zfs_cmd_t *zc)
@@ -4216,6 +4848,9 @@ zfs_ioc_send(zfs_cmd_t *zc)
         offset_t off;
         boolean_t estimate = (zc->zc_guid != 0);
         boolean_t embedok = (zc->zc_flags & 0x1);
+       boolean_t large_block_ok = (zc->zc_flags & 0x2);
+       boolean_t compressok = (zc->zc_flags & 0x4);
+       boolean_t rawok = (zc->zc_flags & 0x8);
  
         if (zc->zc_obj != 0) {
                 dsl_pool_t *dp;
@@ -4232,7 +4867,8 @@ zfs_ioc_send(zfs_cmd_t *zc)
                 }
  
                 if (dsl_dir_is_clone(tosnap->ds_dir))
-                       zc->zc_fromobj = tosnap->ds_dir->dd_phys->dd_origin_obj;
+                       zc->zc_fromobj =
+                           dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
                 dsl_dataset_rele(tosnap, FTAG);
                 dsl_pool_rele(dp, FTAG);
         }
@@ -4246,7 +4882,8 @@ zfs_ioc_send(zfs_cmd_t *zc)
                 if (error != 0)
                         return (error);
  
-               error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
+               error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
+                   FTAG, &tosnap);
                 if (error != 0) {
                         dsl_pool_rele(dp, FTAG);
                         return (error);
@@ -4262,7 +4899,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
                         }
                 }
  
-               error = dmu_send_estimate(tosnap, fromsnap,
+               error = dmu_send_estimate(tosnap, fromsnap, compressok || rawok,
                     &zc->zc_objset_type);
  
                 if (fromsnap != NULL)
@@ -4276,7 +4913,8 @@ zfs_ioc_send(zfs_cmd_t *zc)
  
                 off = fp->f_offset;
                 error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
-                   zc->zc_fromobj, embedok, zc->zc_cookie, fp->f_vnode, &off);
+                   zc->zc_fromobj, embedok, large_block_ok, compressok, rawok,
+                   zc->zc_cookie, fp->f_vnode, &off);
  
                 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
                         fp->f_offset = off;
@@ -4459,7 +5097,8 @@ zfs_ioc_clear(zfs_cmd_t *zc)
  
         vdev_clear(spa, vd);
  
-       (void) spa_vdev_state_exit(spa, NULL, 0);
+       (void) spa_vdev_state_exit(spa, spa_suspended(spa) ?
+           NULL : spa->spa_root_vdev, 0);
  
         /*
          * Resume any suspended I/Os.
@@ -4472,25 +5111,46 @@ zfs_ioc_clear(zfs_cmd_t *zc)
         return (error);
  }
  
+/*
+ * Reopen all the vdevs associated with the pool.
+ *
+ * innvl: {
+ *  "scrub_restart" -> when true and scrub is running, allow to restart
+ *              scrub as the side effect of the reopen (boolean).
+ * }
+ *
+ * outnvl is unused
+ */
+/* ARGSUSED */
  static int
-zfs_ioc_pool_reopen(zfs_cmd_t *zc)
+zfs_ioc_pool_reopen(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
  {
         spa_t *spa;
         int error;
+       boolean_t scrub_restart = B_TRUE;
  
-       error = spa_open(zc->zc_name, &spa, FTAG);
+       if (innvl) {
+               if (nvlist_lookup_boolean_value(innvl, "scrub_restart",
+                   &scrub_restart) != 0) {
+                       return (SET_ERROR(EINVAL));
+               }
+       }
+
+       error = spa_open(pool, &spa, FTAG);
         if (error != 0)
                 return (error);
  
         spa_vdev_state_enter(spa, SCL_NONE);
  
         /*
-        * If a resilver is already in progress then set the
-        * spa_scrub_reopen flag to B_TRUE so that we don't restart
-        * the scan as a side effect of the reopen. Otherwise, let
-        * vdev_open() decided if a resilver is required.
+        * If the scrub_restart flag is B_FALSE and a scrub is already
+        * in progress then set spa_scrub_reopen flag to B_TRUE so that
+        * we don't restart the scrub as a side effect of the reopen.
+        * Otherwise, let vdev_open() decide if a resilver is required.
          */
-       spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
+
+       spa->spa_scrub_reopen = (!scrub_restart &&
+           dsl_scan_scrubbing(spa->spa_dsl_pool));
         vdev_reopen(spa->spa_root_vdev);
         spa->spa_scrub_reopen = B_FALSE;
  
@@ -4498,10 +5158,10 @@ zfs_ioc_pool_reopen(zfs_cmd_t *zc)
         spa_close(spa, FTAG);
         return (0);
  }
+
  /*
   * inputs:
   * zc_name     name of filesystem
- * zc_value    name of origin snapshot
   *
   * outputs:
   * zc_string   name of conflicting snapshot, if there is one
@@ -4509,22 +5169,60 @@ zfs_ioc_pool_reopen(zfs_cmd_t *zc)
  static int
  zfs_ioc_promote(zfs_cmd_t *zc)
  {
+       dsl_pool_t *dp;
+       dsl_dataset_t *ds, *ods;
+       char origin[ZFS_MAX_DATASET_NAME_LEN];
         char *cp;
+       int error;
+
+       zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
+       if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
+           strchr(zc->zc_name, '%'))
+               return (SET_ERROR(EINVAL));
+
+       error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
+       if (error != 0)
+               return (error);
+
+       error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
+       if (error != 0) {
+               dsl_pool_rele(dp, FTAG);
+               return (error);
+       }
+
+       if (!dsl_dir_is_clone(ds->ds_dir)) {
+               dsl_dataset_rele(ds, FTAG);
+               dsl_pool_rele(dp, FTAG);
+               return (SET_ERROR(EINVAL));
+       }
+
+       error = dsl_dataset_hold_obj(dp,
+           dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
+       if (error != 0) {
+               dsl_dataset_rele(ds, FTAG);
+               dsl_pool_rele(dp, FTAG);
+               return (error);
+       }
+
+       dsl_dataset_name(ods, origin);
+       dsl_dataset_rele(ods, FTAG);
+       dsl_dataset_rele(ds, FTAG);
+       dsl_pool_rele(dp, FTAG);
  
         /*
          * We don't need to unmount *all* the origin fs's snapshots, but
          * it's easier.
          */
-       cp = strchr(zc->zc_value, '@');
+       cp = strchr(origin, '@');
         if (cp)
                 *cp = '\0';
-       (void) dmu_objset_find(zc->zc_value,
+       (void) dmu_objset_find(origin,
             zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
         return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
  }
  
  /*
- * Retrieve a single {user|group}{used|quota}@... property.
+ * Retrieve a single {user|group|project}{used|quota}@... property.
   *
   * inputs:
   * zc_name     name of filesystem
@@ -4538,19 +5236,19 @@ zfs_ioc_promote(zfs_cmd_t *zc)
  static int
  zfs_ioc_userspace_one(zfs_cmd_t *zc)
  {
-       zfs_sb_t *zsb;
+       zfsvfs_t *zfsvfs;       /* held for the lookup, released below */
         int error;
  
         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
                 return (SET_ERROR(EINVAL));
  
-       error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE);
+       error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
         if (error != 0)
                 return (error);
  
-       error = zfs_userspace_one(zsb,
+       error = zfs_userspace_one(zfsvfs,
             zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
-       zfs_sb_rele(zsb, FTAG);
+       zfsvfs_rele(zfsvfs, FTAG);      /* drop the hold taken above */
  
         return (error);
  }
@@ -4569,21 +5267,19 @@ zfs_ioc_userspace_one(zfs_cmd_t *zc)
  static int
  zfs_ioc_userspace_many(zfs_cmd_t *zc)
  {
-       zfs_sb_t *zsb;
+       zfsvfs_t *zfsvfs;
         int bufsize = zc->zc_nvlist_dst_size;
-       int error;
-       void *buf;
  
         if (bufsize <= 0)
                 return (SET_ERROR(ENOMEM));
  
-       error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE);
+       int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
         if (error != 0)
                 return (error);
  
-       buf = vmem_alloc(bufsize, KM_SLEEP);
+       void *buf = vmem_alloc(bufsize, KM_SLEEP);
  
-       error = zfs_userspace_many(zsb, zc->zc_objset_type, &zc->zc_cookie,
+       error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
             buf, &zc->zc_nvlist_dst_size);
  
         if (error == 0) {
@@ -4592,7 +5288,7 @@ zfs_ioc_userspace_many(zfs_cmd_t *zc)
                     zc->zc_nvlist_dst_size);
         }
         vmem_free(buf, bufsize);
-       zfs_sb_rele(zsb, FTAG);
+       zfsvfs_rele(zfsvfs, FTAG);
  
         return (error);
  }
@@ -4609,35 +5305,81 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
  {
         objset_t *os;
         int error = 0;
-       zfs_sb_t *zsb;
+       zfsvfs_t *zfsvfs;
  
-       if (get_zfs_sb(zc->zc_name, &zsb) == 0) {
-               if (!dmu_objset_userused_enabled(zsb->z_os)) {
+       if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
+               if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
                         /*
                          * If userused is not enabled, it may be because the
                          * objset needs to be closed & reopened (to grow the
                          * objset_phys_t).  Suspend/resume the fs will do that.
                          */
-                       error = zfs_suspend_fs(zsb);
+                       dsl_dataset_t *ds, *newds;
+
+                       ds = dmu_objset_ds(zfsvfs->z_os);
+                       error = zfs_suspend_fs(zfsvfs);
                         if (error == 0) {
-                               dmu_objset_refresh_ownership(zsb->z_os,
-                                   zsb);
-                               error = zfs_resume_fs(zsb, zc->zc_name);
+                               dmu_objset_refresh_ownership(ds, &newds,
+                                   B_TRUE, zfsvfs);
+                               error = zfs_resume_fs(zfsvfs, newds);
                         }
                 }
                 if (error == 0)
-                       error = dmu_objset_userspace_upgrade(zsb->z_os);
-               deactivate_super(zsb->z_sb);
+                       error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
+               deactivate_super(zfsvfs->z_sb);
         } else {
                 /* XXX kind of reading contents without owning */
-               error = dmu_objset_hold(zc->zc_name, FTAG, &os);
+               error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
                 if (error != 0)
                         return (error);
  
                 error = dmu_objset_userspace_upgrade(os);
-               dmu_objset_rele(os, FTAG);
+               dmu_objset_rele_flags(os, B_TRUE, FTAG);
+       }
+
+       return (error);
+}
+
+/*
+ * inputs:
+ * zc_name             name of filesystem
+ *
+ * outputs:
+ * none (the return value is 0, a hold error, or os_upgrade_status)
+ */
+static int
+zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc)
+{
+       objset_t *os;
+       int error;
+
+       error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
+       if (error != 0)
+               return (error);
+       /* Nothing is upgraded unless one of the two checks passes. */
+       if (dmu_objset_userobjspace_upgradable(os) ||
+           dmu_objset_projectquota_upgradable(os)) {
+               mutex_enter(&os->os_upgrade_lock);
+               if (os->os_upgrade_id == 0) {
+                       /* clear potential error code and retry */
+                       os->os_upgrade_status = 0;
+                       mutex_exit(&os->os_upgrade_lock);
+
+                       dmu_objset_id_quota_upgrade(os);
+               } else {
+                       mutex_exit(&os->os_upgrade_lock);
+               }
+               /* Drop the pool hold only; the dataset stays held. */
+               dsl_pool_rele(dmu_objset_pool(os), FTAG);
+               /* Wait for the upgrade task, then report its status. */
+               taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
+               error = os->os_upgrade_status;
+       } else {
+               dsl_pool_rele(dmu_objset_pool(os), FTAG);
         }
  
+       dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT, FTAG);
+
         return (error);
  }
  
@@ -4669,8 +5411,7 @@ zfs_ioc_next_obj(zfs_cmd_t *zc)
         if (error != 0)
                 return (error);
  
-       error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
-           os->os_dsl_dataset->ds_phys->ds_prev_snap_txg);
+       error = dmu_object_next(os, &zc->zc_obj, B_FALSE, 0);
  
         dmu_objset_rele(os, FTAG);
         return (error);
@@ -4704,7 +5445,8 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
         error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
             hold_name);
         if (error == 0)
-               (void) strcpy(zc->zc_value, snap_name);
+               (void) strlcpy(zc->zc_value, snap_name,
+                   sizeof (zc->zc_value));
         strfree(snap_name);
         strfree(hold_name);
         zfs_onexit_fd_rele(zc->zc_cleanup_fd);
@@ -4751,10 +5493,10 @@ zfs_smb_acl_purge(znode_t *dzp)
  {
         zap_cursor_t    zc;
         zap_attribute_t zap;
-       zfs_sb_t *zsb = ZTOZSB(dzp);
+       zfsvfs_t *zfsvfs = ZTOZSB(dzp);
         int error;
  
-       for (zap_cursor_init(&zc, zsb->z_os, dzp->z_id);
+       for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
             (error = zap_cursor_retrieve(&zc, &zap)) == 0;
             zap_cursor_advance(&zc)) {
                 if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
@@ -4774,7 +5516,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
         znode_t *dzp;
         vnode_t *resourcevp = NULL;
         znode_t *sharedir;
-       zfs_sb_t *zsb;
+       zfsvfs_t *zfsvfs;
         nvlist_t *nvlist;
         char *src, *target;
         vattr_t vattr;
@@ -4795,17 +5537,17 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
         }
  
         dzp = VTOZ(vp);
-       zsb = ZTOZSB(dzp);
-       ZFS_ENTER(zsb);
+       zfsvfs = ZTOZSB(dzp);
+       ZFS_ENTER(zfsvfs);
  
         /*
          * Create share dir if its missing.
          */
-       mutex_enter(&zsb->z_lock);
-       if (zsb->z_shares_dir == 0) {
+       mutex_enter(&zfsvfs->z_lock);
+       if (zfsvfs->z_shares_dir == 0) {
                 dmu_tx_t *tx;
  
-               tx = dmu_tx_create(zsb->z_os);
+               tx = dmu_tx_create(zfsvfs->z_os);
                 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
                     ZFS_SHARES_DIR);
                 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
@@ -4813,22 +5555,22 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
                 if (error != 0) {
                         dmu_tx_abort(tx);
                 } else {
-                       error = zfs_create_share_dir(zsb, tx);
+                       error = zfs_create_share_dir(zfsvfs, tx);
                         dmu_tx_commit(tx);
                 }
                 if (error != 0) {
-                       mutex_exit(&zsb->z_lock);
+                       mutex_exit(&zfsvfs->z_lock);
                         VN_RELE(vp);
-                       ZFS_EXIT(zsb);
+                       ZFS_EXIT(zfsvfs);
                         return (error);
                 }
         }
-       mutex_exit(&zsb->z_lock);
+       mutex_exit(&zfsvfs->z_lock);
  
-       ASSERT(zsb->z_shares_dir);
-       if ((error = zfs_zget(zsb, zsb->z_shares_dir, &sharedir)) != 0) {
+       ASSERT(zfsvfs->z_shares_dir);
+       if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
                 VN_RELE(vp);
-               ZFS_EXIT(zsb);
+               ZFS_EXIT(zfsvfs);
                 return (error);
         }
  
@@ -4859,7 +5601,8 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
                 if ((error = get_nvlist(zc->zc_nvlist_src,
                     zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
                         VN_RELE(vp);
-                       ZFS_EXIT(zsb);
+                       VN_RELE(ZTOV(sharedir));
+                       ZFS_EXIT(zfsvfs);
                         return (error);
                 }
                 if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
@@ -4867,7 +5610,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
                     &target)) {
                         VN_RELE(vp);
                         VN_RELE(ZTOV(sharedir));
-                       ZFS_EXIT(zsb);
+                       ZFS_EXIT(zfsvfs);
                         nvlist_free(nvlist);
                         return (error);
                 }
@@ -4888,7 +5631,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
         VN_RELE(vp);
         VN_RELE(ZTOV(sharedir));
  
-       ZFS_EXIT(zsb);
+       ZFS_EXIT(zfsvfs);
  
         return (error);
  #else
@@ -4911,6 +5654,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
  static int
  zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
  {
+       nvpair_t *pair;
         nvlist_t *holds;
         int cleanup_fd = -1;
         int error;
@@ -4920,6 +5664,19 @@ zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
         if (error != 0)
                 return (SET_ERROR(EINVAL));
  
+       /* make sure the user didn't pass us any invalid (empty) tags */
+       for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
+           pair = nvlist_next_nvpair(holds, pair)) {
+               char *htag;
+
+               error = nvpair_value_string(pair, &htag);
+               if (error != 0)
+                       return (SET_ERROR(error));
+
+               if (strlen(htag) == 0)
+                       return (SET_ERROR(EINVAL));
+       }
+
         if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
                 error = zfs_onexit_fd_hold(cleanup_fd, &minor);
                 if (error != 0)
@@ -4944,6 +5701,7 @@ zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
  static int
  zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
  {
+       ASSERT3P(args, ==, NULL);       /* no input nvlist is expected */
         return (dsl_dataset_get_holds(snapname, outnvl));
  }
  
@@ -4989,7 +5747,7 @@ zfs_ioc_events_next(zfs_cmd_t *zc)
  
         do {
                 error = zfs_zevent_next(ze, &event,
-                       &zc->zc_nvlist_dst_size, &dropped);
+                   &zc->zc_nvlist_dst_size, &dropped);
                 if (event != NULL) {
                         zc->zc_cookie = dropped;
                         error = put_nvlist(zc, event);
@@ -5117,11 +5875,19 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
                 return (error);
  
         error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
+       if (error == 0 && !new->ds_is_snapshot) {
+               dsl_dataset_rele(new, FTAG);
+               error = SET_ERROR(EINVAL);
+       }
         if (error != 0) {
                 dsl_pool_rele(dp, FTAG);
                 return (error);
         }
         error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
+       if (error == 0 && !old->ds_is_snapshot) {
+               dsl_dataset_rele(old, FTAG);
+               error = SET_ERROR(EINVAL);
+       }
         if (error != 0) {
                 dsl_dataset_rele(new, FTAG);
                 dsl_pool_rele(dp, FTAG);
@@ -5142,8 +5908,16 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
   * innvl: {
   *     "fd" -> file descriptor to write stream to (int32)
   *     (optional) "fromsnap" -> full snap name to send an incremental from
+ *     (optional) "largeblockok" -> (value ignored)
+ *         presence indicates that blocks > 128KB are permitted
   *     (optional) "embedok" -> (value ignored)
   *         presence indicates DRR_WRITE_EMBEDDED records are permitted
+ *     (optional) "compressok" -> (value ignored)
+ *         presence indicates compressed DRR_WRITE records are permitted
+ *     (optional) "rawok" -> (value ignored)
+ *         presence indicates raw encrypted records should be used.
+ *     (optional) "resume_object" and "resume_offset" -> (uint64)
+ *         if present, resume send stream from specified object and offset.
   * }
   *
   * outnvl is unused
@@ -5157,7 +5931,12 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
         char *fromname = NULL;
         int fd;
         file_t *fp;
+       boolean_t largeblockok;
         boolean_t embedok;
+       boolean_t compressok;
+       boolean_t rawok;
+       uint64_t resumeobj = 0;
+       uint64_t resumeoff = 0;
  
         error = nvlist_lookup_int32(innvl, "fd", &fd);
         if (error != 0)
@@ -5165,13 +5944,20 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
  
         (void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
  
+       largeblockok = nvlist_exists(innvl, "largeblockok");
         embedok = nvlist_exists(innvl, "embedok");
+       compressok = nvlist_exists(innvl, "compressok");
+       rawok = nvlist_exists(innvl, "rawok");
+
+       (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
+       (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
  
         if ((fp = getf(fd)) == NULL)
                 return (SET_ERROR(EBADF));
  
         off = fp->f_offset;
-       error = dmu_send(snapname, fromname, embedok, fd, fp->f_vnode, &off);
+       error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
+           rawok, fd, resumeobj, resumeoff, fp->f_vnode, &off);
  
         if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
                 fp->f_offset = off;
@@ -5185,7 +5971,16 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
   * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
   *
   * innvl: {
- *     (optional) "fromsnap" -> full snap name to send an incremental from
+ *     (optional) "from" -> full snap or bookmark name to send an incremental
+ *                          from
+ *     (optional) "largeblockok" -> (value ignored)
+ *         presence indicates that blocks > 128KB are permitted
+ *     (optional) "embedok" -> (value ignored)
+ *         presence indicates DRR_WRITE_EMBEDDED records are permitted
+ *     (optional) "compressok" -> (value ignored)
+ *         presence indicates compressed DRR_WRITE records are permitted
+ *     (optional) "rawok" -> (value ignored)
+ *         presence indicates raw encrypted records should be used.
   * }
   *
   * outnvl: {
@@ -5196,10 +5991,11 @@ static int
  zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
  {
         dsl_pool_t *dp;
-       dsl_dataset_t *fromsnap = NULL;
         dsl_dataset_t *tosnap;
         int error;
         char *fromname;
+       boolean_t compressok;
+       boolean_t rawok;
         uint64_t space;
  
         error = dsl_pool_hold(snapname, FTAG, &dp);
@@ -5212,26 +6008,224 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
                 return (error);
         }
  
-       error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
+       compressok = nvlist_exists(innvl, "compressok");
+       rawok = nvlist_exists(innvl, "rawok");
+
+       error = nvlist_lookup_string(innvl, "from", &fromname);
         if (error == 0) {
-               error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
-               if (error != 0) {
-                       dsl_dataset_rele(tosnap, FTAG);
-                       dsl_pool_rele(dp, FTAG);
-                       return (error);
+               if (strchr(fromname, '@') != NULL) {
+                       /*
+                        * If from is a snapshot, hold it and use the more
+                        * efficient dmu_send_estimate to estimate send space
+                        * size using deadlists.
+                        */
+                       dsl_dataset_t *fromsnap;
+                       error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
+                       if (error != 0)
+                               goto out;
+                       error = dmu_send_estimate(tosnap, fromsnap,
+                           compressok || rawok, &space);
+                       dsl_dataset_rele(fromsnap, FTAG);
+               } else if (strchr(fromname, '#') != NULL) {
+                       /*
+                        * If from is a bookmark, fetch the creation TXG of the
+                        * snapshot it was created from and use that to find
+                        * blocks that were born after it.
+                        */
+                       zfs_bookmark_phys_t frombm;
+
+                       error = dsl_bookmark_lookup(dp, fromname, tosnap,
+                           &frombm);
+                       if (error != 0)
+                               goto out;
+                       error = dmu_send_estimate_from_txg(tosnap,
+                           frombm.zbm_creation_txg, compressok || rawok,
+                           &space);
+               } else {
+                       /*
+                        * from is not properly formatted as a snapshot or
+                        * bookmark
+                        */
+                       error = SET_ERROR(EINVAL);
+                       goto out;
                 }
+       } else {
+               /*
+                * If estimating the size of a full send, use dmu_send_estimate.
+                */
+               error = dmu_send_estimate(tosnap, NULL, compressok || rawok,
+                   &space);
         }
  
-       error = dmu_send_estimate(tosnap, fromsnap, &space);
         fnvlist_add_uint64(outnvl, "space", space);
  
-       if (fromsnap != NULL)
-               dsl_dataset_rele(fromsnap, FTAG);
+out:
         dsl_dataset_rele(tosnap, FTAG);
         dsl_pool_rele(dp, FTAG);
         return (error);
  }
  
+/*
+ * Sync the currently open TXG to disk for the specified pool.
+ * This is somewhat similar to 'zfs_sync()'.
+ * For cases that do not result in error this ioctl will wait for
+ * the currently open TXG to commit before returning to the caller.
+ *
+ * innvl: {
+ *  "force" -> when true, force uberblock update even if there is no dirty data.
+ *             In addition this will cause the vdev configuration to be written
+ *             out including updating the zpool cache file. (boolean_t)
+ * }
+ *
+ * onvl is unused
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
+{
+       int err;
+       boolean_t force = B_FALSE;
+       spa_t *spa;
+
+       if ((err = spa_open(pool, &spa, FTAG)) != 0)
+               return (err);
+       /* A non-NULL innvl must supply a boolean "force", else EINVAL. */
+       if (innvl) {
+               if (nvlist_lookup_boolean_value(innvl, "force", &force) != 0) {
+                       err = SET_ERROR(EINVAL);
+                       goto out;
+               }
+       }
+       /* Forcing dirties the root vdev config under SCL_CONFIG (writer). */
+       if (force) {
+               spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
+               vdev_config_dirty(spa->spa_root_vdev);
+               spa_config_exit(spa, SCL_CONFIG, FTAG);
+       }
+       txg_wait_synced(spa_get_dsl(spa), 0);
+out:
+       spa_close(spa, FTAG);
+
+       return (err);
+}
+
+/*
+ * Load a user's wrapping key into the kernel.
+ * innvl: {
+ *     "hidden_args" -> { "wkeydata" -> value }
+ *         raw uint8_t array of encryption wrapping key data (32 bytes)
+ *     (optional) "noop" -> (value ignored)
+ *         presence indicates key should only be verified, not loaded
+ * }
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       int ret;
+       dsl_crypto_params_t *dcp = NULL;
+       nvlist_t *hidden_args;
+       boolean_t noop = nvlist_exists(innvl, "noop");
+
+       if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
+               ret = SET_ERROR(EINVAL);
+               goto error;
+       }
+
+       ret = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
+       if (ret != 0) {
+               ret = SET_ERROR(EINVAL);
+               goto error;
+       }
+       /* Build crypto params from the hidden args alone (no props). */
+       ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
+           hidden_args, &dcp);
+       if (ret != 0)
+               goto error;
+
+       ret = spa_keystore_load_wkey(dsname, dcp, noop);
+       if (ret != 0)
+               goto error;
+       /* Success frees with 'noop'; the error path always passes B_TRUE. */
+       dsl_crypto_params_free(dcp, noop);
+
+       return (0);
+
+error:
+       dsl_crypto_params_free(dcp, B_TRUE);
+       return (ret);
+}
+
+/*
+ * Unload a user's wrapping key from the kernel.
+ * Both innvl and outnvl are unused.
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       int ret = 0;
+
+       if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
+               ret = (SET_ERROR(EINVAL));
+               goto out;
+       }
+       /* Only names without '@' or '%' reach spa_keystore_unload_wkey(). */
+       ret = spa_keystore_unload_wkey(dsname);
+       if (ret != 0)
+               goto out;
+
+out:
+       return (ret);
+}
+
+/*
+ * Changes a user's wrapping key used to decrypt a dataset. The keyformat,
+ * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified
+ * here to change how the key is derived in userspace.
+ *
+ * innvl: {
+ *    "hidden_args" (optional) -> { "wkeydata" -> value }
+ *         raw uint8_t array of new encryption wrapping key data (32 bytes)
+ *    "props" (optional) -> { prop -> value }
+ * }
+ *
+ * outnvl is unused
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       int ret;
+       uint64_t cmd = DCP_CMD_NONE;
+       dsl_crypto_params_t *dcp = NULL;
+       nvlist_t *args = NULL, *hidden_args = NULL;
+
+       if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
+               ret = (SET_ERROR(EINVAL));
+               goto error;
+       }
+       /* Every input is optional; cmd stays DCP_CMD_NONE when absent. */
+       (void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
+       (void) nvlist_lookup_nvlist(innvl, "props", &args);
+       (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
+
+       ret = dsl_crypto_params_create_nvlist(cmd, args, hidden_args, &dcp);
+       if (ret != 0)
+               goto error;
+
+       ret = spa_keystore_change_key(dsname, dcp);
+       if (ret != 0)
+               goto error;
+
+       dsl_crypto_params_free(dcp, B_FALSE);
+
+       return (0);
+
+error:
+       dsl_crypto_params_free(dcp, B_TRUE);
+       return (ret);
+}
  
  static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
  
@@ -5332,7 +6326,7 @@ zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
  
  static void
  zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
-       zfs_secpolicy_func_t *secpolicy)
+    zfs_secpolicy_func_t *secpolicy)
  {
         zfs_ioctl_register_legacy(ioc, func, secpolicy,
             DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
@@ -5369,6 +6363,10 @@ zfs_ioctl_init(void)
             zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
  
+       zfs_ioctl_register("remap", ZFS_IOC_REMAP,
+           zfs_ioc_remap, zfs_secpolicy_remap, DATASET_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
+
         zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
             zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
@@ -5401,6 +6399,32 @@ zfs_ioctl_init(void)
             POOL_NAME,
             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
  
+       zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
+           zfs_ioc_recv_new, zfs_secpolicy_recv_new, DATASET_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+       zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY,
+           zfs_ioc_load_key, zfs_secpolicy_load_key,
+           DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE);
+       zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY,
+           zfs_ioc_unload_key, zfs_secpolicy_load_key,
+           DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE);
+       zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY,
+           zfs_ioc_change_key, zfs_secpolicy_change_key,
+           DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY,
+           B_TRUE, B_TRUE);
+
+       zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
+           zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
+       zfs_ioctl_register("reopen", ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
+           zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED, B_TRUE,
+           B_TRUE);
+
+       zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
+           zfs_ioc_channel_program, zfs_secpolicy_config,
+           POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
+           B_TRUE);
+
         /* IOCTLS that use the legacy function signature */
  
         zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
@@ -5472,9 +6496,7 @@ zfs_ioctl_init(void)
             zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
  
         zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
-           zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
-       zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
-           zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
+           zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);
  
         zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
             zfs_ioc_space_written);
@@ -5600,13 +6622,35 @@ zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
         return (ptr);
  }
  
-minor_t
-zfsdev_getminor(struct file *filp)
+int
+zfsdev_getminor(struct file *filp, minor_t *minorp)
  {
+       zfsdev_state_t *zs, *fpd;
+
         ASSERT(filp != NULL);
-       ASSERT(filp->private_data != NULL);
+       ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
+
+       fpd = filp->private_data;
+       if (fpd == NULL)
+               return (SET_ERROR(EBADF));
+
+       mutex_enter(&zfsdev_state_lock);
+       /* Accept private_data only if it is still on the state list. */
+       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
+               /* entries with a minor of -1 are skipped */
+               if (zs->zs_minor == -1)
+                       continue;
  
-       return (((zfsdev_state_t *)filp->private_data)->zs_minor);
+               if (fpd == zs) {
+                       *minorp = fpd->zs_minor;
+                       mutex_exit(&zfsdev_state_lock);
+                       return (0);
+               }
+       }
+
+       mutex_exit(&zfsdev_state_lock);
+       /* No list entry matched the private data of this file. */
+       return (SET_ERROR(EBADF));
  }
  
  /*
@@ -5733,6 +6777,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
         const zfs_ioc_vec_t *vec;
         char *saved_poolname = NULL;
         nvlist_t *innvl = NULL;
+       fstrans_cookie_t cookie;
  
         vecnum = cmd - ZFS_IOC_FIRST;
         if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
@@ -5755,7 +6800,23 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
         }
  
         zc->zc_iflags = flag & FKIOCTL;
-       if (zc->zc_nvlist_src_size != 0) {
+       if (zc->zc_nvlist_src_size > MAX_NVLIST_SRC_SIZE) {
+               /*
+                * Make sure the user doesn't pass in an insane value for
+                * zc_nvlist_src_size.  We have to check, since we will end
+                * up allocating that much memory inside of get_nvlist().  This
+                * prevents a nefarious user from allocating tons of kernel
+                * memory.
+                *
+                * Also, we return EINVAL instead of ENOMEM here, because
+                * returning ENOMEM from an ioctl() has a special
+                * connotation: that the user's size value is too small and
+                * needs to be expanded to hold the nvlist.  See
+                * zcmd_expand_dst_nvlist() for details.
+                */
+               error = SET_ERROR(EINVAL);      /* User's size too big */
+
+       } else if (zc->zc_nvlist_src_size != 0) {
                 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
                     zc->zc_iflags, &innvl);
                 if (error != 0)
@@ -5789,8 +6850,11 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
         }
  
  
-       if (error == 0 && !(flag & FKIOCTL))
+       if (error == 0) {
+               cookie = spl_fstrans_mark();
                 error = vec->zvec_secpolicy(zc, innvl, CRED());
+               spl_fstrans_unmark(cookie);
+       }
  
         if (error != 0)
                 goto out;
@@ -5827,14 +6891,27 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
                 }
  
                 outnvl = fnvlist_alloc();
+               cookie = spl_fstrans_mark();
                 error = vec->zvec_func(zc->zc_name, innvl, outnvl);
+               spl_fstrans_unmark(cookie);
  
-               if (error == 0 && vec->zvec_allow_log &&
+               /*
+                * Some commands can partially execute, modify state, and still
+                * return an error.  In these cases, attempt to record what
+                * was modified.
+                */
+               if ((error == 0 ||
+                   (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
+                   vec->zvec_allow_log &&
                     spa_open(zc->zc_name, &spa, FTAG) == 0) {
                         if (!nvlist_empty(outnvl)) {
                                 fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
                                     outnvl);
                         }
+                       if (error != 0) {
+                               fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
+                                   error);
+                       }
                         (void) spa_history_log_nvl(spa, lognv);
                         spa_close(spa, FTAG);
                 }
@@ -5855,7 +6932,9 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
  
                 nvlist_free(outnvl);
         } else {
+               cookie = spl_fstrans_mark();
                 error = vec->zvec_legacy_func(zc);
+               spl_fstrans_unmark(cookie);
         }
  
  out:
@@ -5896,11 +6975,14 @@ static const struct file_operations zfsdev_fops = {
  };
  
  static struct miscdevice zfs_misc = {
-       .minor          = MISC_DYNAMIC_MINOR,
+       .minor          = ZFS_MINOR,
         .name           = ZFS_DRIVER,
         .fops           = &zfsdev_fops,
  };
  
+MODULE_ALIAS_MISCDEV(ZFS_MINOR);
+MODULE_ALIAS("devname:zfs");
+
  static int
  zfs_attach(void)
  {
@@ -5911,24 +6993,32 @@ zfs_attach(void)
         zfsdev_state_list->zs_minor = -1;
  
         error = misc_register(&zfs_misc);
-       if (error != 0) {
-               printk(KERN_INFO "ZFS: misc_register() failed %d\n", error);
-               return (error);
+       if (error == -EBUSY) {
+               /*
+                * Fall back to dynamic minor allocation in the event of a
+                * collision with a reserved minor in linux/miscdevice.h.
+                * In this case the kernel modules must be manually loaded.
+                */
+               printk(KERN_INFO "ZFS: misc_register() with static minor %d "
+                   "failed %d, retrying with MISC_DYNAMIC_MINOR\n",
+                   ZFS_MINOR, error);
+
+               zfs_misc.minor = MISC_DYNAMIC_MINOR;
+               error = misc_register(&zfs_misc);
         }
  
-       return (0);
+       if (error)
+               printk(KERN_INFO "ZFS: misc_register() failed %d\n", error);
+
+       return (error);
  }
  
  static void
  zfs_detach(void)
  {
-       int error;
         zfsdev_state_t *zs, *zsprev = NULL;
  
-       error = misc_deregister(&zfs_misc);
-       if (error != 0)
-               printk(KERN_INFO "ZFS: misc_deregister() failed %d\n", error);
-
+       misc_deregister(&zfs_misc);
         mutex_destroy(&zfsdev_state_lock);
  
         for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
@@ -5944,7 +7034,9 @@ static void
  zfs_allow_log_destroy(void *arg)
  {
         char *poolname = arg;
-       strfree(poolname);
+
+       if (poolname != NULL)
+               strfree(poolname);
  }
  
  #ifdef DEBUG
@@ -5958,23 +7050,23 @@ _init(void)
  {
         int error;
  
-       error = vn_set_pwd("/");
+       error = -vn_set_pwd("/");
         if (error) {
                 printk(KERN_NOTICE
                     "ZFS: Warning unable to set pwd to '/': %d\n", error);
                 return (error);
         }
  
+       if ((error = -zvol_init()) != 0)
+               return (error);
+
         spa_init(FREAD | FWRITE);
         zfs_init();
  
-       if ((error = zvol_init()) != 0)
-               goto out1;
-
         zfs_ioctl_init();
  
         if ((error = zfs_attach()) != 0)
-               goto out2;
+               goto out;
  
         tsd_create(&zfs_fsyncer_key, NULL);
         tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
@@ -5990,11 +7082,10 @@ _init(void)
  
         return (0);
  
-out2:
-       (void) zvol_fini();
-out1:
+out:
         zfs_fini();
         spa_fini();
+       (void) zvol_fini();
         printk(KERN_NOTICE "ZFS: Failed to Load ZFS Filesystem v%s-%s%s"
             ", rc = %d\n", ZFS_META_VERSION, ZFS_META_RELEASE,
             ZFS_DEBUG_STR, error);
@@ -6006,9 +7097,9 @@ static void __exit
  _fini(void)
  {
         zfs_detach();
-       zvol_fini();
         zfs_fini();
         spa_fini();
+       zvol_fini();
  
         tsd_destroy(&zfs_fsyncer_key);
         tsd_destroy(&rrw_tsd_key);