Add `zfs allow` and `zfs unallow` support

[mirror_zfs.git] / module / zfs / zfs_ioctl.c
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c

index acc54e5a7830d488148373540370edb3833d1def..c63af167af644a0745d8cb0036cbf10c1003de8f 100644 (file)
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -24,11 +24,12 @@
   * Portions Copyright 2011 Martin Matuska
   * Portions Copyright 2012 Pawel Jakub Dawidek <pawel@dawidek.net>
   * Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
   * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
+ * Copyright (c) 2016 Actifio, Inc. All rights reserved.
   */
  
  /*
@@ -180,18 +181,26 @@
  
  #include <sys/dmu_send.h>
  #include <sys/dsl_destroy.h>
+#include <sys/dsl_bookmark.h>
  #include <sys/dsl_userhold.h>
  #include <sys/zfeature.h>
  
  #include <linux/miscdevice.h>
+#include <linux/slab.h>
  
  #include "zfs_namecheck.h"
  #include "zfs_prop.h"
  #include "zfs_deleg.h"
  #include "zfs_comutil.h"
  
+/*
+ * Limit maximum nvlist size.  We don't want users passing in insane values
+ * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
+ */
+#define        MAX_NVLIST_SRC_SIZE     KMALLOC_MAX_SIZE
+
  kmutex_t zfsdev_state_lock;
-list_t zfsdev_state_list;
+zfsdev_state_t *zfsdev_state_list;
  
  extern void zfs_init(void);
  extern void zfs_fini(void);
@@ -245,8 +254,6 @@ static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
  int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
  static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
  
-static int zfs_prop_activate_feature(spa_t *spa, zfeature_info_t *feature);
-
  static void
  history_str_free(char *buf)
  {
@@ -261,7 +268,7 @@ history_str_get(zfs_cmd_t *zc)
         if (zc->zc_history == 0)
                 return (NULL);
  
-       buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP | KM_NODEBUG);
+       buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
         if (copyinstr((void *)(uintptr_t)zc->zc_history,
             buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
                 history_str_free(buf);
@@ -291,9 +298,7 @@ zfs_is_bootfs(const char *name)
  }
  
  /*
- * zfs_earlier_version
- *
- *     Return non-zero if the spa version is less than requested version.
+ * Return non-zero if the spa version is less than requested version.
   */
  static int
  zfs_earlier_version(const char *name, int version)
@@ -311,8 +316,6 @@ zfs_earlier_version(const char *name, int version)
  }
  
  /*
- * zpl_earlier_version
- *
   * Return TRUE if the ZPL version is less than requested version.
   */
  static boolean_t
@@ -376,7 +379,7 @@ zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
             zone_dataset_visible(zc->zc_name, NULL))
                 return (0);
  
-       return (ENOENT);
+       return (SET_ERROR(ENOENT));
  }
  
  static int
@@ -390,7 +393,7 @@ zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
          */
         if (!INGLOBALZONE(curproc) &&
             !zone_dataset_visible(dataset, &writable))
-               return (ENOENT);
+               return (SET_ERROR(ENOENT));
  
         if (INGLOBALZONE(curproc)) {
                 /*
@@ -398,17 +401,17 @@ zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
                  * global zone.
                  */
                 if (secpolicy_zfs(cr) && zoned)
-                       return (EPERM);
+                       return (SET_ERROR(EPERM));
         } else {
                 /*
                  * If we are in a local zone, the 'zoned' property must be set.
                  */
                 if (!zoned)
-                       return (EPERM);
+                       return (SET_ERROR(EPERM));
  
                 /* must be writable by this zone */
                 if (!writable)
-                       return (EPERM);
+                       return (SET_ERROR(EPERM));
         }
         return (0);
  }
@@ -419,7 +422,7 @@ zfs_dozonecheck(const char *dataset, cred_t *cr)
         uint64_t zoned;
  
         if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
-               return (ENOENT);
+               return (SET_ERROR(ENOENT));
  
         return (zfs_dozonecheck_impl(dataset, zoned, cr));
  }
@@ -430,7 +433,7 @@ zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
         uint64_t zoned;
  
         if (dsl_prop_get_int_ds(ds, "zoned", &zoned))
-               return (ENOENT);
+               return (SET_ERROR(ENOENT));
  
         return (zfs_dozonecheck_impl(dataset, zoned, cr));
  }
@@ -494,14 +497,14 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
         error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
             1, sizeof (ds_hexsl), &ds_hexsl, NULL);
         if (error != 0)
-               return (EPERM);
+               return (SET_ERROR(EPERM));
  
         if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
                 new_default = TRUE;
  
         /* The label must be translatable */
         if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         /*
          * In a non-global zone, disallow attempts to set a label that
@@ -510,7 +513,7 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
          */
         if (!INGLOBALZONE(curproc)) {
                 if (new_default || !blequal(&new_sl, CR_SL(CRED())))
-                       return (EPERM);
+                       return (SET_ERROR(EPERM));
                 return (0);
         }
  
@@ -521,10 +524,10 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
          */
         if (dsl_prop_get_integer(name,
             zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
-               return (EPERM);
+               return (SET_ERROR(EPERM));
         if (!zoned) {
                 if (zfs_check_global_label(name, strval) != 0)
-                       return (EPERM);
+                       return (SET_ERROR(EPERM));
         }
  
         /*
@@ -544,7 +547,7 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
                 error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
                     setsl_tag, &os);
                 if (error != 0)
-                       return (EPERM);
+                       return (SET_ERROR(EPERM));
  
                 dmu_objset_disown(os, setsl_tag);
  
@@ -554,7 +557,7 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
                 }
  
                 if (hexstr_to_label(strval, &new_sl) != 0)
-                       return (EPERM);
+                       return (SET_ERROR(EPERM));
  
                 if (blstrictdom(&ds_sl, &new_sl))
                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
@@ -571,7 +574,7 @@ out_check:
                 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
         return (0);
  #else
-       return ENOTSUP;
+       return (ENOTSUP);
  #endif /* HAVE_MLSLABEL */
  }
  
@@ -592,29 +595,31 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
                  * Disallow setting of 'zoned' from within a local zone.
                  */
                 if (!INGLOBALZONE(curproc))
-                       return (EPERM);
+                       return (SET_ERROR(EPERM));
                 break;
  
         case ZFS_PROP_QUOTA:
+       case ZFS_PROP_FILESYSTEM_LIMIT:
+       case ZFS_PROP_SNAPSHOT_LIMIT:
                 if (!INGLOBALZONE(curproc)) {
                         uint64_t zoned;
                         char setpoint[MAXNAMELEN];
                         /*
                          * Unprivileged users are allowed to modify the
-                        * quota on things *under* (ie. contained by)
+                        * limit on things *under* (ie. contained by)
                          * the thing they own.
                          */
                         if (dsl_prop_get_integer(dsname, "zoned", &zoned,
                             setpoint))
-                               return (EPERM);
+                               return (SET_ERROR(EPERM));
                         if (!zoned || strlen(dsname) <= strlen(setpoint))
-                               return (EPERM);
+                               return (SET_ERROR(EPERM));
                 }
                 break;
  
         case ZFS_PROP_MLSLABEL:
                 if (!is_system_labeled())
-                       return (EPERM);
+                       return (SET_ERROR(EPERM));
  
                 if (nvpair_value_string(propval, &strval) == 0) {
                         int err;
@@ -669,7 +674,7 @@ zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
          */
         cp = strchr(zc->zc_name, '@');
         if (cp == NULL)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
         if (error != 0)
                 return (error);
@@ -716,7 +721,7 @@ zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
             (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
             zc->zc_name) != 0)) {
                 VN_RELE(vp);
-               return (EPERM);
+               return (SET_ERROR(EPERM));
         }
  
         VN_RELE(vp);
@@ -730,7 +735,7 @@ zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  {
  #ifdef HAVE_SMB_SHARE
         if (!INGLOBALZONE(curproc))
-               return (EPERM);
+               return (SET_ERROR(EPERM));
  
         if (secpolicy_nfs(cr) == 0) {
                 return (0);
@@ -738,7 +743,7 @@ zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
                 return (zfs_secpolicy_deleg_share(zc, innvl, cr));
         }
  #else
-       return (ENOTSUP);
+       return (SET_ERROR(ENOTSUP));
  #endif /* HAVE_SMB_SHARE */
  }
  
@@ -747,7 +752,7 @@ zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  {
  #ifdef HAVE_SMB_SHARE
         if (!INGLOBALZONE(curproc))
-               return (EPERM);
+               return (SET_ERROR(EPERM));
  
         if (secpolicy_smb(cr) == 0) {
                 return (0);
@@ -755,7 +760,7 @@ zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
                 return (zfs_secpolicy_deleg_share(zc, innvl, cr));
         }
  #else
-       return (ENOTSUP);
+       return (SET_ERROR(ENOTSUP));
  #endif /* HAVE_SMB_SHARE */
  }
  
@@ -774,7 +779,7 @@ zfs_get_parent(const char *datasetname, char *parent, int parentsize)
         } else {
                 cp = strrchr(parent, '/');
                 if (cp == NULL)
-                       return (ENOENT);
+                       return (SET_ERROR(ENOENT));
                 cp[0] = '\0';
         }
  
@@ -813,25 +818,12 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
         int error = 0;
  
         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
             pair = nextpair) {
-               dsl_pool_t *dp;
-               dsl_dataset_t *ds;
-
-               error = dsl_pool_hold(nvpair_name(pair), FTAG, &dp);
-               if (error != 0)
-                       break;
                 nextpair = nvlist_next_nvpair(snaps, pair);
-               error = dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds);
-               if (error == 0)
-                       dsl_dataset_rele(ds, FTAG);
-               dsl_pool_rele(dp, FTAG);
-
-               if (error == 0) {
-                       error = zfs_secpolicy_destroy_perms(nvpair_name(pair),
-                           cr);
-               } else if (error == ENOENT) {
+               error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
+               if (error == ENOENT) {
                         /*
                          * Ignore any snapshots that don't exist (we consider
                          * them "already destroyed").  Remove the name from the
@@ -912,7 +904,7 @@ zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
                 dd = clone->ds_dir;
  
                 error = dsl_dataset_hold_obj(dd->dd_pool,
-                   dd->dd_phys->dd_origin_obj, FTAG, &origin);
+                   dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
                 if (error != 0) {
                         dsl_dataset_rele(clone, FTAG);
                         dsl_pool_rele(dp, FTAG);
@@ -971,14 +963,14 @@ zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
         nvpair_t *pair;
  
         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
             pair = nvlist_next_nvpair(snaps, pair)) {
                 char *name = nvpair_name(pair);
                 char *atp = strchr(name, '@');
  
                 if (atp == NULL) {
-                       error = EINVAL;
+                       error = SET_ERROR(EINVAL);
                         break;
                 }
                 *atp = '\0';
@@ -990,6 +982,76 @@ zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
         return (error);
  }
  
+/*
+ * Check for permission to create each bookmark in the nvlist.
+ */
+/* ARGSUSED */
+static int
+zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+       int error = 0;
+       nvpair_t *pair;
+
+       for (pair = nvlist_next_nvpair(innvl, NULL);
+           pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
+               char *name = nvpair_name(pair);
+               char *hashp = strchr(name, '#');
+
+               if (hashp == NULL) {
+                       error = SET_ERROR(EINVAL);
+                       break;
+               }
+               *hashp = '\0';
+               error = zfs_secpolicy_write_perms(name,
+                   ZFS_DELEG_PERM_BOOKMARK, cr);
+               *hashp = '#';
+               if (error != 0)
+                       break;
+       }
+       return (error);
+}
+
+/* ARGSUSED */
+static int
+zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+       nvpair_t *pair, *nextpair;
+       int error = 0;
+
+       for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
+           pair = nextpair) {
+               char *name = nvpair_name(pair);
+               char *hashp = strchr(name, '#');
+               nextpair = nvlist_next_nvpair(innvl, pair);
+
+               if (hashp == NULL) {
+                       error = SET_ERROR(EINVAL);
+                       break;
+               }
+
+               *hashp = '\0';
+               error = zfs_secpolicy_write_perms(name,
+                   ZFS_DELEG_PERM_DESTROY, cr);
+               *hashp = '#';
+               if (error == ENOENT) {
+                       /*
+                        * Ignore any filesystems that don't exist (we consider
+                        * their bookmarks "already destroyed").  Remove
+                        * the name from the nvl here in case the filesystem
+                        * is created between now and when we try to destroy
+                        * the bookmark (in which case we don't want to
+                        * destroy it since we haven't checked for permission).
+                        */
+                       fnvlist_remove_nvpair(innvl, pair);
+                       error = 0;
+               }
+               if (error != 0)
+                       break;
+       }
+
+       return (error);
+}
+
  /* ARGSUSED */
  static int
  zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
@@ -999,7 +1061,7 @@ zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
          * to log to.
          */
         if (tsd_get(zfs_allow_log_key) == NULL)
-               return (EPERM);
+               return (SET_ERROR(EPERM));
         return (0);
  }
  
@@ -1036,7 +1098,7 @@ static int
  zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  {
         if (secpolicy_sys_config(cr, B_FALSE) != 0)
-               return (EPERM);
+               return (SET_ERROR(EPERM));
  
         return (0);
  }
@@ -1075,7 +1137,7 @@ zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  
         if (prop == ZPROP_INVAL) {
                 if (!zfs_prop_user(zc->zc_value))
-                       return (EINVAL);
+                       return (SET_ERROR(EINVAL));
                 return (zfs_secpolicy_write_perms(zc->zc_name,
                     ZFS_DELEG_PERM_USERPROP, cr));
         } else {
@@ -1092,7 +1154,7 @@ zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
                 return (err);
  
         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         if (zc->zc_value[0] == 0) {
                 /*
@@ -1121,7 +1183,7 @@ zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
                 return (err);
  
         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         return (zfs_secpolicy_write_perms(zc->zc_name,
             userquota_perms[zc->zc_objset_type], cr));
@@ -1145,7 +1207,7 @@ zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  
         error = nvlist_lookup_nvlist(innvl, "holds", &holds);
         if (error != 0)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
             pair = nvlist_next_nvpair(holds, pair)) {
@@ -1223,22 +1285,22 @@ get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
          * Read in and unpack the user-supplied nvlist.
          */
         if (size == 0)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
-       packed = kmem_alloc(size, KM_SLEEP | KM_NODEBUG);
+       packed = vmem_alloc(size, KM_SLEEP);
  
         if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
             iflag)) != 0) {
-               kmem_free(packed, size);
-               return (error);
+               vmem_free(packed, size);
+               return (SET_ERROR(EFAULT));
         }
  
         if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
-               kmem_free(packed, size);
+               vmem_free(packed, size);
                 return (error);
         }
  
-       kmem_free(packed, size);
+       vmem_free(packed, size);
  
         *nvp = list;
         return (0);
@@ -1262,7 +1324,7 @@ nvlist_smush(nvlist_t *errors, size_t max)
                 int n = 0;
  
                 if (max < 1024)
-                       return (ENOMEM);
+                       return (SET_ERROR(ENOMEM));
  
                 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
                 more_errors = nvlist_prev_nvpair(errors, NULL);
@@ -1293,12 +1355,12 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
         size = fnvlist_size(nvl);
  
         if (size > zc->zc_nvlist_dst_size) {
-               error = ENOMEM;
+               error = SET_ERROR(ENOMEM);
         } else {
                 packed = fnvlist_pack(nvl, &size);
                 if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
                     size, zc->zc_iflags) != 0)
-                       error = EFAULT;
+                       error = SET_ERROR(EFAULT);
                 fnvlist_pack_free(packed, size);
         }
  
@@ -1318,7 +1380,7 @@ get_zfs_sb(const char *dsname, zfs_sb_t **zsbp)
                 return (error);
         if (dmu_objset_type(os) != DMU_OST_ZFS) {
                 dmu_objset_rele(os, FTAG);
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         }
  
         mutex_enter(&os->os_user_ptr_lock);
@@ -1326,7 +1388,7 @@ get_zfs_sb(const char *dsname, zfs_sb_t **zsbp)
         if (*zsbp && (*zsbp)->z_sb) {
                 atomic_inc(&((*zsbp)->z_sb->s_active));
         } else {
-               error = ESRCH;
+               error = SET_ERROR(ESRCH);
         }
         mutex_exit(&os->os_user_ptr_lock);
         dmu_objset_rele(os, FTAG);
@@ -1345,18 +1407,18 @@ zfs_sb_hold(const char *name, void *tag, zfs_sb_t **zsbp, boolean_t writer)
         int error = 0;
  
         if (get_zfs_sb(name, zsbp) != 0)
-               error = zfs_sb_create(name, zsbp);
+               error = zfs_sb_create(name, NULL, zsbp);
         if (error == 0) {
-               rrw_enter(&(*zsbp)->z_teardown_lock, (writer) ? RW_WRITER :
+               rrm_enter(&(*zsbp)->z_teardown_lock, (writer) ? RW_WRITER :
                     RW_READER, tag);
                 if ((*zsbp)->z_unmounted) {
                         /*
                          * XXX we could probably try again, since the unmounting
                          * thread should be just about to disassociate the
-                        * objset from the zfsvfs.
+                        * objset from the zsb.
                          */
-                       rrw_exit(&(*zsbp)->z_teardown_lock, tag);
-                       return (EBUSY);
+                       rrm_exit(&(*zsbp)->z_teardown_lock, tag);
+                       return (SET_ERROR(EBUSY));
                 }
         }
         return (error);
@@ -1365,7 +1427,7 @@ zfs_sb_hold(const char *name, void *tag, zfs_sb_t **zsbp, boolean_t writer)
  static void
  zfs_sb_rele(zfs_sb_t *zsb, void *tag)
  {
-       rrw_exit(&zsb->z_teardown_lock, tag);
+       rrm_exit(&zsb->z_teardown_lock, tag);
  
         if (zsb->z_sb) {
                 deactivate_super(zsb->z_sb);
@@ -1401,7 +1463,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
                 (void) nvlist_lookup_uint64(props,
                     zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
                 if (!SPA_VERSION_IS_SUPPORTED(version)) {
-                       error = EINVAL;
+                       error = SET_ERROR(EINVAL);
                         goto pool_props_bad;
                 }
                 (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
@@ -1445,8 +1507,7 @@ zfs_ioc_pool_destroy(zfs_cmd_t *zc)
         int error;
         zfs_log_history(zc);
         error = spa_destroy(zc->zc_name);
-       if (error == 0)
-               zvol_remove_minors(zc->zc_name);
+
         return (error);
  }
  
@@ -1470,7 +1531,7 @@ zfs_ioc_pool_import(zfs_cmd_t *zc)
  
         if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
             guid != zc->zc_guid)
-               error = EINVAL;
+               error = SET_ERROR(EINVAL);
         else
                 error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
  
@@ -1482,9 +1543,7 @@ zfs_ioc_pool_import(zfs_cmd_t *zc)
         }
  
         nvlist_free(config);
-
-       if (props)
-               nvlist_free(props);
+       nvlist_free(props);
  
         return (error);
  }
@@ -1498,8 +1557,7 @@ zfs_ioc_pool_export(zfs_cmd_t *zc)
  
         zfs_log_history(zc);
         error = spa_export(zc->zc_name, NULL, force, hardforce);
-       if (error == 0)
-               zvol_remove_minors(zc->zc_name);
+
         return (error);
  }
  
@@ -1510,7 +1568,7 @@ zfs_ioc_pool_configs(zfs_cmd_t *zc)
         int error;
  
         if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
-               return (EEXIST);
+               return (SET_ERROR(EEXIST));
  
         error = put_nvlist(zc, configs);
  
@@ -1574,7 +1632,7 @@ zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
         nvlist_free(tryconfig);
  
         if (config == NULL)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         error = put_nvlist(zc, config);
         nvlist_free(config);
@@ -1632,7 +1690,7 @@ zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
         if (zc->zc_cookie < spa_version(spa) ||
             !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
                 spa_close(spa, FTAG);
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         }
  
         spa_upgrade(spa, zc->zc_cookie);
@@ -1650,14 +1708,14 @@ zfs_ioc_pool_get_history(zfs_cmd_t *zc)
         int error;
  
         if ((size = zc->zc_history_len) == 0)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
                 return (error);
  
         if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
                 spa_close(spa, FTAG);
-               return (ENOTSUP);
+               return (SET_ERROR(ENOTSUP));
         }
  
         hist_buf = vmem_alloc(size, KM_SLEEP);
@@ -1712,7 +1770,7 @@ zfs_ioc_obj_to_path(zfs_cmd_t *zc)
                 return (error);
         if (dmu_objset_type(os) != DMU_OST_ZFS) {
                 dmu_objset_rele(os, FTAG);
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         }
         error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
             sizeof (zc->zc_value));
@@ -1741,7 +1799,7 @@ zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
                 return (error);
         if (dmu_objset_type(os) != DMU_OST_ZFS) {
                 dmu_objset_rele(os, FTAG);
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         }
         error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
             sizeof (zc->zc_value));
@@ -1755,8 +1813,7 @@ zfs_ioc_vdev_add(zfs_cmd_t *zc)
  {
         spa_t *spa;
         int error;
-       nvlist_t *config, **l2cache, **spares;
-       uint_t nl2cache = 0, nspares = 0;
+       nvlist_t *config;
  
         error = spa_open(zc->zc_name, &spa, FTAG);
         if (error != 0)
@@ -1764,28 +1821,6 @@ zfs_ioc_vdev_add(zfs_cmd_t *zc)
  
         error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
             zc->zc_iflags, &config);
-       (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
-           &l2cache, &nl2cache);
-
-       (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
-           &spares, &nspares);
-
-       /*
-        * A root pool with concatenated devices is not supported.
-        * Thus, can not add a device to a root pool.
-        *
-        * Intent log device can not be added to a rootpool because
-        * during mountroot, zil is replayed, a seperated log device
-        * can not be accessed during the mountroot time.
-        *
-        * l2cache and spare devices are ok to be added to a rootpool.
-        */
-       if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
-               nvlist_free(config);
-               spa_close(spa, FTAG);
-               return (EDOM);
-       }
-
         if (error == 0) {
                 error = spa_vdev_add(spa, config);
                 nvlist_free(config);
@@ -1849,7 +1884,7 @@ zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
                 break;
  
         default:
-               error = EINVAL;
+               error = SET_ERROR(EINVAL);
         }
         zc->zc_cookie = newstate;
         spa_close(spa, FTAG);
@@ -2044,7 +2079,7 @@ zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
          * SPA_VERSION_RECVD_PROPS.
          */
         if (!dsl_prop_get_hasrecvd(zc->zc_name))
-               return (ENOTSUP);
+               return (SET_ERROR(ENOTSUP));
  
         if (zc->zc_nvlist_dst != 0 &&
             (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
@@ -2110,13 +2145,13 @@ zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
                         err = put_nvlist(zc, nv);
                 nvlist_free(nv);
         } else {
-               err = ENOENT;
+               err = SET_ERROR(ENOENT);
         }
         dmu_objset_rele(os, FTAG);
         return (err);
  }
  
-static boolean_t
+boolean_t
  dataset_name_hidden(const char *name)
  {
         /*
@@ -2157,7 +2192,7 @@ zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
  top:
         if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) {
                 if (error == ENOENT)
-                       error = ESRCH;
+                       error = SET_ERROR(ESRCH);
                 return (error);
         }
  
@@ -2171,7 +2206,7 @@ top:
                     sizeof (zc->zc_name) - (p - zc->zc_name), p,
                     NULL, &zc->zc_cookie);
                 if (error == ENOENT)
-                       error = ESRCH;
+                       error = SET_ERROR(ESRCH);
         } while (error == 0 && dataset_name_hidden(zc->zc_name));
         dmu_objset_rele(os, FTAG);
  
@@ -2219,7 +2254,7 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
          */
         if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
                 dmu_objset_rele(os, FTAG);
-               return (ESRCH);
+               return (SET_ERROR(ESRCH));
         }
  
         error = dmu_snapshot_list_next(os,
@@ -2241,7 +2276,7 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
                         dsl_dataset_rele(ds, FTAG);
                 }
         } else if (error == ENOENT) {
-               error = ESRCH;
+               error = SET_ERROR(ESRCH);
         }
  
         dmu_objset_rele(os, FTAG);
@@ -2270,7 +2305,7 @@ zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
                 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
                     &pair) != 0)
-                       return (EINVAL);
+                       return (SET_ERROR(EINVAL));
         }
  
         /*
@@ -2280,7 +2315,7 @@ zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
         if ((dash = strchr(propname, '-')) == NULL ||
             nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
             vallen != 3)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         domain = dash + 1;
         type = valary[0];
@@ -2311,7 +2346,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
         const char *propname = nvpair_name(pair);
         zfs_prop_t prop = zfs_name_to_prop(propname);
         uint64_t intval;
-       int err;
+       int err = -1;
  
         if (prop == ZPROP_INVAL) {
                 if (zfs_prop_userquota(propname))
@@ -2338,6 +2373,21 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
         case ZFS_PROP_REFQUOTA:
                 err = dsl_dataset_set_refquota(dsname, source, intval);
                 break;
+       case ZFS_PROP_FILESYSTEM_LIMIT:
+       case ZFS_PROP_SNAPSHOT_LIMIT:
+               if (intval == UINT64_MAX) {
+                       /* clearing the limit, just do it */
+                       err = 0;
+               } else {
+                       err = dsl_dir_activate_fs_ss_limit(dsname);
+               }
+               /*
+                * Set err to -1 to force the zfs_set_prop_nvlist code down the
+                * default path to set the value in the nvlist.
+                */
+               if (err == 0)
+                       err = -1;
+               break;
         case ZFS_PROP_RESERVATION:
                 err = dsl_dir_set_reservation(dsname, source, intval);
                 break;
@@ -2348,7 +2398,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
                 err = zvol_set_volsize(dsname, intval);
                 break;
         case ZFS_PROP_SNAPDEV:
-               err = zvol_set_snapdev(dsname, intval);
+               err = zvol_set_snapdev(dsname, source, intval);
                 break;
         case ZFS_PROP_VERSION:
         {
@@ -2363,46 +2413,13 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
                 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
                         zfs_cmd_t *zc;
  
-                       zc = kmem_zalloc(sizeof (zfs_cmd_t),
-                           KM_SLEEP | KM_NODEBUG);
+                       zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
                         (void) strcpy(zc->zc_name, dsname);
                         (void) zfs_ioc_userspace_upgrade(zc);
                         kmem_free(zc, sizeof (zfs_cmd_t));
                 }
                 break;
         }
-       case ZFS_PROP_COMPRESSION:
-       {
-               if (intval == ZIO_COMPRESS_LZ4) {
-                       zfeature_info_t *feature =
-                           &spa_feature_table[SPA_FEATURE_LZ4_COMPRESS];
-                       spa_t *spa;
-
-                       if ((err = spa_open(dsname, &spa, FTAG)) != 0)
-                               return (err);
-
-                       /*
-                        * Setting the LZ4 compression algorithm activates
-                        * the feature.
-                        */
-                       if (!spa_feature_is_active(spa, feature)) {
-                               if ((err = zfs_prop_activate_feature(spa,
-                                   feature)) != 0) {
-                                       spa_close(spa, FTAG);
-                                       return (err);
-                               }
-                       }
-
-                       spa_close(spa, FTAG);
-               }
-               /*
-                * We still want the default set action to be performed in the
-                * caller, we only performed zfeature settings here.
-                */
-               err = -1;
-               break;
-       }
-
         default:
                 err = -1;
         }
@@ -2446,25 +2463,25 @@ retry:
                         attrs = fnvpair_value_nvlist(pair);
                         if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
                             &propval) != 0)
-                               err = EINVAL;
+                               err = SET_ERROR(EINVAL);
                 }
  
                 /* Validate value type */
                 if (err == 0 && prop == ZPROP_INVAL) {
                         if (zfs_prop_user(propname)) {
                                 if (nvpair_type(propval) != DATA_TYPE_STRING)
-                                       err = EINVAL;
+                                       err = SET_ERROR(EINVAL);
                         } else if (zfs_prop_userquota(propname)) {
                                 if (nvpair_type(propval) !=
                                     DATA_TYPE_UINT64_ARRAY)
-                                       err = EINVAL;
+                                       err = SET_ERROR(EINVAL);
                         } else {
-                               err = EINVAL;
+                               err = SET_ERROR(EINVAL);
                         }
                 } else if (err == 0) {
                         if (nvpair_type(propval) == DATA_TYPE_STRING) {
                                 if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
-                                       err = EINVAL;
+                                       err = SET_ERROR(EINVAL);
                         } else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
                                 const char *unused;
  
@@ -2474,19 +2491,19 @@ retry:
                                 case PROP_TYPE_NUMBER:
                                         break;
                                 case PROP_TYPE_STRING:
-                                       err = EINVAL;
+                                       err = SET_ERROR(EINVAL);
                                         break;
                                 case PROP_TYPE_INDEX:
                                         if (zfs_prop_index_to_string(prop,
                                             intval, &unused) != 0)
-                                               err = EINVAL;
+                                               err = SET_ERROR(EINVAL);
                                         break;
                                 default:
                                         cmn_err(CE_PANIC,
                                             "unknown property type");
                                 }
                         } else {
-                               err = EINVAL;
+                               err = SET_ERROR(EINVAL);
                         }
                 }
  
@@ -2579,22 +2596,20 @@ zfs_check_userprops(const char *fsname, nvlist_t *nvl)
  
         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
                 const char *propname = nvpair_name(pair);
-               char *valstr;
  
                 if (!zfs_prop_user(propname) ||
                     nvpair_type(pair) != DATA_TYPE_STRING)
-                       return (EINVAL);
+                       return (SET_ERROR(EINVAL));
  
                 if ((error = zfs_secpolicy_write_perms(fsname,
                     ZFS_DELEG_PERM_USERPROP, CRED())))
                         return (error);
  
                 if (strlen(propname) >= ZAP_MAXNAMELEN)
-                       return (ENAMETOOLONG);
+                       return (SET_ERROR(ENAMETOOLONG));
  
-               VERIFY(nvpair_value_string(pair, &valstr) == 0);
-               if (strlen(valstr) >= ZAP_MAXVALUELEN)
-                       return (E2BIG);
+               if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
+                       return (SET_ERROR(E2BIG));
         }
         return (0);
  }
@@ -2714,12 +2729,12 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc)
                  */
                 if (prop == ZPROP_INVAL) {
                         if (!zfs_prop_user(propname))
-                               return (EINVAL);
+                               return (SET_ERROR(EINVAL));
  
                         type = PROP_TYPE_STRING;
                 } else if (prop == ZFS_PROP_VOLSIZE ||
                     prop == ZFS_PROP_VERSION) {
-                       return (EINVAL);
+                       return (SET_ERROR(EINVAL));
                 } else {
                         type = zfs_prop_get_type(prop);
                 }
@@ -2736,7 +2751,7 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc)
                         break;
                 default:
                         nvlist_free(dummy);
-                       return (EINVAL);
+                       return (SET_ERROR(EINVAL));
                 }
  
                 pair = nvlist_next_nvpair(dummy, NULL);
@@ -2752,7 +2767,7 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc)
                  * they are not considered inheritable.
                  */
                 if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
-                       return (EINVAL);
+                       return (SET_ERROR(EINVAL));
         }
  
         /* property name has been validated by zfs_secpolicy_inherit_prop() */
@@ -2829,36 +2844,12 @@ zfs_ioc_pool_get_props(zfs_cmd_t *zc)
         if (error == 0 && zc->zc_nvlist_dst != 0)
                 error = put_nvlist(zc, nvp);
         else
-               error = EFAULT;
+               error = SET_ERROR(EFAULT);
  
         nvlist_free(nvp);
         return (error);
  }
  
-/*
- * inputs:
- * zc_name              name of volume
- *
- * outputs:             none
- */
-static int
-zfs_ioc_create_minor(zfs_cmd_t *zc)
-{
-       return (zvol_create_minor(zc->zc_name));
-}
-
-/*
- * inputs:
- * zc_name              name of volume
- *
- * outputs:             none
- */
-static int
-zfs_ioc_remove_minor(zfs_cmd_t *zc)
-{
-       return (zvol_remove_minor(zc->zc_name));
-}
-
  /*
   * inputs:
   * zc_name             name of filesystem
@@ -2882,7 +2873,7 @@ zfs_ioc_set_fsacl(zfs_cmd_t *zc)
          */
         if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
                 nvlist_free(fsaclnv);
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         }
  
         /*
@@ -2943,10 +2934,10 @@ zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
  
  /*
   * inputs:
- * createprops         list of properties requested by creator
- * default_zplver      zpl version to use if unspecified in createprops
- * fuids_ok            fuids allowed in this version of the spa?
   * os                  parent objset pointer (NULL if root fs)
+ * fuids_ok            fuids allowed in this version of the spa?
+ * sa_ok               SAs allowed in this version of the spa?
+ * createprops         list of properties requested by creator
   *
   * outputs:
   * zplprops    values for the zplprops we attach to the master node object
@@ -3005,7 +2996,7 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
             (zplver < ZPL_VERSION_NORMALIZATION &&
             (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
             sense != ZFS_PROP_UNDEFINED)))
-               return (ENOTSUP);
+               return (SET_ERROR(ENOTSUP));
  
         /*
          * Put the version in the zplprops
@@ -3120,7 +3111,7 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
         boolean_t is_insensitive = B_FALSE;
  
         if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         type = type32;
         (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
  
@@ -3139,32 +3130,32 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
         }
         if (strchr(fsname, '@') ||
             strchr(fsname, '%'))
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         zct.zct_props = nvprops;
  
         if (cbfunc == NULL)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         if (type == DMU_OST_ZVOL) {
                 uint64_t volsize, volblocksize;
  
                 if (nvprops == NULL)
-                       return (EINVAL);
+                       return (SET_ERROR(EINVAL));
                 if (nvlist_lookup_uint64(nvprops,
                     zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
-                       return (EINVAL);
+                       return (SET_ERROR(EINVAL));
  
                 if ((error = nvlist_lookup_uint64(nvprops,
                     zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
                     &volblocksize)) != 0 && error != ENOENT)
-                       return (EINVAL);
+                       return (SET_ERROR(EINVAL));
  
                 if (error != 0)
                         volblocksize = zfs_prop_default_numeric(
                             ZFS_PROP_VOLBLOCKSIZE);
  
-               if ((error = zvol_check_volblocksize(
+               if ((error = zvol_check_volblocksize(fsname,
                     volblocksize)) != 0 ||
                     (error = zvol_check_volsize(volsize,
                     volblocksize)) != 0)
@@ -3198,8 +3189,25 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
         if (error == 0) {
                 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
                     nvprops, outnvl);
-               if (error != 0)
-                       (void) dsl_destroy_head(fsname);
+               if (error != 0) {
+                       spa_t *spa;
+                       int error2;
+
+                       /*
+                        * Volumes will return EBUSY and cannot be destroyed
+                        * until all asynchronous minor handling has completed.
+                        * Wait for the spa_zvol_taskq to drain then retry.
+                        */
+                       error2 = dsl_destroy_head(fsname);
+                       while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) {
+                               error2 = spa_open(fsname, &spa, FTAG);
+                               if (error2 == 0) {
+                                       taskq_wait(spa->spa_zvol_taskq);
+                                       spa_close(spa, FTAG);
+                               }
+                               error2 = dsl_destroy_head(fsname);
+                       }
+               }
         }
         return (error);
  }
@@ -3221,15 +3229,15 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
         char *origin_name;
  
         if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
  
         if (strchr(fsname, '@') ||
             strchr(fsname, '%'))
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         if (dataset_namecheck(origin_name, NULL, NULL) != 0)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         error = dmu_objset_clone(fsname, origin_name);
         if (error != 0)
                 return (error);
@@ -3268,10 +3276,10 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
  
         if (!nvlist_empty(props) &&
             zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
-               return (ENOTSUP);
+               return (SET_ERROR(ENOTSUP));
  
         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         poollen = strlen(poolname);
         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
             pair = nvlist_next_nvpair(snaps, pair)) {
@@ -3282,27 +3290,29 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
                  * The snap name must contain an @, and the part after it must
                  * contain only valid characters.
                  */
-               if (cp == NULL || snapshot_namecheck(cp + 1, NULL, NULL) != 0)
-                       return (EINVAL);
+               if (cp == NULL ||
+                   zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
+                       return (SET_ERROR(EINVAL));
  
                 /*
                  * The snap must be in the specified pool.
                  */
                 if (strncmp(name, poolname, poollen) != 0 ||
                     (name[poollen] != '/' && name[poollen] != '@'))
-                       return (EXDEV);
+                       return (SET_ERROR(EXDEV));
  
                 /* This must be the only snap of this fs. */
                 for (pair2 = nvlist_next_nvpair(snaps, pair);
                     pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
                         if (strncmp(name, nvpair_name(pair2), cp - name + 1)
                             == 0) {
-                               return (EXDEV);
+                               return (SET_ERROR(EXDEV));
                         }
                 }
         }
  
         error = dsl_dataset_snapshot(snaps, props, outnvl);
+
         return (error);
  }
  
@@ -3334,12 +3344,12 @@ zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
  
         if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
                 spa_close(spa, FTAG);
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         }
  
         if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
                 spa_close(spa, FTAG);
-               return (ENOTSUP);
+               return (SET_ERROR(ENOTSUP));
         }
  
         error = spa_history_log(spa, message);
@@ -3353,40 +3363,30 @@ zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
   *
   * This function is best-effort.  Callers must deal gracefully if it
   * remains mounted (or is remounted after this call).
+ *
+ * Returns 0 if the argument is not a snapshot, or it is not currently a
+ * filesystem, or we were able to unmount it.  Returns error code otherwise.
   */
-void
+int
  zfs_unmount_snap(const char *snapname)
  {
-       zfs_sb_t *zsb = NULL;
-       char *dsname;
-       char *fullname;
-       char *ptr;
-
-       if ((ptr = strchr(snapname, '@')) == NULL)
-               return;
+       int err;
  
-       dsname = strdup(snapname);
-       dsname[ptr - snapname] = '\0';
-       snapname = strdup(ptr + 1);
-       fullname = kmem_asprintf("%s@%s", dsname, snapname);
-       if (zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE) == 0) {
-               ASSERT(!dsl_pool_config_held(dmu_objset_pool(zsb->z_os)));
-               (void) zfsctl_unmount_snapshot(zsb, fullname, MNT_FORCE);
-               zfs_sb_rele(zsb, FTAG);
-       }
+       if (strchr(snapname, '@') == NULL)
+               return (0);
  
-       strfree(dsname);
-       strfree(fullname);
+       err = zfsctl_snapshot_unmount((char *)snapname, MNT_FORCE);
+       if (err != 0 && err != ENOENT)
+               return (SET_ERROR(err));
  
-       return;
+       return (0);
  }
  
  /* ARGSUSED */
  static int
  zfs_unmount_snap_cb(const char *snapname, void *arg)
  {
-       zfs_unmount_snap(snapname);
-       return (0);
+       return (zfs_unmount_snap(snapname));
  }
  
  /*
@@ -3409,7 +3409,7 @@ zfs_destroy_unmount_origin(const char *fsname)
                 char originname[MAXNAMELEN];
                 dsl_dataset_name(ds->ds_prev, originname);
                 dmu_objset_rele(os, FTAG);
-               zfs_unmount_snap(originname);
+               (void) zfs_unmount_snap(originname);
         } else {
                 dmu_objset_rele(os, FTAG);
         }
@@ -3423,35 +3423,121 @@ zfs_destroy_unmount_origin(const char *fsname)
   *
   * outnvl: snapshot -> error code (int32)
   */
+/* ARGSUSED */
  static int
  zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
  {
-       int poollen;
         nvlist_t *snaps;
         nvpair_t *pair;
         boolean_t defer;
  
         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         defer = nvlist_exists(innvl, "defer");
  
-       poollen = strlen(poolname);
         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
             pair = nvlist_next_nvpair(snaps, pair)) {
+               (void) zfs_unmount_snap(nvpair_name(pair));
+       }
+
+       return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
+}
+
+/*
+ * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
+ * All bookmarks must be in the same pool.
+ *
+ * innvl: {
+ *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
+ * }
+ *
+ * outnvl: bookmark -> error code (int32)
+ *
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       nvpair_t *pair, *pair2;
+
+       for (pair = nvlist_next_nvpair(innvl, NULL);
+           pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
+               char *snap_name;
+
+               /*
+                * Verify the snapshot argument.
+                */
+               if (nvpair_value_string(pair, &snap_name) != 0)
+                       return (SET_ERROR(EINVAL));
+
+
+               /* Verify that the keys (bookmarks) are unique */
+               for (pair2 = nvlist_next_nvpair(innvl, pair);
+                   pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
+                       if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
+                               return (SET_ERROR(EINVAL));
+               }
+       }
+
+       return (dsl_bookmark_create(innvl, outnvl));
+}
+
+/*
+ * innvl: {
+ *     property 1, property 2, ...
+ * }
+ *
+ * outnvl: {
+ *     bookmark name 1 -> { property 1, property 2, ... },
+ *     bookmark name 2 -> { property 1, property 2, ... }
+ * }
+ *
+ */
+static int
+zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       return (dsl_get_bookmarks(fsname, innvl, outnvl));
+}
+
+/*
+ * innvl: {
+ *     bookmark name 1, bookmark name 2
+ * }
+ *
+ * outnvl: bookmark -> error code (int32)
+ *
+ */
+static int
+zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
+    nvlist_t *outnvl)
+{
+       int error, poollen;
+       nvpair_t *pair;
+
+       poollen = strlen(poolname);
+       for (pair = nvlist_next_nvpair(innvl, NULL);
+           pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
                 const char *name = nvpair_name(pair);
+               const char *cp = strchr(name, '#');
  
                 /*
-                * The snap must be in the specified pool.
+                * The bookmark name must contain an #, and the part after it
+                * must contain only valid characters.
                  */
-               if (strncmp(name, poolname, poollen) != 0 ||
-                   (name[poollen] != '/' && name[poollen] != '@'))
-                       return (EXDEV);
+               if (cp == NULL ||
+                   zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
+                       return (SET_ERROR(EINVAL));
  
-               zfs_unmount_snap(name);
-               (void) zvol_remove_minor(name);
+               /*
+                * The bookmark must be in the specified pool.
+                */
+               if (strncmp(name, poolname, poollen) != 0 ||
+                   (name[poollen] != '/' && name[poollen] != '#'))
+                       return (SET_ERROR(EXDEV));
         }
  
-       return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
+       error = dsl_bookmark_destroy(innvl, outnvl);
+       return (error);
  }
  
  /*
@@ -3466,42 +3552,48 @@ static int
  zfs_ioc_destroy(zfs_cmd_t *zc)
  {
         int err;
-       if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS)
-               zfs_unmount_snap(zc->zc_name);
+
+       if (zc->zc_objset_type == DMU_OST_ZFS) {
+               err = zfs_unmount_snap(zc->zc_name);
+               if (err != 0)
+                       return (err);
+       }
  
         if (strchr(zc->zc_name, '@'))
                 err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
         else
                 err = dsl_destroy_head(zc->zc_name);
-       if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
-               (void) zvol_remove_minor(zc->zc_name);
+
         return (err);
  }
  
  /*
- * inputs:
- * zc_name     name of dataset to rollback (to most recent snapshot)
+ * fsname is name of dataset to rollback (to most recent snapshot)
   *
- * outputs:    none
+ * innvl is not used.
+ *
+ * outnvl: "target" -> name of most recent snapshot
+ *
   */
+/* ARGSUSED */
  static int
-zfs_ioc_rollback(zfs_cmd_t *zc)
+zfs_ioc_rollback(const char *fsname, nvlist_t *args, nvlist_t *outnvl)
  {
         zfs_sb_t *zsb;
         int error;
  
-       if (get_zfs_sb(zc->zc_name, &zsb) == 0) {
+       if (get_zfs_sb(fsname, &zsb) == 0) {
                 error = zfs_suspend_fs(zsb);
                 if (error == 0) {
                         int resume_err;
  
-                       error = dsl_dataset_rollback(zc->zc_name);
-                       resume_err = zfs_resume_fs(zsb, zc->zc_name);
+                       error = dsl_dataset_rollback(fsname, zsb, outnvl);
+                       resume_err = zfs_resume_fs(zsb, fsname);
                         error = error ? error : resume_err;
                 }
                 deactivate_super(zsb->z_sb);
         } else {
-               error = dsl_dataset_rollback(zc->zc_name);
+               error = dsl_dataset_rollback(fsname, NULL, outnvl);
         }
         return (error);
  }
@@ -3511,11 +3603,13 @@ recursive_unmount(const char *fsname, void *arg)
  {
         const char *snapname = arg;
         char *fullname;
+       int error;
  
         fullname = kmem_asprintf("%s@%s", fsname, snapname);
-       zfs_unmount_snap(fullname);
+       error = zfs_unmount_snap(fullname);
         strfree(fullname);
-       return (0);
+
+       return (error);
  }
  
  /*
@@ -3531,35 +3625,36 @@ zfs_ioc_rename(zfs_cmd_t *zc)
  {
         boolean_t recursive = zc->zc_cookie & 1;
         char *at;
-       int err;
  
         zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
         if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
             strchr(zc->zc_value, '%'))
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         at = strchr(zc->zc_name, '@');
         if (at != NULL) {
                 /* snaps must be in same fs */
+               int error;
+
                 if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
-                       return (EXDEV);
+                       return (SET_ERROR(EXDEV));
                 *at = '\0';
                 if (zc->zc_objset_type == DMU_OST_ZFS) {
-                       int error = dmu_objset_find(zc->zc_name,
+                       error = dmu_objset_find(zc->zc_name,
                             recursive_unmount, at + 1,
                             recursive ? DS_FIND_CHILDREN : 0);
-                       if (error != 0)
+                       if (error != 0) {
+                               *at = '@';
                                 return (error);
+                       }
                 }
-               return (dsl_dataset_rename_snapshot(zc->zc_name,
-                   at + 1, strchr(zc->zc_value, '@') + 1, recursive));
+               error = dsl_dataset_rename_snapshot(zc->zc_name,
+                   at + 1, strchr(zc->zc_value, '@') + 1, recursive);
+               *at = '@';
+
+               return (error);
         } else {
-               err = dsl_dir_rename(zc->zc_name, zc->zc_value);
-               if (!err && zc->zc_objset_type == DMU_OST_ZVOL) {
-                       (void) zvol_remove_minor(zc->zc_name);
-                       (void) zvol_create_minor(zc->zc_value);
-               }
-               return (err);
+               return (dsl_dir_rename(zc->zc_name, zc->zc_value));
         }
  }
  
@@ -3595,7 +3690,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                                 perm = ZFS_DELEG_PERM_GROUPQUOTA;
                         } else {
                                 /* USERUSED and GROUPUSED are read-only */
-                               return (EINVAL);
+                               return (SET_ERROR(EINVAL));
                         }
  
                         if ((err = zfs_secpolicy_write_perms(dsname, perm, cr)))
@@ -3603,11 +3698,11 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                         return (0);
                 }
  
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         }
  
         if (issnap)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
                 /*
@@ -3630,32 +3725,29 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                  * the SPA supports it. We ignore any errors here since
                  * we'll catch them later.
                  */
-               if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
-                   nvpair_value_uint64(pair, &intval) == 0) {
+               if (nvpair_value_uint64(pair, &intval) == 0) {
                         if (intval >= ZIO_COMPRESS_GZIP_1 &&
                             intval <= ZIO_COMPRESS_GZIP_9 &&
                             zfs_earlier_version(dsname,
                             SPA_VERSION_GZIP_COMPRESSION)) {
-                               return (ENOTSUP);
+                               return (SET_ERROR(ENOTSUP));
                         }
  
                         if (intval == ZIO_COMPRESS_ZLE &&
                             zfs_earlier_version(dsname,
                             SPA_VERSION_ZLE_COMPRESSION))
-                               return (ENOTSUP);
+                               return (SET_ERROR(ENOTSUP));
  
                         if (intval == ZIO_COMPRESS_LZ4) {
-                               zfeature_info_t *feature =
-                                   &spa_feature_table[
-                                   SPA_FEATURE_LZ4_COMPRESS];
                                 spa_t *spa;
  
                                 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
                                         return (err);
  
-                               if (!spa_feature_is_enabled(spa, feature)) {
+                               if (!spa_feature_is_enabled(spa,
+                                   SPA_FEATURE_LZ4_COMPRESS)) {
                                         spa_close(spa, FTAG);
-                                       return (ENOTSUP);
+                                       return (SET_ERROR(ENOTSUP));
                                 }
                                 spa_close(spa, FTAG);
                         }
@@ -3669,24 +3761,61 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                          */
                         if (zfs_is_bootfs(dsname) &&
                             !BOOTFS_COMPRESS_VALID(intval)) {
-                               return (ERANGE);
+                               return (SET_ERROR(ERANGE));
                         }
                 }
                 break;
  
         case ZFS_PROP_COPIES:
                 if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
-                       return (ENOTSUP);
+                       return (SET_ERROR(ENOTSUP));
                 break;
  
         case ZFS_PROP_DEDUP:
                 if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
-                       return (ENOTSUP);
+                       return (SET_ERROR(ENOTSUP));
+               break;
+
+       case ZFS_PROP_VOLBLOCKSIZE:
+       case ZFS_PROP_RECORDSIZE:
+               /* Record sizes above 128k need the feature to be enabled */
+               if (nvpair_value_uint64(pair, &intval) == 0 &&
+                   intval > SPA_OLD_MAXBLOCKSIZE) {
+                       spa_t *spa;
+
+                       /*
+                        * If this is a bootable dataset then
+                        * we don't allow large (>128K) blocks,
+                        * because GRUB doesn't support them.
+                        */
+                       if (zfs_is_bootfs(dsname) &&
+                           intval > SPA_OLD_MAXBLOCKSIZE) {
+                               return (SET_ERROR(ERANGE));
+                       }
+
+                       /*
+                        * We don't allow setting the property above 1MB,
+                        * unless the tunable has been changed.
+                        */
+                       if (intval > zfs_max_recordsize ||
+                           intval > SPA_MAXBLOCKSIZE)
+                               return (SET_ERROR(ERANGE));
+
+                       if ((err = spa_open(dsname, &spa, FTAG)) != 0)
+                               return (err);
+
+                       if (!spa_feature_is_enabled(spa,
+                           SPA_FEATURE_LARGE_BLOCKS)) {
+                               spa_close(spa, FTAG);
+                               return (SET_ERROR(ENOTSUP));
+                       }
+                       spa_close(spa, FTAG);
+               }
                 break;
  
         case ZFS_PROP_SHARESMB:
                 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
-                       return (ENOTSUP);
+                       return (SET_ERROR(ENOTSUP));
                 break;
  
         case ZFS_PROP_ACLINHERIT:
@@ -3695,7 +3824,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                         if (intval == ZFS_ACL_PASSTHROUGH_X &&
                             zfs_earlier_version(dsname,
                             SPA_VERSION_PASSTHROUGH_X))
-                               return (ENOTSUP);
+                               return (SET_ERROR(ENOTSUP));
                 }
                 break;
         default:
@@ -3705,56 +3834,6 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
         return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
  }
  
-/*
- * Checks for a race condition to make sure we don't increment a feature flag
- * multiple times.
- */
-static int
-zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
-{
-       spa_t *spa = dmu_tx_pool(tx)->dp_spa;
-       zfeature_info_t *feature = arg;
-
-       if (!spa_feature_is_active(spa, feature))
-               return (0);
-       else
-               return (EBUSY);
-}
-
-/*
- * The callback invoked on feature activation in the sync task caused by
- * zfs_prop_activate_feature.
- */
-static void
-zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
-{
-       spa_t *spa = dmu_tx_pool(tx)->dp_spa;
-       zfeature_info_t *feature = arg;
-
-       spa_feature_incr(spa, feature, tx);
-}
-
-/*
- * Activates a feature on a pool in response to a property setting. This
- * creates a new sync task which modifies the pool to reflect the feature
- * as being active.
- */
-static int
-zfs_prop_activate_feature(spa_t *spa, zfeature_info_t *feature)
-{
-       int err;
-
-       /* EBUSY here indicates that the feature is already active */
-       err = dsl_sync_task(spa_name(spa),
-           zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
-           feature, 2);
-
-       if (err != 0 && err != EBUSY)
-               return (err);
-       else
-               return (0);
-}
-
  /*
   * Removes properties from the given props list that fail permission checks
   * needed to clear them and to restore them in case of a receive error. For each
@@ -3782,7 +3861,7 @@ zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
  
         VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
  
-       zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG);
+       zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
         (void) strcpy(zc->zc_name, dataset);
         pair = nvlist_next_nvpair(props, NULL);
         while (pair != NULL) {
@@ -3926,7 +4005,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
         if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
             strchr(zc->zc_value, '@') == NULL ||
             strchr(zc->zc_value, '%'))
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         (void) strcpy(tofs, zc->zc_value);
         tosnap = strchr(tofs, '@');
@@ -3941,7 +4020,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
         fp = getf(fd);
         if (fp == NULL) {
                 nvlist_free(props);
-               return (EBADF);
+               return (SET_ERROR(EBADF));
         }
  
         VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
@@ -4010,7 +4089,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
                  * Caller made zc->zc_nvlist_dst less than the minimum expected
                  * size or supplied an invalid address.
                  */
-               props_error = EINVAL;
+               props_error = SET_ERROR(EINVAL);
         }
  
         off = fp->f_offset;
@@ -4029,13 +4108,13 @@ zfs_ioc_recv(zfs_cmd_t *zc)
                          * If the suspend fails, then the recv_end will
                          * likely also fail, and clean up after itself.
                          */
-                       end_err = dmu_recv_end(&drc);
+                       end_err = dmu_recv_end(&drc, zsb);
                         if (error == 0)
                                 error = zfs_resume_fs(zsb, tofs);
                         error = error ? error : end_err;
                         deactivate_super(zsb->z_sb);
                 } else {
-                       error = dmu_recv_end(&drc);
+                       error = dmu_recv_end(&drc, NULL);
                 }
         }
  
@@ -4049,6 +4128,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
                 error = 1;
         }
  #endif
+
         /*
          * On error, restore the original props.
          */
@@ -4107,8 +4187,10 @@ out:
   * zc_fromobj  objsetid of incremental fromsnap (may be zero)
   * zc_guid     if set, estimate size of stream only.  zc_cookie is ignored.
   *             output size in zc_objset_type.
+ * zc_flags    lzc_send_flags
   *
- * outputs: none
+ * outputs:
+ * zc_objset_type      estimated size, if zc_guid is set
   */
  static int
  zfs_ioc_send(zfs_cmd_t *zc)
@@ -4116,6 +4198,8 @@ zfs_ioc_send(zfs_cmd_t *zc)
         int error;
         offset_t off;
         boolean_t estimate = (zc->zc_guid != 0);
+       boolean_t embedok = (zc->zc_flags & 0x1);
+       boolean_t large_block_ok = (zc->zc_flags & 0x2);
  
         if (zc->zc_obj != 0) {
                 dsl_pool_t *dp;
@@ -4132,7 +4216,8 @@ zfs_ioc_send(zfs_cmd_t *zc)
                 }
  
                 if (dsl_dir_is_clone(tosnap->ds_dir))
-                       zc->zc_fromobj = tosnap->ds_dir->dd_phys->dd_origin_obj;
+                       zc->zc_fromobj =
+                           dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
                 dsl_dataset_rele(tosnap, FTAG);
                 dsl_pool_rele(dp, FTAG);
         }
@@ -4172,11 +4257,12 @@ zfs_ioc_send(zfs_cmd_t *zc)
         } else {
                 file_t *fp = getf(zc->zc_cookie);
                 if (fp == NULL)
-                       return (EBADF);
+                       return (SET_ERROR(EBADF));
  
                 off = fp->f_offset;
                 error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
-                   zc->zc_fromobj, zc->zc_cookie, fp->f_vnode, &off);
+                   zc->zc_fromobj, embedok, large_block_ok,
+                   zc->zc_cookie, fp->f_vnode, &off);
  
                 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
                         fp->f_offset = off;
@@ -4230,7 +4316,7 @@ zfs_ioc_send_progress(zfs_cmd_t *zc)
         if (dsp != NULL)
                 zc->zc_cookie = *(dsp->dsa_off);
         else
-               error = ENOENT;
+               error = SET_ERROR(ENOENT);
  
         mutex_exit(&ds->ds_sendstream_lock);
         dsl_dataset_rele(ds, FTAG);
@@ -4308,7 +4394,7 @@ zfs_ioc_clear(zfs_cmd_t *zc)
         spa = spa_lookup(zc->zc_name);
         if (spa == NULL) {
                 mutex_exit(&spa_namespace_lock);
-               return (EIO);
+               return (SET_ERROR(EIO));
         }
         if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
                 /* we need to let spa_open/spa_load clear the chains */
@@ -4324,7 +4410,7 @@ zfs_ioc_clear(zfs_cmd_t *zc)
                 nvlist_t *config = NULL;
  
                 if (zc->zc_nvlist_src == 0)
-                       return (EINVAL);
+                       return (SET_ERROR(EINVAL));
  
                 if ((error = get_nvlist(zc->zc_nvlist_src,
                     zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
@@ -4353,7 +4439,7 @@ zfs_ioc_clear(zfs_cmd_t *zc)
                 if (vd == NULL) {
                         (void) spa_vdev_state_exit(spa, NULL, ENODEV);
                         spa_close(spa, FTAG);
-                       return (ENODEV);
+                       return (SET_ERROR(ENODEV));
                 }
         }
  
@@ -4365,7 +4451,7 @@ zfs_ioc_clear(zfs_cmd_t *zc)
          * Resume any suspended I/Os.
          */
         if (zio_resume(spa) != 0)
-               error = EIO;
+               error = SET_ERROR(EIO);
  
         spa_close(spa, FTAG);
  
@@ -4442,7 +4528,7 @@ zfs_ioc_userspace_one(zfs_cmd_t *zc)
         int error;
  
         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE);
         if (error != 0)
@@ -4475,7 +4561,7 @@ zfs_ioc_userspace_many(zfs_cmd_t *zc)
         void *buf;
  
         if (bufsize <= 0)
-               return (ENOMEM);
+               return (SET_ERROR(ENOMEM));
  
         error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE);
         if (error != 0)
@@ -4519,8 +4605,11 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
                          * objset_phys_t).  Suspend/resume the fs will do that.
                          */
                         error = zfs_suspend_fs(zsb);
-                       if (error == 0)
+                       if (error == 0) {
+                               dmu_objset_refresh_ownership(zsb->z_os,
+                                   zsb);
                                 error = zfs_resume_fs(zsb, zc->zc_name);
+                       }
                 }
                 if (error == 0)
                         error = dmu_objset_userspace_upgrade(zsb->z_os);
@@ -4541,7 +4630,7 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
  static int
  zfs_ioc_share(zfs_cmd_t *zc)
  {
-       return (ENOSYS);
+       return (SET_ERROR(ENOSYS));
  }
  
  ace_t full_access[] = {
@@ -4566,8 +4655,7 @@ zfs_ioc_next_obj(zfs_cmd_t *zc)
         if (error != 0)
                 return (error);
  
-       error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
-           os->os_dsl_dataset->ds_phys->ds_prev_snap_txg);
+       error = dmu_object_next(os, &zc->zc_obj, B_FALSE, 0);
  
         dmu_objset_rele(os, FTAG);
         return (error);
@@ -4626,7 +4714,7 @@ zfs_ioc_diff(zfs_cmd_t *zc)
  
         fp = getf(zc->zc_cookie);
         if (fp == NULL)
-               return (EBADF);
+               return (SET_ERROR(EBADF));
  
         off = fp->f_offset;
  
@@ -4688,7 +4776,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
             (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
             zc->zc_name) != 0)) {
                 VN_RELE(vp);
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
         }
  
         dzp = VTOZ(vp);
@@ -4756,6 +4844,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
                 if ((error = get_nvlist(zc->zc_nvlist_src,
                     zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
                         VN_RELE(vp);
+                       VN_RELE(ZTOV(sharedir));
                         ZFS_EXIT(zsb);
                         return (error);
                 }
@@ -4778,7 +4867,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
                 break;
  
         default:
-               error = EINVAL;
+               error = SET_ERROR(EINVAL);
                 break;
         }
  
@@ -4789,7 +4878,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
  
         return (error);
  #else
-       return (ENOTSUP);
+       return (SET_ERROR(ENOTSUP));
  #endif /* HAVE_SMB_SHARE */
  }
  
@@ -4808,6 +4897,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
  static int
  zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
  {
+       nvpair_t *pair;
         nvlist_t *holds;
         int cleanup_fd = -1;
         int error;
@@ -4815,7 +4905,20 @@ zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
  
         error = nvlist_lookup_nvlist(args, "holds", &holds);
         if (error != 0)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
+
+       /* make sure the user didn't pass us any invalid (empty) tags */
+       for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
+           pair = nvlist_next_nvpair(holds, pair)) {
+               char *htag;
+
+               error = nvpair_value_string(pair, &htag);
+               if (error != 0)
+                       return (SET_ERROR(error));
+
+               if (strlen(htag) == 0)
+                       return (SET_ERROR(EINVAL));
+       }
  
         if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
                 error = zfs_onexit_fd_hold(cleanup_fd, &minor);
@@ -4859,27 +4962,17 @@ zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
  static int
  zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
  {
-       nvpair_t *pair;
-
-       /*
-        * The release may cause the snapshot to be destroyed; make sure it
-        * is not mounted.
-        */
-       for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
-           pair = nvlist_next_nvpair(holds, pair))
-               zfs_unmount_snap(nvpair_name(pair));
-
         return (dsl_dataset_user_release(holds, errlist));
  }
  
  /*
   * inputs:
   * zc_guid             flags (ZEVENT_NONBLOCK)
+ * zc_cleanup_fd       zevent file descriptor
   *
   * outputs:
   * zc_nvlist_dst       next nvlist event
   * zc_cookie           dropped events since last get
- * zc_cleanup_fd       cleanup-on-exit file descriptor
   */
  static int
  zfs_ioc_events_next(zfs_cmd_t *zc)
@@ -4931,7 +5024,29 @@ zfs_ioc_events_clear(zfs_cmd_t *zc)
         zfs_zevent_drain_all(&count);
         zc->zc_cookie = count;
  
-       return 0;
+       return (0);
+}
+
+/*
+ * inputs:
+ * zc_guid             eid | ZEVENT_SEEK_START | ZEVENT_SEEK_END
+ * zc_cleanup_fd       zevent file descriptor
+ */
+static int
+zfs_ioc_events_seek(zfs_cmd_t *zc)
+{
+       zfs_zevent_t *ze;
+       minor_t minor;
+       int error;
+
+       error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
+       if (error != 0)
+               return (error);
+
+       error = zfs_zevent_seek(ze, zc->zc_guid);
+       zfs_zevent_fd_rele(zc->zc_cleanup_fd);
+
+       return (error);
  }
  
  /*
@@ -4995,18 +5110,26 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
         uint64_t used, comp, uncomp;
  
         if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         error = dsl_pool_hold(lastsnap, FTAG, &dp);
         if (error != 0)
                 return (error);
  
         error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
+       if (error == 0 && !new->ds_is_snapshot) {
+               dsl_dataset_rele(new, FTAG);
+               error = SET_ERROR(EINVAL);
+       }
         if (error != 0) {
                 dsl_pool_rele(dp, FTAG);
                 return (error);
         }
         error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
+       if (error == 0 && !old->ds_is_snapshot) {
+               dsl_dataset_rele(old, FTAG);
+               error = SET_ERROR(EINVAL);
+       }
         if (error != 0) {
                 dsl_dataset_rele(new, FTAG);
                 dsl_pool_rele(dp, FTAG);
@@ -5027,6 +5150,10 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
   * innvl: {
   *     "fd" -> file descriptor to write stream to (int32)
   *     (optional) "fromsnap" -> full snap name to send an incremental from
+ *     (optional) "largeblockok" -> (value ignored)
+ *         presence indicates that blocks > 128KB are permitted
+ *     (optional) "embedok" -> (value ignored)
+ *         presence indicates DRR_WRITE_EMBEDDED records are permitted
   * }
   *
   * outnvl is unused
@@ -5040,18 +5167,24 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
         char *fromname = NULL;
         int fd;
         file_t *fp;
+       boolean_t largeblockok;
+       boolean_t embedok;
  
         error = nvlist_lookup_int32(innvl, "fd", &fd);
         if (error != 0)
-               return (EINVAL);
+               return (SET_ERROR(EINVAL));
  
         (void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
  
+       largeblockok = nvlist_exists(innvl, "largeblockok");
+       embedok = nvlist_exists(innvl, "embedok");
+
         if ((fp = getf(fd)) == NULL)
-               return (EBADF);
+               return (SET_ERROR(EBADF));
  
         off = fp->f_offset;
-       error = dmu_send(snapname, fromname, fd, fp->f_vnode, &off);
+       error = dmu_send(snapname, fromname, embedok, largeblockok,
+           fd, fp->f_vnode, &off);
  
         if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
                 fp->f_offset = off;
@@ -5065,7 +5198,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
   * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
   *
   * innvl: {
- *     (optional) "fromsnap" -> full snap name to send an incremental from
+ *     (optional) "from" -> full snap or bookmark name to send an incremental
+ *                          from
   * }
   *
   * outnvl: {
@@ -5076,7 +5210,6 @@ static int
  zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
  {
         dsl_pool_t *dp;
-       dsl_dataset_t *fromsnap = NULL;
         dsl_dataset_t *tosnap;
         int error;
         char *fromname;
@@ -5092,27 +5225,55 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
                 return (error);
         }
  
-       error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
+       error = nvlist_lookup_string(innvl, "from", &fromname);
         if (error == 0) {
-               error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
-               if (error != 0) {
-                       dsl_dataset_rele(tosnap, FTAG);
-                       dsl_pool_rele(dp, FTAG);
-                       return (error);
+               if (strchr(fromname, '@') != NULL) {
+                       /*
+                        * If from is a snapshot, hold it and use the more
+                        * efficient dmu_send_estimate to estimate send space
+                        * size using deadlists.
+                        */
+                       dsl_dataset_t *fromsnap;
+                       error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
+                       if (error != 0)
+                               goto out;
+                       error = dmu_send_estimate(tosnap, fromsnap, &space);
+                       dsl_dataset_rele(fromsnap, FTAG);
+               } else if (strchr(fromname, '#') != NULL) {
+                       /*
+                        * If from is a bookmark, fetch the creation TXG of the
+                        * snapshot it was created from and use that to find
+                        * blocks that were born after it.
+                        */
+                       zfs_bookmark_phys_t frombm;
+
+                       error = dsl_bookmark_lookup(dp, fromname, tosnap,
+                           &frombm);
+                       if (error != 0)
+                               goto out;
+                       error = dmu_send_estimate_from_txg(tosnap,
+                           frombm.zbm_creation_txg, &space);
+               } else {
+                       /*
+                        * from is not properly formatted as a snapshot or
+                        * bookmark
+                        */
+                       error = SET_ERROR(EINVAL);
+                       goto out;
                 }
+       } else {
+               // If estimating the size of a full send, use dmu_send_estimate
+               error = dmu_send_estimate(tosnap, NULL, &space);
         }
  
-       error = dmu_send_estimate(tosnap, fromsnap, &space);
         fnvlist_add_uint64(outnvl, "space", space);
  
-       if (fromsnap != NULL)
-               dsl_dataset_rele(fromsnap, FTAG);
+out:
         dsl_dataset_rele(tosnap, FTAG);
         dsl_pool_rele(dp, FTAG);
         return (error);
  }
  
-
  static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
  
  static void
@@ -5264,6 +5425,23 @@ zfs_ioctl_init(void)
             zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
             POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
  
+       zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
+           zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
+
+       zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
+           zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
+       zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
+           zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
+           POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
+
+       zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
+           zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
+           POOL_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
         /* IOCTLS that use the legacy function signature */
  
         zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
@@ -5313,9 +5491,9 @@ zfs_ioctl_init(void)
          * does the logging of those commands.
          */
         zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
-           zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
+           zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
         zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
-           zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
+           zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
  
         zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
             zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
@@ -5335,7 +5513,7 @@ zfs_ioctl_init(void)
             zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
  
         zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
-           zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
+           zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
         zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
             zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
  
@@ -5375,8 +5553,6 @@ zfs_ioctl_init(void)
             zfs_secpolicy_none);
         zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
             zfs_secpolicy_destroy);
-       zfs_ioctl_register_dataset_modify(ZFS_IOC_ROLLBACK, zfs_ioc_rollback,
-           zfs_secpolicy_rollback);
         zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
             zfs_secpolicy_rename);
         zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
@@ -5400,16 +5576,14 @@ zfs_ioctl_init(void)
             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
  
         /*
-        * ZoL functions
+        * ZoL functions
          */
-       zfs_ioctl_register_legacy(ZFS_IOC_CREATE_MINOR, zfs_ioc_create_minor,
-           zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE);
-       zfs_ioctl_register_legacy(ZFS_IOC_REMOVE_MINOR, zfs_ioc_remove_minor,
-           zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE);
         zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
             zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
         zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
             zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
+       zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek,
+           zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
  }
  
  int
@@ -5427,9 +5601,9 @@ pool_status_check(const char *name, zfs_ioc_namecheck_t type,
         error = spa_open(name, &spa, FTAG);
         if (error == 0) {
                 if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
-                       error = EAGAIN;
+                       error = SET_ERROR(EAGAIN);
                 else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
-                       error = EROFS;
+                       error = SET_ERROR(EROFS);
                 spa_close(spa, FTAG);
         }
         return (error);
@@ -5440,20 +5614,21 @@ zfsdev_get_state_impl(minor_t minor, enum zfsdev_state_type which)
  {
         zfsdev_state_t *zs;
  
-       ASSERT(MUTEX_HELD(&zfsdev_state_lock));
-
-       for (zs = list_head(&zfsdev_state_list); zs != NULL;
-            zs = list_next(&zfsdev_state_list, zs)) {
+       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
                 if (zs->zs_minor == minor) {
+                       smp_rmb();
                         switch (which) {
-                               case ZST_ONEXIT:  return (zs->zs_onexit);
-                               case ZST_ZEVENT:  return (zs->zs_zevent);
-                               case ZST_ALL:     return (zs);
+                       case ZST_ONEXIT:
+                               return (zs->zs_onexit);
+                       case ZST_ZEVENT:
+                               return (zs->zs_zevent);
+                       case ZST_ALL:
+                               return (zs);
                         }
                 }
         }
  
-       return NULL;
+       return (NULL);
  }
  
  void *
@@ -5461,20 +5636,40 @@ zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
  {
         void *ptr;
  
-       mutex_enter(&zfsdev_state_lock);
         ptr = zfsdev_get_state_impl(minor, which);
-       mutex_exit(&zfsdev_state_lock);
  
-       return ptr;
+       return (ptr);
  }
  
-minor_t
-zfsdev_getminor(struct file *filp)
+int
+zfsdev_getminor(struct file *filp, minor_t *minorp)
  {
+       zfsdev_state_t *zs, *fpd;
+
         ASSERT(filp != NULL);
-       ASSERT(filp->private_data != NULL);
+       ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
+
+       fpd = filp->private_data;
+       if (fpd == NULL)
+               return (EBADF);
+
+       mutex_enter(&zfsdev_state_lock);
  
-       return (((zfsdev_state_t *)filp->private_data)->zs_minor);
+       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
+
+               if (zs->zs_minor == -1)
+                       continue;
+
+               if (fpd == zs) {
+                       *minorp = fpd->zs_minor;
+                       mutex_exit(&zfsdev_state_lock);
+                       return (0);
+               }
+       }
+
+       mutex_exit(&zfsdev_state_lock);
+
+       return (EBADF);
  }
  
  /*
@@ -5504,25 +5699,50 @@ zfsdev_minor_alloc(void)
  static int
  zfsdev_state_init(struct file *filp)
  {
-       zfsdev_state_t *zs;
+       zfsdev_state_t *zs, *zsprev = NULL;
         minor_t minor;
+       boolean_t newzs = B_FALSE;
  
         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
  
-        minor = zfsdev_minor_alloc();
-        if (minor == 0)
-                return (ENXIO);
+       minor = zfsdev_minor_alloc();
+       if (minor == 0)
+               return (SET_ERROR(ENXIO));
  
-       zs = kmem_zalloc( sizeof(zfsdev_state_t), KM_SLEEP);
+       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
+               if (zs->zs_minor == -1)
+                       break;
+               zsprev = zs;
+       }
+
+       if (!zs) {
+               zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
+               newzs = B_TRUE;
+       }
  
         zs->zs_file = filp;
-       zs->zs_minor = minor;
         filp->private_data = zs;
  
         zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
         zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);
  
-       list_insert_tail(&zfsdev_state_list, zs);
+
+       /*
+        * In order to provide for lock-free concurrent read access
+        * to the minor list in zfsdev_get_state_impl(), new entries
+        * must be completely written before linking them into the
+        * list whereas existing entries are already linked; the last
+        * operation must be updating zs_minor (from -1 to the new
+        * value).
+        */
+       if (newzs) {
+               zs->zs_minor = minor;
+               smp_wmb();
+               zsprev->zs_next = zs;
+       } else {
+               smp_wmb();
+               zs->zs_minor = minor;
+       }
  
         return (0);
  }
@@ -5536,13 +5756,11 @@ zfsdev_state_destroy(struct file *filp)
         ASSERT(filp->private_data != NULL);
  
         zs = filp->private_data;
+       zs->zs_minor = -1;
         zfs_onexit_destroy(zs->zs_onexit);
         zfs_zevent_destroy(zs->zs_zevent);
  
-       list_remove(&zfsdev_state_list, zs);
-       kmem_free(zs, sizeof(zfsdev_state_t));
-
-       return 0;
+       return (0);
  }
  
  static int
@@ -5574,26 +5792,50 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
  {
         zfs_cmd_t *zc;
         uint_t vecnum;
-       int error, rc, len, flag = 0;
+       int error, rc, flag = 0;
         const zfs_ioc_vec_t *vec;
-       char saved_poolname[MAXNAMELEN];
+       char *saved_poolname = NULL;
         nvlist_t *innvl = NULL;
+       fstrans_cookie_t cookie;
  
         vecnum = cmd - ZFS_IOC_FIRST;
         if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
-               return (-EINVAL);
+               return (-SET_ERROR(EINVAL));
         vec = &zfs_ioc_vec[vecnum];
  
-       zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG);
+       /*
+        * The registered ioctl list may be sparse; verify that either
+        * a normal or a legacy handler is registered.
+        */
+       if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL)
+               return (-SET_ERROR(EINVAL));
+
+       zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
  
         error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
         if (error != 0) {
-               error = EFAULT;
+               error = SET_ERROR(EFAULT);
                 goto out;
         }
  
         zc->zc_iflags = flag & FKIOCTL;
-       if (zc->zc_nvlist_src_size != 0) {
+       if (zc->zc_nvlist_src_size > MAX_NVLIST_SRC_SIZE) {
+               /*
+                * Make sure the user doesn't pass in an insane value for
+                * zc_nvlist_src_size.  We have to check, since we will end
+                * up allocating that much memory inside of get_nvlist().  This
+                * prevents a nefarious user from allocating tons of kernel
+                * memory.
+                *
+                * Also, we return EINVAL instead of ENOMEM here, because
+                * returning ENOMEM from an ioctl() has a special
+                * connotation: that the user's size value is too small and
+                * needs to be expanded to hold the nvlist.  See
+                * zcmd_expand_dst_nvlist() for details.
+                */
+               error = SET_ERROR(EINVAL);      /* User's size too big */
+
+       } else if (zc->zc_nvlist_src_size != 0) {
                 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
                     zc->zc_iflags, &innvl);
                 if (error != 0)
@@ -5608,7 +5850,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
         switch (vec->zvec_namecheck) {
         case POOL_NAME:
                 if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
-                       error = EINVAL;
+                       error = SET_ERROR(EINVAL);
                 else
                         error = pool_status_check(zc->zc_name,
                             vec->zvec_namecheck, vec->zvec_pool_check);
@@ -5616,7 +5858,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
  
         case DATASET_NAME:
                 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
-                       error = EINVAL;
+                       error = SET_ERROR(EINVAL);
                 else
                         error = pool_status_check(zc->zc_name,
                             vec->zvec_namecheck, vec->zvec_pool_check);
@@ -5627,16 +5869,23 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
         }
  
  
-       if (error == 0 && !(flag & FKIOCTL))
+       if (error == 0 && !(flag & FKIOCTL)) {
+               cookie = spl_fstrans_mark();
                 error = vec->zvec_secpolicy(zc, innvl, CRED());
+               spl_fstrans_unmark(cookie);
+       }
  
         if (error != 0)
                 goto out;
  
         /* legacy ioctls can modify zc_name */
-       (void) strlcpy(saved_poolname, zc->zc_name, sizeof(saved_poolname));
-       len = strcspn(saved_poolname, "/@") + 1;
-       saved_poolname[len] = '\0';
+       saved_poolname = strdup(zc->zc_name);
+       if (saved_poolname == NULL) {
+               error = SET_ERROR(ENOMEM);
+               goto out;
+       } else {
+               saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
+       }
  
         if (vec->zvec_func != NULL) {
                 nvlist_t *outnvl;
@@ -5661,7 +5910,9 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
                 }
  
                 outnvl = fnvlist_alloc();
+               cookie = spl_fstrans_mark();
                 error = vec->zvec_func(zc->zc_name, innvl, outnvl);
+               spl_fstrans_unmark(cookie);
  
                 if (error == 0 && vec->zvec_allow_log &&
                     spa_open(zc->zc_name, &spa, FTAG) == 0) {
@@ -5689,19 +5940,24 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
  
                 nvlist_free(outnvl);
         } else {
+               cookie = spl_fstrans_mark();
                 error = vec->zvec_legacy_func(zc);
+               spl_fstrans_unmark(cookie);
         }
  
  out:
         nvlist_free(innvl);
         rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
         if (error == 0 && rc != 0)
-               error = EFAULT;
+               error = SET_ERROR(EFAULT);
         if (error == 0 && vec->zvec_allow_log) {
                 char *s = tsd_get(zfs_allow_log_key);
                 if (s != NULL)
                         strfree(s);
-               (void) tsd_set(zfs_allow_log_key, strdup(saved_poolname));
+               (void) tsd_set(zfs_allow_log_key, saved_poolname);
+       } else {
+               if (saved_poolname != NULL)
+                       strfree(saved_poolname);
         }
  
         kmem_free(zc, sizeof (zfs_cmd_t));
@@ -5712,24 +5968,24 @@ out:
  static long
  zfsdev_compat_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
  {
-        return zfsdev_ioctl(filp, cmd, arg);
+       return (zfsdev_ioctl(filp, cmd, arg));
  }
  #else
-#define zfsdev_compat_ioctl   NULL
+#define        zfsdev_compat_ioctl     NULL
  #endif
  
  static const struct file_operations zfsdev_fops = {
-       .open            = zfsdev_open,
-       .release         = zfsdev_release,
-       .unlocked_ioctl  = zfsdev_ioctl,
-       .compat_ioctl    = zfsdev_compat_ioctl,
-       .owner           = THIS_MODULE,
+       .open           = zfsdev_open,
+       .release        = zfsdev_release,
+       .unlocked_ioctl = zfsdev_ioctl,
+       .compat_ioctl   = zfsdev_compat_ioctl,
+       .owner          = THIS_MODULE,
  };
  
  static struct miscdevice zfs_misc = {
-       .minor          = MISC_DYNAMIC_MINOR,
-       .name           = ZFS_DRIVER,
-       .fops           = &zfsdev_fops,
+       .minor          = MISC_DYNAMIC_MINOR,
+       .name           = ZFS_DRIVER,
+       .fops           = &zfsdev_fops,
  };
  
  static int
@@ -5738,11 +5994,11 @@ zfs_attach(void)
         int error;
  
         mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
-       list_create(&zfsdev_state_list, sizeof (zfsdev_state_t),
-           offsetof(zfsdev_state_t, zs_next));
+       zfsdev_state_list = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
+       zfsdev_state_list->zs_minor = -1;
  
         error = misc_register(&zfs_misc);
-        if (error != 0) {
+       if (error != 0) {
                 printk(KERN_INFO "ZFS: misc_register() failed %d\n", error);
                 return (error);
         }
@@ -5753,14 +6009,18 @@ zfs_attach(void)
  static void
  zfs_detach(void)
  {
-       int error;
-
-       error = misc_deregister(&zfs_misc);
-       if (error != 0)
-               printk(KERN_INFO "ZFS: misc_deregister() failed %d\n", error);
+       zfsdev_state_t *zs, *zsprev = NULL;
  
+       misc_deregister(&zfs_misc);
         mutex_destroy(&zfsdev_state_lock);
-       list_destroy(&zfsdev_state_list);
+
+       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
+               if (zsprev)
+                       kmem_free(zsprev, sizeof (zfsdev_state_t));
+               zsprev = zs;
+       }
+       if (zsprev)
+               kmem_free(zsprev, sizeof (zfsdev_state_t));
  }
  
  static void
@@ -5771,73 +6031,81 @@ zfs_allow_log_destroy(void *arg)
  }
  
  #ifdef DEBUG
-#define ZFS_DEBUG_STR  " (DEBUG mode)"
+#define        ZFS_DEBUG_STR   " (DEBUG mode)"
  #else
-#define ZFS_DEBUG_STR  ""
+#define        ZFS_DEBUG_STR   ""
  #endif
  
-int
+static int __init
  _init(void)
  {
         int error;
  
+       error = -vn_set_pwd("/");
+       if (error) {
+               printk(KERN_NOTICE
+                   "ZFS: Warning unable to set pwd to '/': %d\n", error);
+               return (error);
+       }
+
+       if ((error = -zvol_init()) != 0)
+               return (error);
+
         spa_init(FREAD | FWRITE);
         zfs_init();
  
-       if ((error = zvol_init()) != 0)
-               goto out1;
-
         zfs_ioctl_init();
  
         if ((error = zfs_attach()) != 0)
-               goto out2;
+               goto out;
  
         tsd_create(&zfs_fsyncer_key, NULL);
         tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
         tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
  
         printk(KERN_NOTICE "ZFS: Loaded module v%s-%s%s, "
-              "ZFS pool version %s, ZFS filesystem version %s\n",
-              ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR,
-              SPA_VERSION_STRING, ZPL_VERSION_STRING);
+           "ZFS pool version %s, ZFS filesystem version %s\n",
+           ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR,
+           SPA_VERSION_STRING, ZPL_VERSION_STRING);
+#ifndef CONFIG_FS_POSIX_ACL
+       printk(KERN_NOTICE "ZFS: Posix ACLs disabled by kernel\n");
+#endif /* CONFIG_FS_POSIX_ACL */
  
         return (0);
  
-out2:
-       (void) zvol_fini();
-out1:
+out:
         zfs_fini();
         spa_fini();
+       (void) zvol_fini();
         printk(KERN_NOTICE "ZFS: Failed to Load ZFS Filesystem v%s-%s%s"
-              ", rc = %d\n", ZFS_META_VERSION, ZFS_META_RELEASE,
-              ZFS_DEBUG_STR, error);
+           ", rc = %d\n", ZFS_META_VERSION, ZFS_META_RELEASE,
+           ZFS_DEBUG_STR, error);
  
         return (error);
  }
  
-int
+static void __exit
  _fini(void)
  {
         zfs_detach();
-       zvol_fini();
         zfs_fini();
         spa_fini();
+       zvol_fini();
  
         tsd_destroy(&zfs_fsyncer_key);
         tsd_destroy(&rrw_tsd_key);
         tsd_destroy(&zfs_allow_log_key);
  
         printk(KERN_NOTICE "ZFS: Unloaded module v%s-%s%s\n",
-              ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
-
-       return (0);
+           ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
  }
  
  #ifdef HAVE_SPL
-spl_module_init(_init);
-spl_module_exit(_fini);
+module_init(_init);
+module_exit(_fini);
  
  MODULE_DESCRIPTION("ZFS");
  MODULE_AUTHOR(ZFS_META_AUTHOR);
  MODULE_LICENSE(ZFS_META_LICENSE);
+MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
  #endif /* HAVE_SPL */