Add zstd support to zfs

[mirror_zfs.git] / module / zfs / zfs_ioctl.c
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c

index b2517d84f2f9b26a52297e5a8af8a0d9249a777b..7f623bb046eafde801ede686647e6ebf41c23819 100644 (file)
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -27,7 +27,7 @@
   * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
   * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
   * Copyright (c) 2014, Joyent, Inc. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
   * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
   * Copyright (c) 2013 Steven Hartland. All rights reserved.
   * Copyright (c) 2014 Integros [integros.com]
@@ -37,6 +37,9 @@
   * Copyright 2017 RackTop Systems.
   * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
   * Copyright (c) 2019 Datto Inc.
+ * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
   */
  
  /*
@@ -221,14 +224,12 @@
  kmutex_t zfsdev_state_lock;
  zfsdev_state_t *zfsdev_state_list;
  
-extern void zfs_init(void);
-extern void zfs_fini(void);
-
  /*
   * Limit maximum nvlist size.  We don't want users passing in insane values
   * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
+ * Defaults to 0=auto which is handled by platform code.
   */
-#define        MAX_NVLIST_SRC_SIZE     KMALLOC_MAX_SIZE
+unsigned long zfs_max_nvlist_src_size = 0;
  
  uint_t zfs_fsyncer_key;
  uint_t zfs_allow_log_key;
@@ -302,23 +303,6 @@ history_str_get(zfs_cmd_t *zc)
         return (buf);
  }
  
-/*
- * Check to see if the named dataset is currently defined as bootable
- */
-static boolean_t
-zfs_is_bootfs(const char *name)
-{
-       objset_t *os;
-
-       if (dmu_objset_hold(name, FTAG, &os) == 0) {
-               boolean_t ret;
-               ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
-               dmu_objset_rele(os, FTAG);
-               return (ret);
-       }
-       return (B_FALSE);
-}
-
  /*
   * Return non-zero if the spa version is less than requested version.
   */
@@ -734,13 +718,13 @@ zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
             ZFS_DELEG_PERM_SEND, cr));
  }
  
-int
+static int
  zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  {
         return (SET_ERROR(ENOTSUP));
  }
  
-int
+static int
  zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  {
         return (SET_ERROR(ENOTSUP));
@@ -1940,8 +1924,9 @@ static int
  zfs_ioc_vdev_attach(zfs_cmd_t *zc)
  {
         spa_t *spa;
-       int replacing = zc->zc_cookie;
         nvlist_t *config;
+       int replacing = zc->zc_cookie;
+       int rebuild = zc->zc_simple;
         int error;
  
         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
@@ -1949,7 +1934,8 @@ zfs_ioc_vdev_attach(zfs_cmd_t *zc)
  
         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
             zc->zc_iflags, &config)) == 0) {
-               error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
+               error = spa_vdev_attach(spa, zc->zc_guid, config, replacing,
+                   rebuild);
                 nvlist_free(config);
         }
  
@@ -2291,7 +2277,7 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
  
         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
         if (error != 0) {
-               return (error == ENOENT ? ESRCH : error);
+               return (error == ENOENT ? SET_ERROR(ESRCH) : error);
         }
  
         /*
@@ -2480,6 +2466,15 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
         case ZFS_PROP_REFRESERVATION:
                 err = dsl_dataset_set_refreservation(dsname, source, intval);
                 break;
+       case ZFS_PROP_COMPRESSION:
+               err = dsl_dataset_set_compression(dsname, source, intval);
+               /*
+                * Set err to -1 to force the zfs_set_prop_nvlist code down the
+                * default path to set the value in the nvlist.
+                */
+               if (err == 0)
+                       err = -1;
+               break;
         case ZFS_PROP_VOLSIZE:
                 err = zvol_set_volsize(dsname, intval);
                 break;
@@ -2503,7 +2498,8 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
                         zfs_cmd_t *zc;
  
                         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
-                       (void) strcpy(zc->zc_name, dsname);
+                       (void) strlcpy(zc->zc_name, dsname,
+                           sizeof (zc->zc_name));
                         (void) zfs_ioc_userspace_upgrade(zc);
                         (void) zfs_ioc_id_quota_upgrade(zc);
                         kmem_free(zc, sizeof (zfs_cmd_t));
@@ -2590,7 +2586,8 @@ retry:
                                 case PROP_TYPE_INDEX:
                                         if (zfs_prop_index_to_string(prop,
                                             intval, &unused) != 0)
-                                               err = SET_ERROR(EINVAL);
+                                               err =
+                                                   SET_ERROR(ZFS_ERR_BADPROP);
                                         break;
                                 default:
                                         cmn_err(CE_PANIC,
@@ -3511,6 +3508,58 @@ zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
         return (error);
  }
  
+/*
+ * This ioctl is used to set the bootenv configuration on the current
+ * pool. This configuration is stored in the second padding area of the label,
+ * and it is used by the GRUB bootloader on Linux to store the contents
+ * of the grubenv file.  The file is stored as raw ASCII, and is protected by
+ * an embedded checksum.  By default, GRUB will check if the boot filesystem
+ * supports storing the environment data in a special location, and if so,
+ * will invoke filesystem specific logic to retrieve it. This can be overridden
+ * by a variable, should the user so desire.
+ *
+ * The pool is opened by name, and the "envmap" string is handed to
+ * vdev_label_write_bootenv() for the pool's root vdev between
+ * spa_vdev_state_enter(SCL_ALL) and spa_vdev_state_exit().  The return value
+ * is the error from spa_open() if the pool cannot be opened, otherwise the
+ * result of vdev_label_write_bootenv(); the result of spa_vdev_state_exit()
+ * is deliberately discarded.
+ *
+ * innvl: {
+ *     "envmap" -> contents to store (string)
+ * }
+ *
+ * outnvl is not used.
+ */
+/* ARGSUSED */
+static const zfs_ioc_key_t zfs_keys_set_bootenv[] = {
+       {"envmap",      DATA_TYPE_STRING,       0},
+};
+
+static int
+zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       char *envmap;
+       int error;
+       spa_t *spa;
+
+       envmap = fnvlist_lookup_string(innvl, "envmap");
+       if ((error = spa_open(name, &spa, FTAG)) != 0)
+               return (error);
+       spa_vdev_state_enter(spa, SCL_ALL);
+       error = vdev_label_write_bootenv(spa->spa_root_vdev, envmap);
+       (void) spa_vdev_state_exit(spa, NULL, 0);
+       spa_close(spa, FTAG);
+       return (error);
+}
+
+/*
+ * This ioctl reads back the bootenv configuration of the named pool.
+ *
+ * The pool is opened by name, and vdev_label_read_bootenv() is called on the
+ * pool's root vdev with outnvl as its destination, between
+ * spa_vdev_state_enter(SCL_ALL) and spa_vdev_state_exit().  The return value
+ * is the error from spa_open() if the pool cannot be opened, otherwise the
+ * result of vdev_label_read_bootenv(); the result of spa_vdev_state_exit()
+ * is deliberately discarded.
+ *
+ * innvl is not used.
+ *
+ * outnvl: filled in by vdev_label_read_bootenv()
+ */
+static const zfs_ioc_key_t zfs_keys_get_bootenv[] = {
+       /* no nvl keys */
+};
+
+/* ARGSUSED */
+static int
+zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       spa_t *spa;
+       int error;
+
+       if ((error = spa_open(name, &spa, FTAG)) != 0)
+               return (error);
+       spa_vdev_state_enter(spa, SCL_ALL);
+       error = vdev_label_read_bootenv(spa->spa_root_vdev, outnvl);
+       (void) spa_vdev_state_exit(spa, NULL, 0);
+       spa_close(spa, FTAG);
+       return (error);
+}
+
  /*
   * The dp_config_rwlock must not be held when calling this, because the
   * unmount may need to write out data.
@@ -3614,11 +3663,13 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
  }
  
  /*
- * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
- * All bookmarks must be in the same pool.
+ * Create bookmarks. The bookmark names are of the form <fs>#<bmark>.
+ * All bookmarks and snapshots must be in the same pool.
+ * dsl_bookmark_create_nvl_validate describes the nvlist schema in more detail.
   *
   * innvl: {
- *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
+ *     new_bookmark1 -> existing_snapshot,
+ *     new_bookmark2 -> existing_bookmark,
   * }
   *
   * outnvl: bookmark -> error code (int32)
@@ -3632,25 +3683,6 @@ static const zfs_ioc_key_t zfs_keys_bookmark[] = {
  static int
  zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
  {
-       for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
-           pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
-               char *snap_name;
-
-               /*
-                * Verify the snapshot argument.
-                */
-               if (nvpair_value_string(pair, &snap_name) != 0)
-                       return (SET_ERROR(EINVAL));
-
-
-               /* Verify that the keys (bookmarks) are unique */
-               for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
-                   pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
-                       if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
-                               return (SET_ERROR(EINVAL));
-               }
-       }
-
         return (dsl_bookmark_create(innvl, outnvl));
  }
  
@@ -4088,6 +4120,83 @@ zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
         return (error);
  }
  
+/*
+ * This ioctl waits for activity of a particular type on the named dataset to
+ * complete. If there is no activity of that type in progress, it returns
+ * immediately, and the returned value "waited" is false. If there is activity
+ * in progress, the ioctl blocks until all activity of that type is complete,
+ * and then returns with "waited" set to true.
+ *
+ * If a thread waiting in the ioctl receives a signal, the call will return
+ * immediately, and the return value will be EINTR.
+ *
+ * EINVAL is returned if "wait_activity" is missing, negative, or not less
+ * than ZFS_WAIT_NUM_ACTIVITIES. "waited" is only added to outnvl when the
+ * call succeeds.
+ *
+ * innvl: {
+ *     "wait_activity" -> int32_t
+ * }
+ *
+ * outnvl: "waited" -> boolean_t
+ */
+static const zfs_ioc_key_t zfs_keys_fs_wait[] = {
+       {ZFS_WAIT_ACTIVITY,     DATA_TYPE_INT32,                0},
+};
+
+static int
+zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       int32_t activity;
+       boolean_t waited = B_FALSE;
+       int error;
+       dsl_pool_t *dp;
+       dsl_dir_t *dd;
+       dsl_dataset_t *ds;
+
+       if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0)
+               return (SET_ERROR(EINVAL));
+
+       if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0)
+               return (SET_ERROR(EINVAL));
+
+       if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
+               return (error);
+
+       if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
+               dsl_pool_rele(dp, FTAG);
+               return (error);
+       }
+
+       dd = ds->ds_dir;
+       mutex_enter(&dd->dd_activity_lock);
+       dd->dd_activity_waiters++;
+
+       /*
+        * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t
+        * aren't evicted while we're waiting. Normally this is prevented by
+        * holding the pool, but we can't do that while we're waiting since
+        * that would prevent TXGs from syncing out. Some of the functionality
+        * of long-holds (e.g. preventing deletion) is unnecessary for this
+        * case, since we would cancel the waiters before proceeding with a
+        * deletion. An alternative mechanism for keeping the dataset around
+        * could be developed but this is simpler.
+        */
+       dsl_dataset_long_hold(ds, FTAG);
+       dsl_pool_rele(dp, FTAG);
+
+       error = dsl_dir_wait(dd, ds, activity, &waited);
+
+       dsl_dataset_long_rele(ds, FTAG);
+       dd->dd_activity_waiters--;
+       if (dd->dd_activity_waiters == 0)
+               cv_signal(&dd->dd_activity_cv);
+       mutex_exit(&dd->dd_activity_lock);
+
+       dsl_dataset_rele(ds, FTAG);
+
+       if (error == 0)
+               fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited);
+
+       return (error);
+}
+
  /*
   * fsname is name of dataset to rollback (to most recent snapshot)
   *
@@ -4164,7 +4273,7 @@ recursive_unmount(const char *fsname, void *arg)
   * snapname is the snapshot to redact.
   * innvl: {
   *     "bookname" -> (string)
- *         name of the redaction bookmark to generate
+ *         shortname of the redaction bookmark to generate
   *     "snapnv" -> (nvlist, values ignored)
   *         snapshots to redact snapname with respect to
   * }
@@ -4257,7 +4366,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
         const char *propname = nvpair_name(pair);
         boolean_t issnap = (strchr(dsname, '@') != NULL);
         zfs_prop_t prop = zfs_name_to_prop(propname);
-       uint64_t intval;
+       uint64_t intval, compval;
         int err;
  
         if (prop == ZPROP_INVAL) {
@@ -4339,19 +4448,20 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                  * we'll catch them later.
                  */
                 if (nvpair_value_uint64(pair, &intval) == 0) {
-                       if (intval >= ZIO_COMPRESS_GZIP_1 &&
-                           intval <= ZIO_COMPRESS_GZIP_9 &&
+                       compval = ZIO_COMPRESS_ALGO(intval);
+                       if (compval >= ZIO_COMPRESS_GZIP_1 &&
+                           compval <= ZIO_COMPRESS_GZIP_9 &&
                             zfs_earlier_version(dsname,
                             SPA_VERSION_GZIP_COMPRESSION)) {
                                 return (SET_ERROR(ENOTSUP));
                         }
  
-                       if (intval == ZIO_COMPRESS_ZLE &&
+                       if (compval == ZIO_COMPRESS_ZLE &&
                             zfs_earlier_version(dsname,
                             SPA_VERSION_ZLE_COMPRESSION))
                                 return (SET_ERROR(ENOTSUP));
  
-                       if (intval == ZIO_COMPRESS_LZ4) {
+                       if (compval == ZIO_COMPRESS_LZ4) {
                                 spa_t *spa;
  
                                 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
@@ -4365,16 +4475,18 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                                 spa_close(spa, FTAG);
                         }
  
-                       /*
-                        * If this is a bootable dataset then
-                        * verify that the compression algorithm
-                        * is supported for booting. We must return
-                        * something other than ENOTSUP since it
-                        * implies a downrev pool version.
-                        */
-                       if (zfs_is_bootfs(dsname) &&
-                           !BOOTFS_COMPRESS_VALID(intval)) {
-                               return (SET_ERROR(ERANGE));
+                       if (compval == ZIO_COMPRESS_ZSTD) {
+                               spa_t *spa;
+
+                               if ((err = spa_open(dsname, &spa, FTAG)) != 0)
+                                       return (err);
+
+                               if (!spa_feature_is_enabled(spa,
+                                   SPA_FEATURE_ZSTD_COMPRESS)) {
+                                       spa_close(spa, FTAG);
+                                       return (SET_ERROR(ENOTSUP));
+                               }
+                               spa_close(spa, FTAG);
                         }
                 }
                 break;
@@ -4417,16 +4529,6 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                     intval != ZFS_DNSIZE_LEGACY) {
                         spa_t *spa;
  
-                       /*
-                        * If this is a bootable dataset then
-                        * we don't allow large (>512B) dnodes,
-                        * because GRUB doesn't support them.
-                        */
-                       if (zfs_is_bootfs(dsname) &&
-                           intval != ZFS_DNSIZE_LEGACY) {
-                               return (SET_ERROR(EDOM));
-                       }
-
                         if ((err = spa_open(dsname, &spa, FTAG)) != 0)
                                 return (err);
  
@@ -4693,7 +4795,7 @@ zfs_allow_log_destroy(void *arg)
                 kmem_strfree(poolname);
  }
  
-#ifdef DEBUG
+#ifdef ZFS_DEBUG
  static boolean_t zfs_ioc_recv_inject_err;
  #endif
  
@@ -4704,9 +4806,9 @@ static boolean_t zfs_ioc_recv_inject_err;
  static int
  zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
      nvlist_t *localprops, nvlist_t *hidden_args, boolean_t force,
-    boolean_t resumable, int input_fd, dmu_replay_record_t *begin_record,
-    int cleanup_fd, uint64_t *read_bytes, uint64_t *errflags,
-    uint64_t *action_handle, nvlist_t **errors)
+    boolean_t resumable, int input_fd,
+    dmu_replay_record_t *begin_record, uint64_t *read_bytes,
+    uint64_t *errflags, nvlist_t **errors)
  {
         dmu_recv_cookie_t drc;
         int error = 0;
@@ -4835,7 +4937,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
                 nvlist_free(xprops);
         }
  
-       error = dmu_recv_stream(&drc, cleanup_fd, action_handle, &off);
+       error = dmu_recv_stream(&drc, &off);
  
         if (error == 0) {
                 zfsvfs_t *zfsvfs = NULL;
@@ -4906,7 +5008,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
         }
         *read_bytes = off - noff;
  
-#ifdef DEBUG
+#ifdef ZFS_DEBUG
         if (zfs_ioc_recv_inject_err) {
                 zfs_ioc_recv_inject_err = B_FALSE;
                 error = 1;
@@ -5027,13 +5129,10 @@ out:
   * zc_cookie           file descriptor to recv from
   * zc_begin_record     the BEGIN record of the stream (not byteswapped)
   * zc_guid             force flag
- * zc_cleanup_fd       cleanup-on-exit file descriptor
- * zc_action_handle    handle for this guid/ds mapping (or zero on first call)
   *
   * outputs:
   * zc_cookie           number of bytes read
   * zc_obj              zprop_errflags_t
- * zc_action_handle    handle for this guid/ds mapping
   * zc_nvlist_dst{_size} error for each unapplied received property
   */
  static int
@@ -5076,8 +5175,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
  
         error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
             NULL, zc->zc_guid, B_FALSE, zc->zc_cookie, &begin_record,
-           zc->zc_cleanup_fd, &zc->zc_cookie, &zc->zc_obj,
-           &zc->zc_action_handle, &errors);
+           &zc->zc_cookie, &zc->zc_obj, &errors);
         nvlist_free(recvdprops);
         nvlist_free(localprops);
  
@@ -5110,15 +5208,14 @@ zfs_ioc_recv(zfs_cmd_t *zc)
   *     "input_fd" -> file descriptor to read stream from (int32)
   *     (optional) "force" -> force flag (value ignored)
   *     (optional) "resumable" -> resumable flag (value ignored)
- *     (optional) "cleanup_fd" -> cleanup-on-exit file descriptor
- *     (optional) "action_handle" -> handle for this guid/ds mapping
+ *     (optional) "cleanup_fd" -> unused
+ *     (optional) "action_handle" -> unused
   *     (optional) "hidden_args" -> { "wkeydata" -> value }
   * }
   *
   * outnvl: {
   *     "read_bytes" -> number of bytes read
   *     "error_flags" -> zprop_errflags_t
- *     "action_handle" -> handle for this guid/ds mapping
   *     "errors" -> error for each unapplied received property (nvlist)
   * }
   */
@@ -5151,11 +5248,9 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
         char tofs[ZFS_MAX_DATASET_NAME_LEN];
         boolean_t force;
         boolean_t resumable;
-       uint64_t action_handle = 0;
         uint64_t read_bytes = 0;
         uint64_t errflags = 0;
         int input_fd = -1;
-       int cleanup_fd = -1;
         int error;
  
         snapname = fnvlist_lookup_string(innvl, "snapname");
@@ -5165,7 +5260,7 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
             strchr(snapname, '%'))
                 return (SET_ERROR(EINVAL));
  
-       (void) strcpy(tofs, snapname);
+       (void) strlcpy(tofs, snapname, sizeof (tofs));
         tosnap = strchr(tofs, '@');
         *tosnap++ = '\0';
  
@@ -5183,14 +5278,6 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
         force = nvlist_exists(innvl, "force");
         resumable = nvlist_exists(innvl, "resumable");
  
-       error = nvlist_lookup_int32(innvl, "cleanup_fd", &cleanup_fd);
-       if (error && error != ENOENT)
-               return (error);
-
-       error = nvlist_lookup_uint64(innvl, "action_handle", &action_handle);
-       if (error && error != ENOENT)
-               return (error);
-
         /* we still use "props" here for backwards compatibility */
         error = nvlist_lookup_nvlist(innvl, "props", &recvprops);
         if (error && error != ENOENT)
@@ -5205,12 +5292,11 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
                 return (error);
  
         error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops,
-           hidden_args, force, resumable, input_fd, begin_record, cleanup_fd,
-           &read_bytes, &errflags, &action_handle, &errors);
+           hidden_args, force, resumable, input_fd, begin_record,
+           &read_bytes, &errflags, &errors);
  
         fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
         fnvlist_add_uint64(outnvl, "error_flags", errflags);
-       fnvlist_add_uint64(outnvl, "action_handle", action_handle);
         fnvlist_add_nvlist(outnvl, "errors", errors);
  
         nvlist_free(errors);
@@ -5407,7 +5493,7 @@ zfs_ioc_send_progress(zfs_cmd_t *zc)
         for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
             dsp = list_next(&ds->ds_sendstreams, dsp)) {
                 if (dsp->dss_outfd == zc->zc_cookie &&
-                   dsp->dss_proc == curproc)
+                   zfs_proc_is_caller(dsp->dss_proc))
                         break;
         }
  
@@ -5546,9 +5632,10 @@ zfs_ioc_clear(zfs_cmd_t *zc)
         } else {
                 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
                 if (vd == NULL) {
-                       (void) spa_vdev_state_exit(spa, NULL, ENODEV);
+                       error = SET_ERROR(ENODEV);
+                       (void) spa_vdev_state_exit(spa, NULL, error);
                         spa_close(spa, FTAG);
-                       return (SET_ERROR(ENODEV));
+                       return (error);
                 }
         }
  
@@ -6322,7 +6409,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
  }
  
  /* ARGSUSED */
-int
+static int
  send_space_sum(objset_t *os, void *buf, int len, void *arg)
  {
         uint64_t *size = arg;
@@ -6777,7 +6864,7 @@ zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
             DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
  }
  
-void
+static void
  zfs_ioctl_init(void)
  {
         zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
@@ -6930,6 +7017,21 @@ zfs_ioctl_init(void)
             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
             zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));
  
+       zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
+           zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
+           zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));
+
+       zfs_ioctl_register("set_bootenv", ZFS_IOC_SET_BOOTENV,
+           zfs_ioc_set_bootenv, zfs_secpolicy_config, POOL_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
+           zfs_keys_set_bootenv, ARRAY_SIZE(zfs_keys_set_bootenv));
+
+       zfs_ioctl_register("get_bootenv", ZFS_IOC_GET_BOOTENV,
+           zfs_ioc_get_bootenv, zfs_secpolicy_none, POOL_NAME,
+           POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE,
+           zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv));
+
         /* IOCTLS that use the legacy function signature */
  
         zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
@@ -7145,7 +7247,7 @@ zfs_check_input_nvpairs(nvlist_t *innvl, const zfs_ioc_vec_t *vec)
         return (0);
  }
  
-int
+static int
  pool_status_check(const char *name, zfs_ioc_namecheck_t type,
      zfs_ioc_poolcheck_t check)
  {
@@ -7169,6 +7271,41 @@ pool_status_check(const char *name, zfs_ioc_namecheck_t type,
         return (error);
  }
  
+/*
+ * Look up the minor number associated with an open file descriptor.
+ *
+ * The file's private data (zfs_file_private()) is compared by pointer against
+ * the entries of zfsdev_state_list, skipping entries whose zs_minor is -1.
+ * On a match, *minorp is set to that entry's zs_minor and 0 is returned.
+ *
+ * Returns the error from zfs_file_get() if fd cannot be resolved, or EBADF
+ * if the file has no private data or its private data matches no list entry.
+ * *minorp is left untouched on every error path.
+ *
+ * Must be called without zfsdev_state_lock held (asserted below); the lock
+ * is taken here for the duration of the list walk.
+ *
+ * NOTE(review): if zfs_file_get() takes a hold on the file, nothing in this
+ * function releases it on any path, including the early EBADF return.
+ * Presumably the caller is responsible for dropping it -- confirm against
+ * the callers.
+ */
+int
+zfsdev_getminor(int fd, minor_t *minorp)
+{
+       zfsdev_state_t *zs, *fpd;
+       zfs_file_t *fp;
+       int rc;
+
+       ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
+
+       if ((rc = zfs_file_get(fd, &fp)))
+               return (rc);
+
+       fpd = zfs_file_private(fp);
+       if (fpd == NULL)
+               return (SET_ERROR(EBADF));
+
+       mutex_enter(&zfsdev_state_lock);
+
+       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
+
+               if (zs->zs_minor == -1)
+                       continue;
+
+               if (fpd == zs) {
+                       *minorp = fpd->zs_minor;
+                       mutex_exit(&zfsdev_state_lock);
+                       return (0);
+               }
+       }
+
+       mutex_exit(&zfsdev_state_lock);
+
+       return (SET_ERROR(EBADF));
+}
+
  static void *
  zfsdev_get_state_impl(minor_t minor, enum zfsdev_state_type which)
  {
@@ -7226,11 +7363,13 @@ zfsdev_minor_alloc(void)
  }
  
  long
-zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc)
+zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag)
  {
-       int error, cmd, flag = 0;
+       int error, cmd;
         const zfs_ioc_vec_t *vec;
         char *saved_poolname = NULL;
+       uint64_t max_nvlist_src_size;
+       size_t saved_poolname_len = 0;
         nvlist_t *innvl = NULL;
         fstrans_cookie_t cookie;
  
@@ -7249,7 +7388,8 @@ zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc)
                 return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
  
         zc->zc_iflags = flag & FKIOCTL;
-       if (zc->zc_nvlist_src_size > MAX_NVLIST_SRC_SIZE) {
+       max_nvlist_src_size = zfs_max_nvlist_src_size_os();
+       if (zc->zc_nvlist_src_size > max_nvlist_src_size) {
                 /*
                  * Make sure the user doesn't pass in an insane value for
                  * zc_nvlist_src_size.  We have to check, since we will end
@@ -7330,13 +7470,15 @@ zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc)
                 goto out;
  
         /* legacy ioctls can modify zc_name */
-       saved_poolname = kmem_strdup(zc->zc_name);
-       if (saved_poolname == NULL) {
-               error = SET_ERROR(ENOMEM);
-               goto out;
-       } else {
-               saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
-       }
+       /*
+        * Can't use kmem_strdup() as we might truncate the string and
+        * kmem_strfree() would then free with incorrect size.
+        */
+       saved_poolname_len = strlen(zc->zc_name) + 1;
+       saved_poolname = kmem_alloc(saved_poolname_len, KM_SLEEP);
+
+       strlcpy(saved_poolname, zc->zc_name, saved_poolname_len);
+       saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
  
         if (vec->zvec_func != NULL) {
                 nvlist_t *outnvl;
@@ -7413,11 +7555,11 @@ out:
                 char *s = tsd_get(zfs_allow_log_key);
                 if (s != NULL)
                         kmem_strfree(s);
-               (void) tsd_set(zfs_allow_log_key, saved_poolname);
-       } else {
-               if (saved_poolname != NULL)
-                       kmem_strfree(saved_poolname);
+               (void) tsd_set(zfs_allow_log_key, kmem_strdup(saved_poolname));
         }
+       if (saved_poolname != NULL)
+               kmem_free(saved_poolname, saved_poolname_len);
+
         return (error);
  }
  
@@ -7457,19 +7599,20 @@ out:
  void
  zfs_kmod_fini(void)
  {
-       zfsdev_state_t *zs, *zsprev = NULL;
+       zfsdev_state_t *zs, *zsnext = NULL;
  
         zfsdev_detach();
  
         mutex_destroy(&zfsdev_state_lock);
  
-       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
-               if (zsprev)
-                       kmem_free(zsprev, sizeof (zfsdev_state_t));
-               zsprev = zs;
+       for (zs = zfsdev_state_list; zs != NULL; zs = zsnext) {
+               zsnext = zs->zs_next;
+               if (zs->zs_onexit)
+                       zfs_onexit_destroy(zs->zs_onexit);
+               if (zs->zs_zevent)
+                       zfs_zevent_destroy(zs->zs_zevent);
+               kmem_free(zs, sizeof (zfsdev_state_t));
         }
-       if (zsprev)
-               kmem_free(zsprev, sizeof (zfsdev_state_t));
  
         zfs_fini();
         spa_fini();
@@ -7479,3 +7622,8 @@ zfs_kmod_fini(void)
         tsd_destroy(&rrw_tsd_key);
         tsd_destroy(&zfs_allow_log_key);
  }
+
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, ULONG, ZMOD_RW,
+    "Maximum size in bytes allowed for src nvlist passed with ZFS ioctls");
+/* END CSTYLED */