]> git.proxmox.com Git - mirror_zfs-debian.git/blobdiff - module/zfs/zfs_ioctl.c
Imported Upstream version 0.6.4.2
[mirror_zfs-debian.git] / module / zfs / zfs_ioctl.c
index a061978dcee8907cf8b11b2a60db9c5187f07df4..39783e1091a41995d0352600876c47e6a284752f 100644 (file)
@@ -29,6 +29,7 @@
  * Copyright (c) 201i3 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
+ * Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
  */
 
 /*
 
 #include <sys/dmu_send.h>
 #include <sys/dsl_destroy.h>
+#include <sys/dsl_bookmark.h>
 #include <sys/dsl_userhold.h>
 #include <sys/zfeature.h>
 
 #include "zfs_comutil.h"
 
 kmutex_t zfsdev_state_lock;
-list_t zfsdev_state_list;
+zfsdev_state_t *zfsdev_state_list;
 
 extern void zfs_init(void);
 extern void zfs_fini(void);
@@ -245,7 +247,54 @@ static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
 static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
 
-static int zfs_prop_activate_feature(spa_t *spa, zfeature_info_t *feature);
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+void
+__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
+{
+       const char *newfile;
+       size_t size = 4096;
+       char *buf = kmem_alloc(size, KM_SLEEP);
+       char *nl;
+       va_list adx;
+
+       /*
+        * Get rid of annoying prefix to filename.
+        */
+       newfile = strrchr(file, '/');
+       if (newfile != NULL) {
+               newfile = newfile + 1; /* Get rid of leading / */
+       } else {
+               newfile = file;
+       }
+
+       va_start(adx, fmt);
+       (void) vsnprintf(buf, size, fmt, adx);
+       va_end(adx);
+
+       /*
+        * Get rid of trailing newline.
+        */
+       nl = strrchr(buf, '\n');
+       if (nl != NULL)
+               *nl = '\0';
+
+       /*
+        * To get this data enable the zfs__dprintf trace point as shown:
+        *
+        * # Enable zfs__dprintf tracepoint, clear the tracepoint ring buffer
+        * $ echo 1 > /sys/module/zfs/parameters/zfs_flags
+        * $ echo 1 > /sys/kernel/debug/tracing/events/zfs/enable
+        * $ echo 0 > /sys/kernel/debug/tracing/trace
+        *
+        * # Dump the ring buffer.
+        * $ cat /sys/kernel/debug/tracing/trace
+        */
+       DTRACE_PROBE4(zfs__dprintf,
+           char *, newfile, char *, func, int, line, char *, buf);
+
+       kmem_free(buf, size);
+}
+#endif /* HAVE_DECLARE_EVENT_CLASS */
 
 static void
 history_str_free(char *buf)
@@ -261,7 +310,7 @@ history_str_get(zfs_cmd_t *zc)
        if (zc->zc_history == 0)
                return (NULL);
 
-       buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP | KM_NODEBUG);
+       buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
        if (copyinstr((void *)(uintptr_t)zc->zc_history,
            buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
                history_str_free(buf);
@@ -812,22 +861,9 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
                return (SET_ERROR(EINVAL));
        for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
            pair = nextpair) {
-               dsl_pool_t *dp;
-               dsl_dataset_t *ds;
-
-               error = dsl_pool_hold(nvpair_name(pair), FTAG, &dp);
-               if (error != 0)
-                       break;
                nextpair = nvlist_next_nvpair(snaps, pair);
-               error = dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds);
-               if (error == 0)
-                       dsl_dataset_rele(ds, FTAG);
-               dsl_pool_rele(dp, FTAG);
-
-               if (error == 0) {
-                       error = zfs_secpolicy_destroy_perms(nvpair_name(pair),
-                           cr);
-               } else if (error == ENOENT) {
+               error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
+               if (error == ENOENT) {
                        /*
                         * Ignore any snapshots that don't exist (we consider
                         * them "already destroyed").  Remove the name from the
@@ -986,6 +1022,76 @@ zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
        return (error);
 }
 
+/*
+ * Check for permission to create each snapshot in the nvlist.
+ */
+/* ARGSUSED */
+static int
+zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+       int error = 0;
+       nvpair_t *pair;
+
+       for (pair = nvlist_next_nvpair(innvl, NULL);
+           pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
+               char *name = nvpair_name(pair);
+               char *hashp = strchr(name, '#');
+
+               if (hashp == NULL) {
+                       error = SET_ERROR(EINVAL);
+                       break;
+               }
+               *hashp = '\0';
+               error = zfs_secpolicy_write_perms(name,
+                   ZFS_DELEG_PERM_BOOKMARK, cr);
+               *hashp = '#';
+               if (error != 0)
+                       break;
+       }
+       return (error);
+}
+
+/* ARGSUSED */
+static int
+zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+       nvpair_t *pair, *nextpair;
+       int error = 0;
+
+       for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
+           pair = nextpair) {
+               char *name = nvpair_name(pair);
+               char *hashp = strchr(name, '#');
+               nextpair = nvlist_next_nvpair(innvl, pair);
+
+               if (hashp == NULL) {
+                       error = SET_ERROR(EINVAL);
+                       break;
+               }
+
+               *hashp = '\0';
+               error = zfs_secpolicy_write_perms(name,
+                   ZFS_DELEG_PERM_DESTROY, cr);
+               *hashp = '#';
+               if (error == ENOENT) {
+                       /*
+                        * Ignore any filesystems that don't exist (we consider
+                        * their bookmarks "already destroyed").  Remove
+                        * the name from the nvl here in case the filesystem
+                        * is created between now and when we try to destroy
+                        * the bookmark (in which case we don't want to
+                        * destroy it since we haven't checked for permission).
+                        */
+                       fnvlist_remove_nvpair(innvl, pair);
+                       error = 0;
+               }
+               if (error != 0)
+                       break;
+       }
+
+       return (error);
+}
+
 /* ARGSUSED */
 static int
 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
@@ -1221,20 +1327,20 @@ get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
        if (size == 0)
                return (SET_ERROR(EINVAL));
 
-       packed = kmem_alloc(size, KM_SLEEP | KM_NODEBUG);
+       packed = vmem_alloc(size, KM_SLEEP);
 
        if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
            iflag)) != 0) {
-               kmem_free(packed, size);
+               vmem_free(packed, size);
                return (error);
        }
 
        if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
-               kmem_free(packed, size);
+               vmem_free(packed, size);
                return (error);
        }
 
-       kmem_free(packed, size);
+       vmem_free(packed, size);
 
        *nvp = list;
        return (0);
@@ -2336,46 +2442,13 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
                if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
                        zfs_cmd_t *zc;
 
-                       zc = kmem_zalloc(sizeof (zfs_cmd_t),
-                           KM_SLEEP | KM_NODEBUG);
+                       zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
                        (void) strcpy(zc->zc_name, dsname);
                        (void) zfs_ioc_userspace_upgrade(zc);
                        kmem_free(zc, sizeof (zfs_cmd_t));
                }
                break;
        }
-       case ZFS_PROP_COMPRESSION:
-       {
-               if (intval == ZIO_COMPRESS_LZ4) {
-                       zfeature_info_t *feature =
-                           &spa_feature_table[SPA_FEATURE_LZ4_COMPRESS];
-                       spa_t *spa;
-
-                       if ((err = spa_open(dsname, &spa, FTAG)) != 0)
-                               return (err);
-
-                       /*
-                        * Setting the LZ4 compression algorithm activates
-                        * the feature.
-                        */
-                       if (!spa_feature_is_active(spa, feature)) {
-                               if ((err = zfs_prop_activate_feature(spa,
-                                   feature)) != 0) {
-                                       spa_close(spa, FTAG);
-                                       return (err);
-                               }
-                       }
-
-                       spa_close(spa, FTAG);
-               }
-               /*
-                * We still want the default set action to be performed in the
-                * caller, we only performed zfeature settings here.
-                */
-               err = -1;
-               break;
-       }
-
        default:
                err = -1;
        }
@@ -2552,7 +2625,6 @@ zfs_check_userprops(const char *fsname, nvlist_t *nvl)
 
        while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
                const char *propname = nvpair_name(pair);
-               char *valstr;
 
                if (!zfs_prop_user(propname) ||
                    nvpair_type(pair) != DATA_TYPE_STRING)
@@ -2565,8 +2637,7 @@ zfs_check_userprops(const char *fsname, nvlist_t *nvl)
                if (strlen(propname) >= ZAP_MAXNAMELEN)
                        return (SET_ERROR(ENAMETOOLONG));
 
-               VERIFY(nvpair_value_string(pair, &valstr) == 0);
-               if (strlen(valstr) >= ZAP_MAXVALUELEN)
+               if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
                        return (SET_ERROR(E2BIG));
        }
        return (0);
@@ -3243,7 +3314,8 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
                 * The snap name must contain an @, and the part after it must
                 * contain only valid characters.
                 */
-               if (cp == NULL || snapshot_namecheck(cp + 1, NULL, NULL) != 0)
+               if (cp == NULL ||
+                   zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
                        return (SET_ERROR(EINVAL));
 
                /*
@@ -3397,10 +3469,10 @@ zfs_destroy_unmount_origin(const char *fsname)
  *
  * outnvl: snapshot -> error code (int32)
  */
+/* ARGSUSED */
 static int
 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 {
-       int error, poollen;
        nvlist_t *snaps;
        nvpair_t *pair;
        boolean_t defer;
@@ -3409,25 +3481,110 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
                return (SET_ERROR(EINVAL));
        defer = nvlist_exists(innvl, "defer");
 
-       poollen = strlen(poolname);
        for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
            pair = nvlist_next_nvpair(snaps, pair)) {
+               (void) zfs_unmount_snap(nvpair_name(pair));
+               (void) zvol_remove_minor(nvpair_name(pair));
+       }
+
+       return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
+}
+
+/*
+ * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
+ * All bookmarks must be in the same pool.
+ *
+ * innvl: {
+ *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
+ * }
+ *
+ * outnvl: bookmark -> error code (int32)
+ *
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       nvpair_t *pair, *pair2;
+
+       for (pair = nvlist_next_nvpair(innvl, NULL);
+           pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
+               char *snap_name;
+
+               /*
+                * Verify the snapshot argument.
+                */
+               if (nvpair_value_string(pair, &snap_name) != 0)
+                       return (SET_ERROR(EINVAL));
+
+
+               /* Verify that the keys (bookmarks) are unique */
+               for (pair2 = nvlist_next_nvpair(innvl, pair);
+                   pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
+                       if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
+                               return (SET_ERROR(EINVAL));
+               }
+       }
+
+       return (dsl_bookmark_create(innvl, outnvl));
+}
+
+/*
+ * innvl: {
+ *     property 1, property 2, ...
+ * }
+ *
+ * outnvl: {
+ *     bookmark name 1 -> { property 1, property 2, ... },
+ *     bookmark name 2 -> { property 1, property 2, ... }
+ * }
+ *
+ */
+static int
+zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       return (dsl_get_bookmarks(fsname, innvl, outnvl));
+}
+
+/*
+ * innvl: {
+ *     bookmark name 1, bookmark name 2
+ * }
+ *
+ * outnvl: bookmark -> error code (int32)
+ *
+ */
+static int
+zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
+    nvlist_t *outnvl)
+{
+       int error, poollen;
+       nvpair_t *pair;
+
+       poollen = strlen(poolname);
+       for (pair = nvlist_next_nvpair(innvl, NULL);
+           pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
                const char *name = nvpair_name(pair);
+               const char *cp = strchr(name, '#');
 
                /*
-                * The snap must be in the specified pool.
+                * The bookmark name must contain an #, and the part after it
+                * must contain only valid characters.
+                */
+               if (cp == NULL ||
+                   zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
+                       return (SET_ERROR(EINVAL));
+
+               /*
+                * The bookmark must be in the specified pool.
                 */
                if (strncmp(name, poolname, poollen) != 0 ||
-                   (name[poollen] != '/' && name[poollen] != '@'))
+                   (name[poollen] != '/' && name[poollen] != '#'))
                        return (SET_ERROR(EXDEV));
-
-               error = zfs_unmount_snap(name);
-               if (error != 0)
-                       return (error);
-               (void) zvol_remove_minor(name);
        }
 
-       return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
+       error = dsl_bookmark_destroy(innvl, outnvl);
+       return (error);
 }
 
 /*
@@ -3494,11 +3651,13 @@ recursive_unmount(const char *fsname, void *arg)
 {
        const char *snapname = arg;
        char *fullname;
+       int error;
 
        fullname = kmem_asprintf("%s@%s", fsname, snapname);
-       zfs_unmount_snap(fullname);
+       error = zfs_unmount_snap(fullname);
        strfree(fullname);
-       return (zfs_unmount_snap(fullname));
+
+       return (error);
 }
 
 /*
@@ -3629,15 +3788,13 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                                return (SET_ERROR(ENOTSUP));
 
                        if (intval == ZIO_COMPRESS_LZ4) {
-                               zfeature_info_t *feature =
-                                   &spa_feature_table[
-                                   SPA_FEATURE_LZ4_COMPRESS];
                                spa_t *spa;
 
                                if ((err = spa_open(dsname, &spa, FTAG)) != 0)
                                        return (err);
 
-                               if (!spa_feature_is_enabled(spa, feature)) {
+                               if (!spa_feature_is_enabled(spa,
+                                   SPA_FEATURE_LZ4_COMPRESS)) {
                                        spa_close(spa, FTAG);
                                        return (SET_ERROR(ENOTSUP));
                                }
@@ -3689,56 +3846,6 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
        return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
 }
 
-/*
- * Checks for a race condition to make sure we don't increment a feature flag
- * multiple times.
- */
-static int
-zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
-{
-       spa_t *spa = dmu_tx_pool(tx)->dp_spa;
-       zfeature_info_t *feature = arg;
-
-       if (!spa_feature_is_active(spa, feature))
-               return (0);
-       else
-               return (SET_ERROR(EBUSY));
-}
-
-/*
- * The callback invoked on feature activation in the sync task caused by
- * zfs_prop_activate_feature.
- */
-static void
-zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
-{
-       spa_t *spa = dmu_tx_pool(tx)->dp_spa;
-       zfeature_info_t *feature = arg;
-
-       spa_feature_incr(spa, feature, tx);
-}
-
-/*
- * Activates a feature on a pool in response to a property setting. This
- * creates a new sync task which modifies the pool to reflect the feature
- * as being active.
- */
-static int
-zfs_prop_activate_feature(spa_t *spa, zfeature_info_t *feature)
-{
-       int err;
-
-       /* EBUSY here indicates that the feature is already active */
-       err = dsl_sync_task(spa_name(spa),
-           zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
-           feature, 2);
-
-       if (err != 0 && err != EBUSY)
-               return (err);
-       else
-               return (0);
-}
-
 /*
  * Removes properties from the given props list that fail permission checks
  * needed to clear them and to restore them in case of a receive error. For each
@@ -3766,7 +3873,7 @@ zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
 
        VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
-       zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG);
+       zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
        (void) strcpy(zc->zc_name, dataset);
        pair = nvlist_next_nvpair(props, NULL);
        while (pair != NULL) {
@@ -4097,8 +4204,10 @@ out:
  * zc_fromobj  objsetid of incremental fromsnap (may be zero)
  * zc_guid     if set, estimate size of stream only.  zc_cookie is ignored.
  *             output size in zc_objset_type.
+ * zc_flags    if =1, WRITE_EMBEDDED records are permitted
  *
- * outputs: none
+ * outputs:
+ * zc_objset_type      estimated size, if zc_guid is set
  */
 static int
 zfs_ioc_send(zfs_cmd_t *zc)
@@ -4106,6 +4215,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
        int error;
        offset_t off;
        boolean_t estimate = (zc->zc_guid != 0);
+       boolean_t embedok = (zc->zc_flags & 0x1);
 
        if (zc->zc_obj != 0) {
                dsl_pool_t *dp;
@@ -4166,7 +4276,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
 
                off = fp->f_offset;
                error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
-                   zc->zc_fromobj, zc->zc_cookie, fp->f_vnode, &off);
+                   zc->zc_fromobj, embedok, zc->zc_cookie, fp->f_vnode, &off);
 
                if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
                        fp->f_offset = off;
@@ -4858,11 +4968,11 @@ zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
 /*
  * inputs:
  * zc_guid             flags (ZEVENT_NONBLOCK)
+ * zc_cleanup_fd       zevent file descriptor
  *
  * outputs:
  * zc_nvlist_dst       next nvlist event
  * zc_cookie           dropped events since last get
- * zc_cleanup_fd       cleanup-on-exit file descriptor
  */
 static int
 zfs_ioc_events_next(zfs_cmd_t *zc)
@@ -4917,6 +5027,28 @@ zfs_ioc_events_clear(zfs_cmd_t *zc)
        return (0);
 }
 
+/*
+ * inputs:
+ * zc_guid             eid | ZEVENT_SEEK_START | ZEVENT_SEEK_END
+ * zc_cleanup          zevent file descriptor
+ */
+static int
+zfs_ioc_events_seek(zfs_cmd_t *zc)
+{
+       zfs_zevent_t *ze;
+       minor_t minor;
+       int error;
+
+       error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
+       if (error != 0)
+               return (error);
+
+       error = zfs_zevent_seek(ze, zc->zc_guid);
+       zfs_zevent_fd_rele(zc->zc_cleanup_fd);
+
+       return (error);
+}
+
 /*
  * inputs:
  * zc_name             name of new filesystem or snapshot
@@ -5010,6 +5142,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
  * innvl: {
  *     "fd" -> file descriptor to write stream to (int32)
  *     (optional) "fromsnap" -> full snap name to send an incremental from
+ *     (optional) "embedok" -> (value ignored)
+ *         presence indicates DRR_WRITE_EMBEDDED records are permitted
  * }
  *
  * outnvl is unused
@@ -5023,6 +5157,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
        char *fromname = NULL;
        int fd;
        file_t *fp;
+       boolean_t embedok;
 
        error = nvlist_lookup_int32(innvl, "fd", &fd);
        if (error != 0)
@@ -5030,11 +5165,13 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 
        (void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
 
+       embedok = nvlist_exists(innvl, "embedok");
+
        if ((fp = getf(fd)) == NULL)
                return (SET_ERROR(EBADF));
 
        off = fp->f_offset;
-       error = dmu_send(snapname, fromname, fd, fp->f_vnode, &off);
+       error = dmu_send(snapname, fromname, embedok, fd, fp->f_vnode, &off);
 
        if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
                fp->f_offset = off;
@@ -5251,6 +5388,19 @@ zfs_ioctl_init(void)
            zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
 
+       zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
+           zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
+       zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
+           zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
+           POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
+
+       zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
+           zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
+           POOL_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
        /* IOCTLS that use the legacy function signature */
 
        zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
@@ -5300,9 +5450,9 @@ zfs_ioctl_init(void)
         * does the logging of those commands.
         */
        zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
-           zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
+           zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
        zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
-           zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
+           zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
 
        zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
            zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
@@ -5391,6 +5541,8 @@ zfs_ioctl_init(void)
            zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
        zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
            zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
+       zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek,
+           zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
 }
 
 int
@@ -5421,11 +5573,9 @@ zfsdev_get_state_impl(minor_t minor, enum zfsdev_state_type which)
 {
        zfsdev_state_t *zs;
 
-       ASSERT(MUTEX_HELD(&zfsdev_state_lock));
-
-       for (zs = list_head(&zfsdev_state_list); zs != NULL;
-           zs = list_next(&zfsdev_state_list, zs)) {
+       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
                if (zs->zs_minor == minor) {
+                       smp_rmb();
                        switch (which) {
                        case ZST_ONEXIT:
                                return (zs->zs_onexit);
@@ -5445,9 +5595,7 @@ zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
 {
        void *ptr;
 
-       mutex_enter(&zfsdev_state_lock);
        ptr = zfsdev_get_state_impl(minor, which);
-       mutex_exit(&zfsdev_state_lock);
 
        return (ptr);
 }
@@ -5488,8 +5636,9 @@ zfsdev_minor_alloc(void)
 static int
 zfsdev_state_init(struct file *filp)
 {
-       zfsdev_state_t *zs;
+       zfsdev_state_t *zs, *zsprev = NULL;
        minor_t minor;
+       boolean_t newzs = B_FALSE;
 
        ASSERT(MUTEX_HELD(&zfsdev_state_lock));
 
@@ -5497,16 +5646,40 @@ zfsdev_state_init(struct file *filp)
        if (minor == 0)
                return (SET_ERROR(ENXIO));
 
-       zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
+       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
+               if (zs->zs_minor == -1)
+                       break;
+               zsprev = zs;
+       }
+
+       if (!zs) {
+               zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
+               newzs = B_TRUE;
+       }
 
        zs->zs_file = filp;
-       zs->zs_minor = minor;
        filp->private_data = zs;
 
        zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
        zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);
 
-       list_insert_tail(&zfsdev_state_list, zs);
+
+       /*
+        * In order to provide for lock-free concurrent read access
+        * to the minor list in zfsdev_get_state_impl(), new entries
+        * must be completely written before linking them into the
+        * list whereas existing entries are already linked; the last
+        * operation must be updating zs_minor (from -1 to the new
+        * value).
+        */
+       if (newzs) {
+               zs->zs_minor = minor;
+               smp_wmb();
+               zsprev->zs_next = zs;
+       } else {
+               smp_wmb();
+               zs->zs_minor = minor;
+       }
 
        return (0);
 }
@@ -5520,12 +5693,10 @@ zfsdev_state_destroy(struct file *filp)
        ASSERT(filp->private_data != NULL);
 
        zs = filp->private_data;
+       zs->zs_minor = -1;
        zfs_onexit_destroy(zs->zs_onexit);
        zfs_zevent_destroy(zs->zs_zevent);
 
-       list_remove(&zfsdev_state_list, zs);
-       kmem_free(zs, sizeof (zfsdev_state_t));
-
        return (0);
 }
 
@@ -5558,10 +5729,11 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
 {
        zfs_cmd_t *zc;
        uint_t vecnum;
-       int error, rc, len = 0, flag = 0;
+       int error, rc, flag = 0;
        const zfs_ioc_vec_t *vec;
        char *saved_poolname = NULL;
        nvlist_t *innvl = NULL;
+       fstrans_cookie_t cookie;
 
        vecnum = cmd - ZFS_IOC_FIRST;
        if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
@@ -5575,7 +5747,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
        if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL)
                return (-SET_ERROR(EINVAL));
 
-       zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG);
+       zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
 
        error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
        if (error != 0) {
@@ -5625,9 +5797,13 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
                goto out;
 
        /* legacy ioctls can modify zc_name */
-       len = strcspn(zc->zc_name, "/@#") + 1;
-       saved_poolname = kmem_alloc(len, KM_SLEEP);
-       (void) strlcpy(saved_poolname, zc->zc_name, len);
+       saved_poolname = strdup(zc->zc_name);
+       if (saved_poolname == NULL) {
+               error = SET_ERROR(ENOMEM);
+               goto out;
+       } else {
+               saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
+       }
 
        if (vec->zvec_func != NULL) {
                nvlist_t *outnvl;
@@ -5651,8 +5827,10 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
                        }
                }
 
-               VERIFY0(nvlist_alloc(&outnvl, NV_UNIQUE_NAME, KM_PUSHPAGE));
+               outnvl = fnvlist_alloc();
+               cookie = spl_fstrans_mark();
                error = vec->zvec_func(zc->zc_name, innvl, outnvl);
+               spl_fstrans_unmark(cookie);
 
                if (error == 0 && vec->zvec_allow_log &&
                    spa_open(zc->zc_name, &spa, FTAG) == 0) {
@@ -5680,7 +5858,9 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
 
                nvlist_free(outnvl);
        } else {
+               cookie = spl_fstrans_mark();
                error = vec->zvec_legacy_func(zc);
+               spl_fstrans_unmark(cookie);
        }
 
 out:
@@ -5695,7 +5875,7 @@ out:
                (void) tsd_set(zfs_allow_log_key, saved_poolname);
        } else {
                if (saved_poolname != NULL)
-                       kmem_free(saved_poolname, len);
+                       strfree(saved_poolname);
        }
 
        kmem_free(zc, sizeof (zfs_cmd_t));
@@ -5732,8 +5912,8 @@ zfs_attach(void)
        int error;
 
        mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
-       list_create(&zfsdev_state_list, sizeof (zfsdev_state_t),
-           offsetof(zfsdev_state_t, zs_next));
+       zfsdev_state_list = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
+       zfsdev_state_list->zs_minor = -1;
 
        error = misc_register(&zfs_misc);
        if (error != 0) {
@@ -5748,13 +5928,21 @@ static void
 zfs_detach(void)
 {
        int error;
+       zfsdev_state_t *zs, *zsprev = NULL;
 
        error = misc_deregister(&zfs_misc);
        if (error != 0)
                printk(KERN_INFO "ZFS: misc_deregister() failed %d\n", error);
 
        mutex_destroy(&zfsdev_state_lock);
-       list_destroy(&zfsdev_state_list);
+
+       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
+               if (zsprev)
+                       kmem_free(zsprev, sizeof (zfsdev_state_t));
+               zsprev = zs;
+       }
+       if (zsprev)
+               kmem_free(zsprev, sizeof (zfsdev_state_t));
 }
 
 static void
@@ -5770,11 +5958,18 @@ zfs_allow_log_destroy(void *arg)
 #define        ZFS_DEBUG_STR   ""
 #endif
 
-int
+static int __init
 _init(void)
 {
        int error;
 
+       error = vn_set_pwd("/");
+       if (error) {
+               printk(KERN_NOTICE
+                   "ZFS: Warning unable to set pwd to '/': %d\n", error);
+               return (error);
+       }
+
        spa_init(FREAD | FWRITE);
        zfs_init();
 
@@ -5812,7 +6007,7 @@ out1:
        return (error);
 }
 
-int
+static void __exit
 _fini(void)
 {
        zfs_detach();
@@ -5826,13 +6021,11 @@ _fini(void)
 
        printk(KERN_NOTICE "ZFS: Unloaded module v%s-%s%s\n",
            ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
-
-       return (0);
 }
 
 #ifdef HAVE_SPL
-spl_module_init(_init);
-spl_module_exit(_fini);
+module_init(_init);
+module_exit(_fini);
 
 MODULE_DESCRIPTION("ZFS");
 MODULE_AUTHOR(ZFS_META_AUTHOR);