]> git.proxmox.com Git - mirror_zfs.git/blobdiff - module/zfs/zfs_ioctl.c
Retire KM_NODEBUG
[mirror_zfs.git] / module / zfs / zfs_ioctl.c
index 9b084632feadb04885a77ed737d81caa78d69713..7d443ecf185460b8a4f11e55a57aa3f679b36c61 100644 (file)
@@ -29,6 +29,7 @@
  * Copyright (c) 2013 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
+ * Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
  */
 
 /*
 
 #include <sys/dmu_send.h>
 #include <sys/dsl_destroy.h>
+#include <sys/dsl_bookmark.h>
 #include <sys/dsl_userhold.h>
 #include <sys/zfeature.h>
 
 #include <linux/miscdevice.h>
+#include <linux/module_compat.h>
 
 #include "zfs_namecheck.h"
 #include "zfs_prop.h"
 #include "zfs_comutil.h"
 
 kmutex_t zfsdev_state_lock;
-list_t zfsdev_state_list;
+zfsdev_state_t *zfsdev_state_list;
 
 extern void zfs_init(void);
 extern void zfs_fini(void);
@@ -245,7 +248,54 @@ static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
 static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
 
-static int zfs_prop_activate_feature(spa_t *spa, zfeature_info_t *feature);
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+void
+__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
+{
+       const char *newfile;
+       size_t size = 4096;
+       char *buf = kmem_alloc(size, KM_PUSHPAGE);
+       char *nl;
+       va_list adx;
+
+       /*
+        * Get rid of annoying prefix to filename.
+        */
+       newfile = strrchr(file, '/');
+       if (newfile != NULL) {
+               newfile = newfile + 1; /* Get rid of leading / */
+       } else {
+               newfile = file;
+       }
+
+       va_start(adx, fmt);
+       (void) vsnprintf(buf, size, fmt, adx);
+       va_end(adx);
+
+       /*
+        * Get rid of trailing newline.
+        */
+       nl = strrchr(buf, '\n');
+       if (nl != NULL)
+               *nl = '\0';
+
+       /*
+        * To get this data enable the zfs__dprintf trace point as shown:
+        *
+        * # Enable zfs__dprintf tracepoint, clear the tracepoint ring buffer
+        * $ echo 1 > /sys/module/zfs/parameters/zfs_flags
+        * $ echo 1 > /sys/kernel/debug/tracing/events/zfs/enable
+        * $ echo 0 > /sys/kernel/debug/tracing/trace
+        *
+        * # Dump the ring buffer.
+        * $ cat /sys/kernel/debug/tracing/trace
+        */
+       DTRACE_PROBE4(zfs__dprintf,
+           char *, newfile, char *, func, int, line, char *, buf);
+
+       kmem_free(buf, size);
+}
+#endif /* HAVE_DECLARE_EVENT_CLASS */
 
 static void
 history_str_free(char *buf)
@@ -261,7 +311,7 @@ history_str_get(zfs_cmd_t *zc)
        if (zc->zc_history == 0)
                return (NULL);
 
-       buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP | KM_NODEBUG);
+       buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
        if (copyinstr((void *)(uintptr_t)zc->zc_history,
            buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
                history_str_free(buf);
@@ -567,7 +617,7 @@ out_check:
                return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
        return (0);
 #else
-       return ENOTSUP;
+       return (ENOTSUP);
 #endif /* HAVE_MLSLABEL */
 }
 
@@ -812,22 +862,9 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
                return (SET_ERROR(EINVAL));
        for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
            pair = nextpair) {
-               dsl_pool_t *dp;
-               dsl_dataset_t *ds;
-
-               error = dsl_pool_hold(nvpair_name(pair), FTAG, &dp);
-               if (error != 0)
-                       break;
                nextpair = nvlist_next_nvpair(snaps, pair);
-               error = dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds);
-               if (error == 0)
-                       dsl_dataset_rele(ds, FTAG);
-               dsl_pool_rele(dp, FTAG);
-
-               if (error == 0) {
-                       error = zfs_secpolicy_destroy_perms(nvpair_name(pair),
-                           cr);
-               } else if (error == ENOENT) {
+               error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
+               if (error == ENOENT) {
                        /*
                         * Ignore any snapshots that don't exist (we consider
                         * them "already destroyed").  Remove the name from the
@@ -986,6 +1023,76 @@ zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
        return (error);
 }
 
+/*
+ * Check for permission to create each bookmark in the nvlist.
+ */
+/* ARGSUSED */
+static int
+zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+       int error = 0;
+       nvpair_t *pair;
+
+       for (pair = nvlist_next_nvpair(innvl, NULL);
+           pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
+               char *name = nvpair_name(pair);
+               char *hashp = strchr(name, '#');
+
+               if (hashp == NULL) {
+                       error = SET_ERROR(EINVAL);
+                       break;
+               }
+               *hashp = '\0';
+               error = zfs_secpolicy_write_perms(name,
+                   ZFS_DELEG_PERM_BOOKMARK, cr);
+               *hashp = '#';
+               if (error != 0)
+                       break;
+       }
+       return (error);
+}
+
+/* ARGSUSED */
+static int
+zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+       nvpair_t *pair, *nextpair;
+       int error = 0;
+
+       for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
+           pair = nextpair) {
+               char *name = nvpair_name(pair);
+               char *hashp = strchr(name, '#');
+               nextpair = nvlist_next_nvpair(innvl, pair);
+
+               if (hashp == NULL) {
+                       error = SET_ERROR(EINVAL);
+                       break;
+               }
+
+               *hashp = '\0';
+               error = zfs_secpolicy_write_perms(name,
+                   ZFS_DELEG_PERM_DESTROY, cr);
+               *hashp = '#';
+               if (error == ENOENT) {
+                       /*
+                        * Ignore any filesystems that don't exist (we consider
+                        * their bookmarks "already destroyed").  Remove
+                        * the name from the nvl here in case the filesystem
+                        * is created between now and when we try to destroy
+                        * the bookmark (in which case we don't want to
+                        * destroy it since we haven't checked for permission).
+                        */
+                       fnvlist_remove_nvpair(innvl, pair);
+                       error = 0;
+               }
+               if (error != 0)
+                       break;
+       }
+
+       return (error);
+}
+
 /* ARGSUSED */
 static int
 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
@@ -1221,7 +1328,7 @@ get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
        if (size == 0)
                return (SET_ERROR(EINVAL));
 
-       packed = kmem_alloc(size, KM_SLEEP | KM_NODEBUG);
+       packed = kmem_alloc(size, KM_SLEEP);
 
        if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
            iflag)) != 0) {
@@ -2336,46 +2443,13 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
                if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
                        zfs_cmd_t *zc;
 
-                       zc = kmem_zalloc(sizeof (zfs_cmd_t),
-                           KM_SLEEP | KM_NODEBUG);
+                       zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
                        (void) strcpy(zc->zc_name, dsname);
                        (void) zfs_ioc_userspace_upgrade(zc);
                        kmem_free(zc, sizeof (zfs_cmd_t));
                }
                break;
        }
-       case ZFS_PROP_COMPRESSION:
-       {
-               if (intval == ZIO_COMPRESS_LZ4) {
-                       zfeature_info_t *feature =
-                           &spa_feature_table[SPA_FEATURE_LZ4_COMPRESS];
-                       spa_t *spa;
-
-                       if ((err = spa_open(dsname, &spa, FTAG)) != 0)
-                               return (err);
-
-                       /*
-                        * Setting the LZ4 compression algorithm activates
-                        * the feature.
-                        */
-                       if (!spa_feature_is_active(spa, feature)) {
-                               if ((err = zfs_prop_activate_feature(spa,
-                                   feature)) != 0) {
-                                       spa_close(spa, FTAG);
-                                       return (err);
-                               }
-                       }
-
-                       spa_close(spa, FTAG);
-               }
-               /*
-                * We still want the default set action to be performed in the
-                * caller, we only performed zfeature settings here.
-                */
-               err = -1;
-               break;
-       }
-
        default:
                err = -1;
        }
@@ -2552,7 +2626,6 @@ zfs_check_userprops(const char *fsname, nvlist_t *nvl)
 
        while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
                const char *propname = nvpair_name(pair);
-               char *valstr;
 
                if (!zfs_prop_user(propname) ||
                    nvpair_type(pair) != DATA_TYPE_STRING)
@@ -2565,8 +2638,7 @@ zfs_check_userprops(const char *fsname, nvlist_t *nvl)
                if (strlen(propname) >= ZAP_MAXNAMELEN)
                        return (SET_ERROR(ENAMETOOLONG));
 
-               VERIFY(nvpair_value_string(pair, &valstr) == 0);
-               if (strlen(valstr) >= ZAP_MAXVALUELEN)
+               if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
                        return (SET_ERROR(E2BIG));
        }
        return (0);
@@ -3243,7 +3315,8 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
                 * The snap name must contain an @, and the part after it must
                 * contain only valid characters.
                 */
-               if (cp == NULL || snapshot_namecheck(cp + 1, NULL, NULL) != 0)
+               if (cp == NULL ||
+                   zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
                        return (SET_ERROR(EINVAL));
 
                /*
@@ -3397,10 +3470,10 @@ zfs_destroy_unmount_origin(const char *fsname)
  *
  * outnvl: snapshot -> error code (int32)
  */
+/* ARGSUSED */
 static int
 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 {
-       int error, poollen;
        nvlist_t *snaps;
        nvpair_t *pair;
        boolean_t defer;
@@ -3409,25 +3482,110 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
                return (SET_ERROR(EINVAL));
        defer = nvlist_exists(innvl, "defer");
 
-       poollen = strlen(poolname);
        for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
            pair = nvlist_next_nvpair(snaps, pair)) {
+               (void) zfs_unmount_snap(nvpair_name(pair));
+               (void) zvol_remove_minor(nvpair_name(pair));
+       }
+
+       return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
+}
+
+/*
+ * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
+ * All bookmarks must be in the same pool.
+ *
+ * innvl: {
+ *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
+ * }
+ *
+ * outnvl: bookmark -> error code (int32)
+ *
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       nvpair_t *pair, *pair2;
+
+       for (pair = nvlist_next_nvpair(innvl, NULL);
+           pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
+               char *snap_name;
+
+               /*
+                * Verify the snapshot argument.
+                */
+               if (nvpair_value_string(pair, &snap_name) != 0)
+                       return (SET_ERROR(EINVAL));
+
+
+               /* Verify that the keys (bookmarks) are unique */
+               for (pair2 = nvlist_next_nvpair(innvl, pair);
+                   pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
+                       if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
+                               return (SET_ERROR(EINVAL));
+               }
+       }
+
+       return (dsl_bookmark_create(innvl, outnvl));
+}
+
+/*
+ * innvl: {
+ *     property 1, property 2, ...
+ * }
+ *
+ * outnvl: {
+ *     bookmark name 1 -> { property 1, property 2, ... },
+ *     bookmark name 2 -> { property 1, property 2, ... }
+ * }
+ *
+ */
+static int
+zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       return (dsl_get_bookmarks(fsname, innvl, outnvl));
+}
+
+/*
+ * innvl: {
+ *     bookmark name 1, bookmark name 2
+ * }
+ *
+ * outnvl: bookmark -> error code (int32)
+ *
+ */
+static int
+zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
+    nvlist_t *outnvl)
+{
+       int error, poollen;
+       nvpair_t *pair;
+
+       poollen = strlen(poolname);
+       for (pair = nvlist_next_nvpair(innvl, NULL);
+           pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
                const char *name = nvpair_name(pair);
+               const char *cp = strchr(name, '#');
 
                /*
-                * The snap must be in the specified pool.
+                * The bookmark name must contain an #, and the part after it
+                * must contain only valid characters.
+                */
+               if (cp == NULL ||
+                   zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
+                       return (SET_ERROR(EINVAL));
+
+               /*
+                * The bookmark must be in the specified pool.
                 */
                if (strncmp(name, poolname, poollen) != 0 ||
-                   (name[poollen] != '/' && name[poollen] != '@'))
+                   (name[poollen] != '/' && name[poollen] != '#'))
                        return (SET_ERROR(EXDEV));
-
-               error = zfs_unmount_snap(name);
-               if (error != 0)
-                       return (error);
-               (void) zvol_remove_minor(name);
        }
 
-       return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
+       error = dsl_bookmark_destroy(innvl, outnvl);
+       return (error);
 }
 
 /*
@@ -3494,11 +3652,13 @@ recursive_unmount(const char *fsname, void *arg)
 {
        const char *snapname = arg;
        char *fullname;
+       int error;
 
        fullname = kmem_asprintf("%s@%s", fsname, snapname);
-       zfs_unmount_snap(fullname);
+       error = zfs_unmount_snap(fullname);
        strfree(fullname);
-       return (zfs_unmount_snap(fullname));
+
+       return (error);
 }
 
 /*
@@ -3629,15 +3789,13 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                                return (SET_ERROR(ENOTSUP));
 
                        if (intval == ZIO_COMPRESS_LZ4) {
-                               zfeature_info_t *feature =
-                                   &spa_feature_table[
-                                   SPA_FEATURE_LZ4_COMPRESS];
                                spa_t *spa;
 
                                if ((err = spa_open(dsname, &spa, FTAG)) != 0)
                                        return (err);
 
-                               if (!spa_feature_is_enabled(spa, feature)) {
+                               if (!spa_feature_is_enabled(spa,
+                                   SPA_FEATURE_LZ4_COMPRESS)) {
                                        spa_close(spa, FTAG);
                                        return (SET_ERROR(ENOTSUP));
                                }
@@ -3689,56 +3847,6 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
        return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
 }
 
-/*
- * Checks for a race condition to make sure we don't increment a feature flag
- * multiple times.
- */
-static int
-zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
-{
-       spa_t *spa = dmu_tx_pool(tx)->dp_spa;
-       zfeature_info_t *feature = arg;
-
-       if (!spa_feature_is_active(spa, feature))
-               return (0);
-       else
-               return (SET_ERROR(EBUSY));
-}
-
-/*
- * The callback invoked on feature activation in the sync task caused by
- * zfs_prop_activate_feature.
- */
-static void
-zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
-{
-       spa_t *spa = dmu_tx_pool(tx)->dp_spa;
-       zfeature_info_t *feature = arg;
-
-       spa_feature_incr(spa, feature, tx);
-}
-
-/*
- * Activates a feature on a pool in response to a property setting. This
- * creates a new sync task which modifies the pool to reflect the feature
- * as being active.
- */
-static int
-zfs_prop_activate_feature(spa_t *spa, zfeature_info_t *feature)
-{
-       int err;
-
-       /* EBUSY here indicates that the feature is already active */
-       err = dsl_sync_task(spa_name(spa),
-           zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
-           feature, 2);
-
-       if (err != 0 && err != EBUSY)
-               return (err);
-       else
-               return (0);
-}
-
 /*
  * Removes properties from the given props list that fail permission checks
  * needed to clear them and to restore them in case of a receive error. For each
@@ -3766,7 +3874,7 @@ zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
 
        VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
-       zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG);
+       zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
        (void) strcpy(zc->zc_name, dataset);
        pair = nvlist_next_nvpair(props, NULL);
        while (pair != NULL) {
@@ -4097,8 +4205,10 @@ out:
  * zc_fromobj  objsetid of incremental fromsnap (may be zero)
  * zc_guid     if set, estimate size of stream only.  zc_cookie is ignored.
  *             output size in zc_objset_type.
+ * zc_flags    if =1, WRITE_EMBEDDED records are permitted
  *
- * outputs: none
+ * outputs:
+ * zc_objset_type      estimated size, if zc_guid is set
  */
 static int
 zfs_ioc_send(zfs_cmd_t *zc)
@@ -4106,6 +4216,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
        int error;
        offset_t off;
        boolean_t estimate = (zc->zc_guid != 0);
+       boolean_t embedok = (zc->zc_flags & 0x1);
 
        if (zc->zc_obj != 0) {
                dsl_pool_t *dp;
@@ -4166,7 +4277,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
 
                off = fp->f_offset;
                error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
-                   zc->zc_fromobj, zc->zc_cookie, fp->f_vnode, &off);
+                   zc->zc_fromobj, embedok, zc->zc_cookie, fp->f_vnode, &off);
 
                if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
                        fp->f_offset = off;
@@ -4858,11 +4969,11 @@ zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
 /*
  * inputs:
  * zc_guid             flags (ZEVENT_NONBLOCK)
+ * zc_cleanup_fd       zevent file descriptor
  *
  * outputs:
  * zc_nvlist_dst       next nvlist event
  * zc_cookie           dropped events since last get
- * zc_cleanup_fd       cleanup-on-exit file descriptor
  */
 static int
 zfs_ioc_events_next(zfs_cmd_t *zc)
@@ -4914,7 +5025,29 @@ zfs_ioc_events_clear(zfs_cmd_t *zc)
        zfs_zevent_drain_all(&count);
        zc->zc_cookie = count;
 
-       return 0;
+       return (0);
+}
+
+/*
+ * inputs:
+ * zc_guid             eid | ZEVENT_SEEK_START | ZEVENT_SEEK_END
+ * zc_cleanup_fd       zevent file descriptor
+ */
+static int
+zfs_ioc_events_seek(zfs_cmd_t *zc)
+{
+       zfs_zevent_t *ze;
+       minor_t minor;
+       int error;
+
+       error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
+       if (error != 0)
+               return (error);
+
+       error = zfs_zevent_seek(ze, zc->zc_guid);
+       zfs_zevent_fd_rele(zc->zc_cleanup_fd);
+
+       return (error);
 }
 
 /*
@@ -5010,6 +5143,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
  * innvl: {
  *     "fd" -> file descriptor to write stream to (int32)
  *     (optional) "fromsnap" -> full snap name to send an incremental from
+ *     (optional) "embedok" -> (value ignored)
+ *         presence indicates DRR_WRITE_EMBEDDED records are permitted
  * }
  *
  * outnvl is unused
@@ -5023,6 +5158,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
        char *fromname = NULL;
        int fd;
        file_t *fp;
+       boolean_t embedok;
 
        error = nvlist_lookup_int32(innvl, "fd", &fd);
        if (error != 0)
@@ -5030,11 +5166,13 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 
        (void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
 
+       embedok = nvlist_exists(innvl, "embedok");
+
        if ((fp = getf(fd)) == NULL)
                return (SET_ERROR(EBADF));
 
        off = fp->f_offset;
-       error = dmu_send(snapname, fromname, fd, fp->f_vnode, &off);
+       error = dmu_send(snapname, fromname, embedok, fd, fp->f_vnode, &off);
 
        if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
                fp->f_offset = off;
@@ -5251,6 +5389,19 @@ zfs_ioctl_init(void)
            zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
 
+       zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
+           zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
+       zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
+           zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
+           POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
+
+       zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
+           zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
+           POOL_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
        /* IOCTLS that use the legacy function signature */
 
        zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
@@ -5391,6 +5542,8 @@ zfs_ioctl_init(void)
            zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
        zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
            zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
+       zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek,
+           zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
 }
 
 int
@@ -5421,20 +5574,21 @@ zfsdev_get_state_impl(minor_t minor, enum zfsdev_state_type which)
 {
        zfsdev_state_t *zs;
 
-       ASSERT(MUTEX_HELD(&zfsdev_state_lock));
-
-       for (zs = list_head(&zfsdev_state_list); zs != NULL;
-            zs = list_next(&zfsdev_state_list, zs)) {
+       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
                if (zs->zs_minor == minor) {
+                       smp_rmb();
                        switch (which) {
-                               case ZST_ONEXIT:  return (zs->zs_onexit);
-                               case ZST_ZEVENT:  return (zs->zs_zevent);
-                               case ZST_ALL:     return (zs);
+                       case ZST_ONEXIT:
+                               return (zs->zs_onexit);
+                       case ZST_ZEVENT:
+                               return (zs->zs_zevent);
+                       case ZST_ALL:
+                               return (zs);
                        }
                }
        }
 
-       return NULL;
+       return (NULL);
 }
 
 void *
@@ -5442,11 +5596,9 @@ zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
 {
        void *ptr;
 
-       mutex_enter(&zfsdev_state_lock);
        ptr = zfsdev_get_state_impl(minor, which);
-       mutex_exit(&zfsdev_state_lock);
 
-       return ptr;
+       return (ptr);
 }
 
 minor_t
@@ -5485,25 +5637,50 @@ zfsdev_minor_alloc(void)
 static int
 zfsdev_state_init(struct file *filp)
 {
-       zfsdev_state_t *zs;
+       zfsdev_state_t *zs, *zsprev = NULL;
        minor_t minor;
+       boolean_t newzs = B_FALSE;
 
        ASSERT(MUTEX_HELD(&zfsdev_state_lock));
 
-        minor = zfsdev_minor_alloc();
-        if (minor == 0)
-                return (SET_ERROR(ENXIO));
+       minor = zfsdev_minor_alloc();
+       if (minor == 0)
+               return (SET_ERROR(ENXIO));
 
-       zs = kmem_zalloc( sizeof(zfsdev_state_t), KM_SLEEP);
+       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
+               if (zs->zs_minor == -1)
+                       break;
+               zsprev = zs;
+       }
+
+       if (!zs) {
+               zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
+               newzs = B_TRUE;
+       }
 
        zs->zs_file = filp;
-       zs->zs_minor = minor;
        filp->private_data = zs;
 
        zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
        zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);
 
-       list_insert_tail(&zfsdev_state_list, zs);
+
+       /*
+        * In order to provide for lock-free concurrent read access
+        * to the minor list in zfsdev_get_state_impl(), new entries
+        * must be completely written before linking them into the
+        * list whereas existing entries are already linked; the last
+        * operation must be updating zs_minor (from -1 to the new
+        * value).
+        */
+       if (newzs) {
+               zs->zs_minor = minor;
+               smp_wmb();
+               zsprev->zs_next = zs;
+       } else {
+               smp_wmb();
+               zs->zs_minor = minor;
+       }
 
        return (0);
 }
@@ -5517,13 +5694,11 @@ zfsdev_state_destroy(struct file *filp)
        ASSERT(filp->private_data != NULL);
 
        zs = filp->private_data;
+       zs->zs_minor = -1;
        zfs_onexit_destroy(zs->zs_onexit);
        zfs_zevent_destroy(zs->zs_zevent);
 
-       list_remove(&zfsdev_state_list, zs);
-       kmem_free(zs, sizeof(zfsdev_state_t));
-
-       return 0;
+       return (0);
 }
 
 static int
@@ -5555,9 +5730,9 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
 {
        zfs_cmd_t *zc;
        uint_t vecnum;
-       int error, rc, len, flag = 0;
+       int error, rc, flag = 0;
        const zfs_ioc_vec_t *vec;
-       char *saved_poolname;
+       char *saved_poolname = NULL;
        nvlist_t *innvl = NULL;
 
        vecnum = cmd - ZFS_IOC_FIRST;
@@ -5565,8 +5740,14 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
                return (-SET_ERROR(EINVAL));
        vec = &zfs_ioc_vec[vecnum];
 
-       zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG);
-       saved_poolname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+       /*
+        * The registered ioctl list may be sparse, verify that either
+        * a normal or legacy handler are registered.
+        */
+       if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL)
+               return (-SET_ERROR(EINVAL));
+
+       zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
 
        error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
        if (error != 0) {
@@ -5616,9 +5797,13 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
                goto out;
 
        /* legacy ioctls can modify zc_name */
-       (void) strlcpy(saved_poolname, zc->zc_name, sizeof(saved_poolname));
-       len = strcspn(saved_poolname, "/@") + 1;
-       saved_poolname[len] = '\0';
+       saved_poolname = strdup(zc->zc_name);
+       if (saved_poolname == NULL) {
+               error = SET_ERROR(ENOMEM);
+               goto out;
+       } else {
+               saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
+       }
 
        if (vec->zvec_func != NULL) {
                nvlist_t *outnvl;
@@ -5683,10 +5868,12 @@ out:
                char *s = tsd_get(zfs_allow_log_key);
                if (s != NULL)
                        strfree(s);
-               (void) tsd_set(zfs_allow_log_key, strdup(saved_poolname));
+               (void) tsd_set(zfs_allow_log_key, saved_poolname);
+       } else {
+               if (saved_poolname != NULL)
+                       strfree(saved_poolname);
        }
 
-       kmem_free(saved_poolname, MAXNAMELEN);
        kmem_free(zc, sizeof (zfs_cmd_t));
        return (-error);
 }
@@ -5695,24 +5882,24 @@ out:
 static long
 zfsdev_compat_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
 {
-        return zfsdev_ioctl(filp, cmd, arg);
+       return (zfsdev_ioctl(filp, cmd, arg));
 }
 #else
-#define zfsdev_compat_ioctl   NULL
+#define        zfsdev_compat_ioctl     NULL
 #endif
 
 static const struct file_operations zfsdev_fops = {
-       .open            = zfsdev_open,
-       .release         = zfsdev_release,
-       .unlocked_ioctl  = zfsdev_ioctl,
-       .compat_ioctl    = zfsdev_compat_ioctl,
-       .owner           = THIS_MODULE,
+       .open           = zfsdev_open,
+       .release        = zfsdev_release,
+       .unlocked_ioctl = zfsdev_ioctl,
+       .compat_ioctl   = zfsdev_compat_ioctl,
+       .owner          = THIS_MODULE,
 };
 
 static struct miscdevice zfs_misc = {
-       .minor          = MISC_DYNAMIC_MINOR,
-       .name           = ZFS_DRIVER,
-       .fops           = &zfsdev_fops,
+       .minor          = MISC_DYNAMIC_MINOR,
+       .name           = ZFS_DRIVER,
+       .fops           = &zfsdev_fops,
 };
 
 static int
@@ -5721,11 +5908,11 @@ zfs_attach(void)
        int error;
 
        mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
-       list_create(&zfsdev_state_list, sizeof (zfsdev_state_t),
-           offsetof(zfsdev_state_t, zs_next));
+       zfsdev_state_list = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
+       zfsdev_state_list->zs_minor = -1;
 
        error = misc_register(&zfs_misc);
-        if (error != 0) {
+       if (error != 0) {
                printk(KERN_INFO "ZFS: misc_register() failed %d\n", error);
                return (error);
        }
@@ -5737,13 +5924,21 @@ static void
 zfs_detach(void)
 {
        int error;
+       zfsdev_state_t *zs, *zsprev = NULL;
 
        error = misc_deregister(&zfs_misc);
        if (error != 0)
                printk(KERN_INFO "ZFS: misc_deregister() failed %d\n", error);
 
        mutex_destroy(&zfsdev_state_lock);
-       list_destroy(&zfsdev_state_list);
+
+       for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
+               if (zsprev)
+                       kmem_free(zsprev, sizeof (zfsdev_state_t));
+               zsprev = zs;
+       }
+       if (zsprev)
+               kmem_free(zsprev, sizeof (zfsdev_state_t));
 }
 
 static void
@@ -5754,9 +5949,9 @@ zfs_allow_log_destroy(void *arg)
 }
 
 #ifdef DEBUG
-#define ZFS_DEBUG_STR  " (DEBUG mode)"
+#define        ZFS_DEBUG_STR   " (DEBUG mode)"
 #else
-#define ZFS_DEBUG_STR  ""
+#define        ZFS_DEBUG_STR   ""
 #endif
 
 int
@@ -5780,9 +5975,9 @@ _init(void)
        tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
 
        printk(KERN_NOTICE "ZFS: Loaded module v%s-%s%s, "
-              "ZFS pool version %s, ZFS filesystem version %s\n",
-              ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR,
-              SPA_VERSION_STRING, ZPL_VERSION_STRING);
+           "ZFS pool version %s, ZFS filesystem version %s\n",
+           ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR,
+           SPA_VERSION_STRING, ZPL_VERSION_STRING);
 #ifndef CONFIG_FS_POSIX_ACL
        printk(KERN_NOTICE "ZFS: Posix ACLs disabled by kernel\n");
 #endif /* CONFIG_FS_POSIX_ACL */
@@ -5795,8 +5990,8 @@ out1:
        zfs_fini();
        spa_fini();
        printk(KERN_NOTICE "ZFS: Failed to Load ZFS Filesystem v%s-%s%s"
-              ", rc = %d\n", ZFS_META_VERSION, ZFS_META_RELEASE,
-              ZFS_DEBUG_STR, error);
+           ", rc = %d\n", ZFS_META_VERSION, ZFS_META_RELEASE,
+           ZFS_DEBUG_STR, error);
 
        return (error);
 }
@@ -5814,7 +6009,7 @@ _fini(void)
        tsd_destroy(&zfs_allow_log_key);
 
        printk(KERN_NOTICE "ZFS: Unloaded module v%s-%s%s\n",
-              ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
+           ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
 
        return (0);
 }