]> git.proxmox.com Git - mirror_zfs.git/commitdiff
Add linux events
authorBrian Behlendorf <behlendorf1@llnl.gov>
Thu, 26 Aug 2010 18:42:43 +0000 (11:42 -0700)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Tue, 31 Aug 2010 20:41:36 +0000 (13:41 -0700)
This topic branch leverages the Solaris style FMA call points
in ZFS to create a user space visible event notification system
under Linux.  This new system is called zevent and it unifies
all previous Solaris style ereports and sysevent notifications.

Under this Linux specific scheme when a sysevent or ereport event
occurs an nvlist describing the event is created which looks almost
exactly like a Solaris ereport.  These events are queued up in the
kernel when they occur and conditionally logged to the console.
It is then up to a user space application to consume the events
and do whatever it likes with them.

To make this possible the existing /dev/zfs ABI has been extended
with two new ioctls which behave as follows.

* ZFS_IOC_EVENTS_NEXT
Get the next pending event.  The kernel will keep track of the last
event consumed by the file descriptor and provide the next one if
available.  If no new events are available the ioctl() will block
waiting for the next event.  This ioctl may also be called in a
non-blocking mode by setting zc.zc_guid = ZEVENT_NONBLOCK.  In the
non-blocking case if no events are available ENOENT will be returned.
It is possible that ESHUTDOWN will be returned if the ioctl() is
called while module unloading is in progress.  And finally ENOMEM
may occur if the provided nvlist buffer is not large enough to
contain the entire event.

* ZFS_IOC_EVENTS_CLEAR
Clear all events queued by the kernel.  The kernel will keep a fairly
large number of recent events queued; use this ioctl to clear the
in-kernel list.  This will affect all user space processes consuming
events.

The zpool command has been extended to use this events ABI with the
'events' subcommand.  You may run 'zpool events -v' to output a
verbose log of all recent events.  This is very similar to the
Solaris 'fmdump -ev' command with the key difference being it also
includes what would be considered sysevents under Solaris.  You
may also run in follow mode with the '-f' option.  To clear the
in kernel event queue use the '-c' option.

$ sudo cmd/zpool/zpool events -fv
TIME                        CLASS
May 13 2010 16:31:15.777711000 ereport.fs.zfs.config.sync
        class = "ereport.fs.zfs.config.sync"
        ena = 0x40982b7897700001
        detector = (embedded nvlist)
                version = 0x0
                scheme = "zfs"
                pool = 0xed976600de75dfa6
        (end detector)

        time = 0x4bec8bc3 0x2e5aed98
        pool = "zpios"
        pool_guid = 0xed976600de75dfa6
        pool_context = 0x0

While the 'zpool events' command is handy for interactive debugging
it is not expected to be the primary consumer of zevents.  This ABI
was primarily added to facilitate the addition of a user space
monitoring daemon.  This daemon would consume all events posted by
the kernel and based on the type of event perform an action.  For
most events simply forwarding them on to syslog is likely enough.
But this interface also cleanly allows for more sophisticated
actions to be taken such as generating an email for a failed drive.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
18 files changed:
cmd/zpool/zpool_main.c
lib/libzfs/include/libzfs.h
lib/libzfs/libzfs_pool.c
lib/libzpool/include/sys/zfs_context.h
module/zcommon/include/sys/fs/zfs.h
module/zfs/dsl_scan.c
module/zfs/fm.c
module/zfs/include/sys/fm/fs/zfs.h
module/zfs/include/sys/fm/protocol.h
module/zfs/include/sys/fm/util.h
module/zfs/include/sys/zfs_context.h
module/zfs/include/sys/zfs_ioctl.h
module/zfs/spa.c
module/zfs/spa_config.c
module/zfs/spa_misc.c
module/zfs/vdev.c
module/zfs/zfs_fm.c
module/zfs/zfs_ioctl.c

index 3d2852326e1552f820a5a9c65da967c0b121a756..074f76e81eb1c575438bf2f84d62340e1c2e2965 100644 (file)
@@ -42,6 +42,8 @@
 #include <zone.h>
 #include <sys/fs/zfs.h>
 #include <sys/stat.h>
+#include <sys/fm/util.h>
+#include <sys/fm/protocol.h>
 
 #include <libzfs.h>
 
@@ -77,6 +79,7 @@ static int zpool_do_export(int, char **);
 static int zpool_do_upgrade(int, char **);
 
 static int zpool_do_history(int, char **);
+static int zpool_do_events(int, char **);
 
 static int zpool_do_get(int, char **);
 static int zpool_do_set(int, char **);
@@ -119,6 +122,7 @@ typedef enum {
        HELP_SCRUB,
        HELP_STATUS,
        HELP_UPGRADE,
+       HELP_EVENTS,
        HELP_GET,
        HELP_SET,
        HELP_SPLIT
@@ -167,6 +171,8 @@ static zpool_command_t command_table[] = {
        { "upgrade",    zpool_do_upgrade,       HELP_UPGRADE            },
        { NULL },
        { "history",    zpool_do_history,       HELP_HISTORY            },
+       { "events",     zpool_do_events,        HELP_EVENTS             },
+       { NULL },
        { "get",        zpool_do_get,           HELP_GET                },
        { "set",        zpool_do_set,           HELP_SET                },
 };
@@ -234,6 +240,8 @@ get_usage(zpool_help_t idx) {
                return (gettext("\tupgrade\n"
                    "\tupgrade -v\n"
                    "\tupgrade [-V version] <-a | pool ...>\n"));
+       case HELP_EVENTS:
+               return (gettext("\tevents [-vfc]\n"));
        case HELP_GET:
                return (gettext("\tget <\"all\" | property[,...]> "
                    "<pool> ...\n"));
@@ -4210,6 +4218,331 @@ zpool_do_history(int argc, char **argv)
        return (ret);
 }
 
+typedef struct ev_opts {       /* switches parsed by zpool_do_events() */
+       int verbose;            /* -v: also print each event's full nvlist */
+       int follow;             /* -f: request events in blocking mode */
+       int clear;              /* -c: clear the event queue instead */
+} ev_opts_t;
+
+/*
+ * Print the one-line summary of an event: its timestamp followed by its
+ * class, e.g. 'May 13 2010 16:31:15.777711000 ereport.fs.zfs.config.sync'.
+ *
+ * The timestamp comes from the FM_EREPORT_TIME int64 array; element 0 is
+ * formatted as calendar time and element 1 as the nine digit fraction.
+ */
+static void
+zpool_do_events_short(nvlist_t *nvl)
+{
+       char ctime_str[26], str[32], *ptr;
+       int64_t *tv;
+       uint_t n;
+       time_t sec;
+       longlong_t nsec;
+
+       verify(nvlist_lookup_int64_array(nvl, FM_EREPORT_TIME, &tv, &n) == 0);
+
+       /*
+        * Copy the values out rather than casting &tv[0] to a time_t
+        * pointer: time_t is not guaranteed to be 64 bits wide.  Missing
+        * array elements are treated as zero instead of being read out
+        * of bounds.
+        */
+       sec = (n > 0) ? (time_t)tv[0] : 0;
+       nsec = (n > 1) ? (longlong_t)tv[1] : 0;
+
+       /* ctime_r() yields 'Wed Jun 30 21:49:08 1993\n', or NULL on failure */
+       if (ctime_r(&sec, ctime_str) == NULL)
+               (void) snprintf(ctime_str, sizeof (ctime_str), "%s",
+                   "??? ??? ?? ??:??:?? ????\n");
+
+       /*
+        * Rearrange into 'Jun 30 1993 21:49:08.123456789'.  snprintf()
+        * bounds the write, so an out-of-range fraction is truncated
+        * rather than overflowing str.
+        */
+       (void) snprintf(str, sizeof (str), "%.6s %.4s %.8s.%09lld",
+           ctime_str + 4, ctime_str + 20, ctime_str + 11, nsec);
+       (void) printf(gettext("%s "), str);
+
+       verify(nvlist_lookup_string(nvl, FM_CLASS, &ptr) == 0);
+       (void) printf(gettext("%s\n"), ptr);
+}
+
+/*
+ * Recursively print every pair of 'nvl', one 'name = value' line per pair,
+ * indented by 'depth' columns.  Embedded nvlists are printed 8 columns
+ * deeper and bracketed by '(embedded nvlist)' / '(end <name>)' markers.
+ * Integer values are shown in hex at the width of their type; any type
+ * without a formatter prints '<unknown>'.
+ */
+static void
+zpool_do_events_nvprint(nvlist_t *nvl, int depth)
+{
+       nvpair_t *nvp;
+
+       for (nvp = nvlist_next_nvpair(nvl, NULL);
+           nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) {
+
+               data_type_t type = nvpair_type(nvp);
+               const char *name = nvpair_name(nvp);
+
+               boolean_t b;
+               uint8_t i8;
+               uint16_t i16;
+               uint32_t i32;
+               uint64_t i64;
+               char *str;
+               nvlist_t *cnv;
+
+               (void) printf(gettext("%*s%s = "), depth, "", name);
+
+               switch (type) {
+               case DATA_TYPE_BOOLEAN:
+                       /* A valueless boolean is true simply by existing */
+                       (void) printf(gettext("%s"), "1");
+                       break;
+
+               case DATA_TYPE_BOOLEAN_VALUE:
+                       (void) nvpair_value_boolean_value(nvp, &b);
+                       (void) printf(gettext("%s"), b ? "1" : "0");
+                       break;
+
+               case DATA_TYPE_BYTE:
+                       (void) nvpair_value_byte(nvp, &i8);
+                       (void) printf(gettext("0x%x"), i8);
+                       break;
+
+               /*
+                * Signed scalars are read into the unsigned variable of
+                * the same width so a negative value prints at its own
+                * width (0xff) instead of sign-extended (0xffffffff).
+                */
+               case DATA_TYPE_INT8:
+                       (void) nvpair_value_int8(nvp, (void *)&i8);
+                       (void) printf(gettext("0x%x"), i8);
+                       break;
+
+               case DATA_TYPE_UINT8:
+                       (void) nvpair_value_uint8(nvp, &i8);
+                       (void) printf(gettext("0x%x"), i8);
+                       break;
+
+               case DATA_TYPE_INT16:
+                       (void) nvpair_value_int16(nvp, (void *)&i16);
+                       (void) printf(gettext("0x%x"), i16);
+                       break;
+
+               case DATA_TYPE_UINT16:
+                       (void) nvpair_value_uint16(nvp, &i16);
+                       (void) printf(gettext("0x%x"), i16);
+                       break;
+
+               case DATA_TYPE_INT32:
+                       (void) nvpair_value_int32(nvp, (void *)&i32);
+                       (void) printf(gettext("0x%x"), i32);
+                       break;
+
+               case DATA_TYPE_UINT32:
+                       (void) nvpair_value_uint32(nvp, &i32);
+                       (void) printf(gettext("0x%x"), i32);
+                       break;
+
+               case DATA_TYPE_INT64:
+                       (void) nvpair_value_int64(nvp, (void *)&i64);
+                       (void) printf(gettext("0x%llx"), (u_longlong_t)i64);
+                       break;
+
+               case DATA_TYPE_UINT64:
+                       (void) nvpair_value_uint64(nvp, &i64);
+                       (void) printf(gettext("0x%llx"), (u_longlong_t)i64);
+                       break;
+
+               case DATA_TYPE_HRTIME:
+                       (void) nvpair_value_hrtime(nvp, (void *)&i64);
+                       (void) printf(gettext("0x%llx"), (u_longlong_t)i64);
+                       break;
+
+               case DATA_TYPE_STRING:
+                       (void) nvpair_value_string(nvp, &str);
+                       (void) printf(gettext("\"%s\""), str ? str : "<NULL>");
+                       break;
+
+               case DATA_TYPE_NVLIST:
+                       (void) printf(gettext("(embedded nvlist)\n"));
+                       (void) nvpair_value_nvlist(nvp, &cnv);
+                       zpool_do_events_nvprint(cnv, depth + 8);
+                       (void) printf(gettext("%*s(end %s)\n"),
+                           depth, "", name);
+                       break;
+
+               case DATA_TYPE_NVLIST_ARRAY: {
+                       nvlist_t **val;
+                       uint_t i, nelem;
+
+                       (void) nvpair_value_nvlist_array(nvp, &val, &nelem);
+                       /* nelem and i are unsigned, hence %u */
+                       (void) printf(gettext("(%u embedded nvlists)\n"),
+                           nelem);
+                       for (i = 0; i < nelem; i++) {
+                               (void) printf(gettext("%*s%s[%u] = %s\n"),
+                                   depth, "", name, i, "(embedded nvlist)");
+                               zpool_do_events_nvprint(val[i], depth + 8);
+                               (void) printf(gettext("%*s(end %s[%u])\n"),
+                                   depth, "", name, i);
+                       }
+                       (void) printf(gettext("%*s(end %s)\n"),
+                           depth, "", name);
+                       break;
+                       }
+
+               /*
+                * Array elements are cast to the unsigned type of the same
+                * width so they print like the scalar cases above.
+                */
+               case DATA_TYPE_INT8_ARRAY: {
+                       int8_t *val;
+                       uint_t i, nelem;
+
+                       (void) nvpair_value_int8_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               (void) printf(gettext("0x%x "),
+                                   (uint8_t)val[i]);
+
+                       break;
+                       }
+
+               case DATA_TYPE_UINT8_ARRAY: {
+                       uint8_t *val;
+                       uint_t i, nelem;
+
+                       (void) nvpair_value_uint8_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               (void) printf(gettext("0x%x "), val[i]);
+
+                       break;
+                       }
+
+               case DATA_TYPE_INT16_ARRAY: {
+                       int16_t *val;
+                       uint_t i, nelem;
+
+                       (void) nvpair_value_int16_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               (void) printf(gettext("0x%x "),
+                                   (uint16_t)val[i]);
+
+                       break;
+                       }
+
+               case DATA_TYPE_UINT16_ARRAY: {
+                       uint16_t *val;
+                       uint_t i, nelem;
+
+                       (void) nvpair_value_uint16_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               (void) printf(gettext("0x%x "), val[i]);
+
+                       break;
+                       }
+
+               case DATA_TYPE_INT32_ARRAY: {
+                       int32_t *val;
+                       uint_t i, nelem;
+
+                       (void) nvpair_value_int32_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               (void) printf(gettext("0x%x "),
+                                   (uint32_t)val[i]);
+
+                       break;
+                       }
+
+               case DATA_TYPE_UINT32_ARRAY: {
+                       uint32_t *val;
+                       uint_t i, nelem;
+
+                       (void) nvpair_value_uint32_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               (void) printf(gettext("0x%x "), val[i]);
+
+                       break;
+                       }
+
+               case DATA_TYPE_INT64_ARRAY: {
+                       int64_t *val;
+                       uint_t i, nelem;
+
+                       (void) nvpair_value_int64_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               (void) printf(gettext("0x%llx "),
+                                   (u_longlong_t)val[i]);
+
+                       break;
+                       }
+
+               case DATA_TYPE_UINT64_ARRAY: {
+                       uint64_t *val;
+                       uint_t i, nelem;
+
+                       (void) nvpair_value_uint64_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               (void) printf(gettext("0x%llx "),
+                                   (u_longlong_t)val[i]);
+
+                       break;
+                       }
+
+               /*
+                * Types with no formatter here.  The default label makes
+                * sure a type missing from this switch still prints a
+                * value rather than leaving 'name = ' dangling.
+                */
+               case DATA_TYPE_STRING_ARRAY:
+               case DATA_TYPE_BOOLEAN_ARRAY:
+               case DATA_TYPE_BYTE_ARRAY:
+               case DATA_TYPE_DOUBLE:
+               case DATA_TYPE_UNKNOWN:
+               default:
+                       (void) printf(gettext("<unknown>"));
+                       break;
+               }
+
+               (void) printf(gettext("\n"));
+       }
+}
+
+/*
+ * Print events one at a time until zpool_events_next() reports an error
+ * or returns no event.  opts->follow is passed on as the blocking flag,
+ * and with opts->verbose set each summary line is followed by the
+ * event's full nvlist.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int
+zpool_do_events_next(ev_opts_t *opts)
+{
+       nvlist_t *nvl;
+       int cleanup_fd, ret, dropped;
+
+       /*
+        * This descriptor is handed to zpool_events_next() with every
+        * request.  Failing to open it is an ordinary runtime error, so
+        * report it and return instead of aborting the process.
+        */
+       cleanup_fd = open(ZFS_DEV, O_RDWR);
+       if (cleanup_fd < 0) {
+               perror(ZFS_DEV);
+               return (1);
+       }
+
+       (void) printf(gettext("%-30s %s\n"), "TIME", "CLASS");
+
+       while (1) {
+               ret = zpool_events_next(g_zfs, &nvl, &dropped,
+                   !!opts->follow, cleanup_fd);
+               if (ret || nvl == NULL)
+                       break;
+
+               if (dropped > 0)
+                       (void) printf(gettext("dropped %d events\n"), dropped);
+
+               zpool_do_events_short(nvl);
+
+               if (opts->verbose) {
+                       zpool_do_events_nvprint(nvl, 8);
+                       (void) printf(gettext("\n"));
+               }
+
+               nvlist_free(nvl);
+       }
+
+       /* Nothing useful can be done if close() fails at this point */
+       (void) close(cleanup_fd);
+
+       return (ret);
+}
+
+/*
+ * Clear the queued events and report how many were cleared.  'opts' is
+ * accepted for symmetry with zpool_do_events_next() but is not consulted.
+ *
+ * Returns the result of zpool_events_clear().
+ */
+static int
+zpool_do_events_clear(ev_opts_t *opts)
+{
+       int nr_cleared;
+       int err;
+
+       err = zpool_events_clear(g_zfs, &nr_cleared);
+       if (err != 0)
+               return (err);
+
+       (void) printf(gettext("cleared %d events\n"), nr_cleared);
+
+       return (0);
+}
+
+/*
+ * zpool events [-vfc]
+ *
+ * Display the events logged by ZFS, or clear them.
+ *
+ *     -v      Print the full contents of each event
+ *     -f      Follow mode, request events in blocking mode
+ *     -c      Clear the queued events instead of displaying them
+ */
+int
+zpool_do_events(int argc, char **argv)
+{
+       ev_opts_t opts = { 0 };
+       int opt;
+
+       /* check options */
+       for (;;) {
+               opt = getopt(argc, argv, "vfc");
+               if (opt == -1)
+                       break;
+
+               if (opt == 'v') {
+                       opts.verbose = 1;
+               } else if (opt == 'f') {
+                       opts.follow = 1;
+               } else if (opt == 'c') {
+                       opts.clear = 1;
+               } else if (opt == '?') {
+                       (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+                           optopt);
+                       usage(B_FALSE);
+               }
+       }
+       argc -= optind;
+       argv += optind;
+
+       /* -c takes precedence: the queue is cleared and nothing is shown */
+       if (opts.clear)
+               return (zpool_do_events_clear(&opts));
+
+       return (zpool_do_events_next(&opts));
+}
+
 static int
 get_callback(zpool_handle_t *zhp, void *data)
 {
index ea34cc9efa319b274d1b371905c63cc171b61646..b0287ed1f8205406aa633aa5910fc6f6225a39dc 100644 (file)
@@ -368,6 +368,8 @@ extern int zpool_history_unpack(char *, uint64_t, uint64_t *,
 extern void zpool_set_history_str(const char *subcommand, int argc,
     char **argv, char *history_str);
 extern int zpool_stage_history(libzfs_handle_t *, const char *);
+extern int zpool_events_next(libzfs_handle_t *, nvlist_t **, int *, int, int);
+extern int zpool_events_clear(libzfs_handle_t *, int *);
 extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
     size_t len);
 extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *);
index f1edb77f0f6a5bf1bf88d4cd07c8458c7e861e55..42f303894aed40cd800d1689998c77440baa0f6b 100644 (file)
@@ -3474,6 +3474,94 @@ zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
        return (err);
 }
 
+/*
+ * Fetch the next event from the kernel.
+ *
+ * On success 0 is returned and '*nvp' is either a newly allocated nvlist
+ * describing the event, which the caller must free, or NULL when no new
+ * event is available.  '*dropped' is set to the number of events missed
+ * since the previous call.  A non-zero return value indicates a fatal
+ * error.
+ *
+ * With 'block' set the call does not return until an event is available.
+ * Otherwise ZEVENT_NONBLOCK is requested and running out of events is
+ * not an error.  'cleanup_fd' is passed to the ioctl in zc_cleanup_fd.
+ */
+int
+zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp,
+    int *dropped, int block, int cleanup_fd)
+{
+       zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
+       int error = 0;
+
+       *nvp = NULL;
+       *dropped = 0;
+       zc.zc_cleanup_fd = cleanup_fd;
+
+       if (!block)
+               zc.zc_guid = ZEVENT_NONBLOCK;
+
+       if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0)
+               return (-1);
+
+       for (;;) {
+               if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) == 0) {
+                       /* Got an event, unpack it for the caller */
+                       error = zcmd_read_dst_nvlist(hdl, &zc, nvp);
+                       if (error == 0)
+                               *dropped = (int)zc.zc_cookie;
+                       break;
+               }
+
+               if (errno == ENOMEM) {
+                       /* Buffer too small, grow it and ask again */
+                       if (zcmd_expand_dst_nvlist(hdl, &zc) == 0)
+                               continue;
+
+                       error = zfs_error_fmt(hdl, EZFS_NOMEM,
+                           dgettext(TEXT_DOMAIN, "cannot get event"));
+               } else if (errno == ESHUTDOWN) {
+                       error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
+                           dgettext(TEXT_DOMAIN, "zfs shutdown"));
+               } else if (errno != ENOENT || block) {
+                       /*
+                        * ENOENT only means "no more events" for a
+                        * non-blocking request; a blocking request
+                        * should never see it, so it is reported like
+                        * any other failure.
+                        */
+                       error = zpool_standard_error_fmt(hdl, errno,
+                           dgettext(TEXT_DOMAIN, "cannot get event"));
+               }
+               break;
+       }
+
+       zcmd_free_nvlists(&zc);
+
+       return (error);
+}
+
+/*
+ * Clear all events.  On success, if 'count' is non-NULL it is set to the
+ * number of events cleared.
+ *
+ * Returns 0 on success, otherwise the value returned by
+ * zpool_standard_error_fmt().
+ */
+int
+zpool_events_clear(libzfs_handle_t *hdl, int *count)
+{
+       zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
+
+       /*
+        * The message is handed to the error routine directly, as
+        * zpool_events_next() does, rather than being pre-formatted into
+        * a stack buffer that is then interpreted as a format string a
+        * second time.
+        */
+       if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0)
+               return (zpool_standard_error_fmt(hdl, errno,
+                   dgettext(TEXT_DOMAIN, "cannot clear events")));
+
+       if (count != NULL)
+               *count = (int)zc.zc_cookie; /* # of events cleared */
+
+       return (0);
+}
+
 void
 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
     char *pathname, size_t len)
index f2776566c07606620b384e4df8dcc75a2bbac4c7..34c351bd0dbccbdded3e0631cdb6140b122447e9 100644 (file)
@@ -57,7 +57,6 @@ extern "C" {
 #include <atomic.h>
 #include <dirent.h>
 #include <time.h>
-#include <libsysevent.h>
 #include <sys/note.h>
 #include <sys/types.h>
 #include <sys/cred.h>
@@ -71,8 +70,7 @@ extern "C" {
 #include <sys/sdt.h>
 #include <sys/kstat.h>
 #include <sys/u8_textprep.h>
-#include <sys/sysevent/eventdefs.h>
-#include <sys/sysevent/dev.h>
+#include <sys/fm/fs/zfs.h>
 #include <sys/sunddi.h>
 
 /*
index da0b12bab4a99e0fb95e5d453604d7ce07431222..b2c946919f0d16b36a290ff94423190484218e81 100644 (file)
@@ -774,7 +774,9 @@ typedef enum zfs_ioc {
        ZFS_IOC_NEXT_OBJ,
        ZFS_IOC_DIFF,
        ZFS_IOC_TMP_SNAPSHOT,
-       ZFS_IOC_OBJ_TO_STATS
+       ZFS_IOC_OBJ_TO_STATS,
+       ZFS_IOC_EVENTS_NEXT,
+       ZFS_IOC_EVENTS_CLEAR,
 } zfs_ioc_t;
 
 /*
index 525832b452feeea4df82a915e0c2dfd8275d0bda..c37a8224c2bf3ea2f89f85a60333d239cae854f7 100644 (file)
@@ -50,9 +50,7 @@
 
 typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
 
-static scan_cb_t dsl_scan_defrag_cb;
 static scan_cb_t dsl_scan_scrub_cb;
-static scan_cb_t dsl_scan_remove_cb;
 static dsl_syncfunc_t dsl_scan_cancel_sync;
 static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx);
 
@@ -194,9 +192,9 @@ dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 
                if (vdev_resilver_needed(spa->spa_root_vdev,
                    &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) {
-                       spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START);
+                       spa_event_notify(spa, NULL, FM_EREPORT_ZFS_RESILVER_START);
                } else {
-                       spa_event_notify(spa, NULL, ESC_ZFS_SCRUB_START);
+                       spa_event_notify(spa, NULL, FM_EREPORT_ZFS_SCRUB_START);
                }
 
                spa->spa_scrub_started = B_TRUE;
@@ -297,7 +295,8 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
                    complete ? scn->scn_phys.scn_max_txg : 0, B_TRUE);
                if (complete) {
                        spa_event_notify(spa, NULL, scn->scn_phys.scn_min_txg ?
-                           ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH);
+                           FM_EREPORT_ZFS_RESILVER_FINISH :
+                           FM_EREPORT_ZFS_SCRUB_FINISH);
                }
                spa_errlog_rotate(spa);
 
index 4efcff4f464adf18714c2d0e0368f4319fee7154..67d0c1a6e4cdfcab2afcf2108042b58cea419db1 100644 (file)
 
 #include <sys/types.h>
 #include <sys/time.h>
-#include <sys/sysevent.h>
-#include <sys/sysevent_impl.h>
+#include <sys/list.h>
 #include <sys/nvpair.h>
 #include <sys/cmn_err.h>
-#include <sys/cpuvar.h>
 #include <sys/sysmacros.h>
-#include <sys/systm.h>
-#include <sys/ddifm.h>
-#include <sys/ddifm_impl.h>
-#include <sys/spl.h>
-#include <sys/dumphdr.h>
 #include <sys/compress.h>
-#include <sys/cpuvar.h>
-#include <sys/console.h>
-#include <sys/panic.h>
-#include <sys/kobj.h>
 #include <sys/sunddi.h>
 #include <sys/systeminfo.h>
-#include <sys/sysevent/eventdefs.h>
 #include <sys/fm/util.h>
 #include <sys/fm/protocol.h>
+#include <sys/kstat.h>
+#include <sys/zfs_context.h>
+#ifdef _KERNEL
+#include <sys/atomic.h>
+#include <sys/condvar.h>
+#include <sys/cpuvar.h>
+#include <sys/systm.h>
+#include <sys/dumphdr.h>
+#include <sys/cpuvar.h>
+#include <sys/console.h>
+#include <sys/kobj.h>
+#include <sys/time.h>
+#include <sys/zfs_ioctl.h>
 
-/*
- * URL and SUNW-MSG-ID value to display for fm_panic(), defined below.  These
- * values must be kept in sync with the FMA source code in usr/src/cmd/fm.
- */
-static const char *fm_url = "http://www.sun.com/msg";
-static const char *fm_msgid = "SUNOS-8000-0G";
-static char *volatile fm_panicstr = NULL;
+int zevent_len_max = 0;
+int zevent_cols = 80;
+int zevent_console = 0;
 
-errorq_t *ereport_errorq;
-void *ereport_dumpbuf;
-size_t ereport_dumplen;
+static int zevent_len_cur = 0;
+static int zevent_waiters = 0;
+static int zevent_flags = 0;
 
-static uint_t ereport_chanlen = ERPT_EVCH_MAX;
-static evchan_t *ereport_chan = NULL;
-static ulong_t ereport_qlen = 0;
-static size_t ereport_size = 0;
-static int ereport_cols = 80;
+static kmutex_t zevent_lock;
+static list_t zevent_list;
+static kcondvar_t zevent_cv;
+#endif /* _KERNEL */
 
 extern void fastreboot_disable_highpil(void);
 
 /*
- * Common fault management kstats to record ereport generation
- * failures
+ * Common fault management kstats to record event generation failures
  */
 
 struct erpt_kstat {
@@ -114,57 +109,9 @@ static struct erpt_kstat erpt_kstat_data = {
        { "payload-set-failed", KSTAT_DATA_UINT64 }
 };
 
-/*ARGSUSED*/
-static void
-fm_drain(void *private, void *data, errorq_elem_t *eep)
-{
-       nvlist_t *nvl = errorq_elem_nvl(ereport_errorq, eep);
-
-       if (!panicstr)
-               (void) fm_ereport_post(nvl, EVCH_TRYHARD);
-       else
-               fm_nvprint(nvl);
-}
-
-void
-fm_init(void)
-{
-       kstat_t *ksp;
+kstat_t *fm_ksp;
 
-       (void) sysevent_evc_bind(FM_ERROR_CHAN,
-           &ereport_chan, EVCH_CREAT | EVCH_HOLD_PEND);
-
-       (void) sysevent_evc_control(ereport_chan,
-           EVCH_SET_CHAN_LEN, &ereport_chanlen);
-
-       if (ereport_qlen == 0)
-               ereport_qlen = ERPT_MAX_ERRS * MAX(max_ncpus, 4);
-
-       if (ereport_size == 0)
-               ereport_size = ERPT_DATA_SZ;
-
-       ereport_errorq = errorq_nvcreate("fm_ereport_queue",
-           (errorq_func_t)fm_drain, NULL, ereport_qlen, ereport_size,
-           FM_ERR_PIL, ERRORQ_VITAL);
-       if (ereport_errorq == NULL)
-               panic("failed to create required ereport error queue");
-
-       ereport_dumpbuf = kmem_alloc(ereport_size, KM_SLEEP);
-       ereport_dumplen = ereport_size;
-
-       /* Initialize ereport allocation and generation kstats */
-       ksp = kstat_create("unix", 0, "fm", "misc", KSTAT_TYPE_NAMED,
-           sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
-           KSTAT_FLAG_VIRTUAL);
-
-       if (ksp != NULL) {
-               ksp->ks_data = &erpt_kstat_data;
-               kstat_install(ksp);
-       } else {
-               cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
-
-       }
-}
+#ifdef _KERNEL
 
 /*
  * Formatting utility function for fm_nvprintr.  We attempt to wrap chunks of
@@ -183,7 +130,7 @@ fm_printf(int depth, int c, int cols, const char *format, ...)
        va_end(ap);
 
        if (c + width >= cols) {
-               console_printf("\n\r");
+               console_printf("\n");
                c = 0;
                if (format[0] != ' ' && depth > 0) {
                        console_printf(" ");
@@ -245,54 +192,54 @@ fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
 
                case DATA_TYPE_BYTE:
                        (void) nvpair_value_byte(nvp, &i8);
-                       c = fm_printf(d + 1, c, cols, "%x", i8);
+                       c = fm_printf(d + 1, c, cols, "0x%x", i8);
                        break;
 
                case DATA_TYPE_INT8:
                        (void) nvpair_value_int8(nvp, (void *)&i8);
-                       c = fm_printf(d + 1, c, cols, "%x", i8);
+                       c = fm_printf(d + 1, c, cols, "0x%x", i8);
                        break;
 
                case DATA_TYPE_UINT8:
                        (void) nvpair_value_uint8(nvp, &i8);
-                       c = fm_printf(d + 1, c, cols, "%x", i8);
+                       c = fm_printf(d + 1, c, cols, "0x%x", i8);
                        break;
 
                case DATA_TYPE_INT16:
                        (void) nvpair_value_int16(nvp, (void *)&i16);
-                       c = fm_printf(d + 1, c, cols, "%x", i16);
+                       c = fm_printf(d + 1, c, cols, "0x%x", i16);
                        break;
 
                case DATA_TYPE_UINT16:
                        (void) nvpair_value_uint16(nvp, &i16);
-                       c = fm_printf(d + 1, c, cols, "%x", i16);
+                       c = fm_printf(d + 1, c, cols, "0x%x", i16);
                        break;
 
                case DATA_TYPE_INT32:
                        (void) nvpair_value_int32(nvp, (void *)&i32);
-                       c = fm_printf(d + 1, c, cols, "%x", i32);
+                       c = fm_printf(d + 1, c, cols, "0x%x", i32);
                        break;
 
                case DATA_TYPE_UINT32:
                        (void) nvpair_value_uint32(nvp, &i32);
-                       c = fm_printf(d + 1, c, cols, "%x", i32);
+                       c = fm_printf(d + 1, c, cols, "0x%x", i32);
                        break;
 
                case DATA_TYPE_INT64:
                        (void) nvpair_value_int64(nvp, (void *)&i64);
-                       c = fm_printf(d + 1, c, cols, "%llx",
+                       c = fm_printf(d + 1, c, cols, "0x%llx",
                            (u_longlong_t)i64);
                        break;
 
                case DATA_TYPE_UINT64:
                        (void) nvpair_value_uint64(nvp, &i64);
-                       c = fm_printf(d + 1, c, cols, "%llx",
+                       c = fm_printf(d + 1, c, cols, "0x%llx",
                            (u_longlong_t)i64);
                        break;
 
                case DATA_TYPE_HRTIME:
                        (void) nvpair_value_hrtime(nvp, (void *)&i64);
-                       c = fm_printf(d + 1, c, cols, "%llx",
+                       c = fm_printf(d + 1, c, cols, "0x%llx",
                            (u_longlong_t)i64);
                        break;
 
@@ -322,19 +269,124 @@ fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
                        }
                        break;
 
+               case DATA_TYPE_INT8_ARRAY: {
+                       int8_t *val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[ ");
+                       (void) nvpair_value_int8_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               c = fm_printf(d + 1, c, cols, "0x%llx ",
+                                             (u_longlong_t)val[i]);
+
+                       c = fm_printf(d + 1, c, cols, "]");
+                       break;
+                       }
+
+               case DATA_TYPE_UINT8_ARRAY: {
+                       uint8_t *val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[ ");
+                       (void) nvpair_value_uint8_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               c = fm_printf(d + 1, c, cols, "0x%llx ",
+                                             (u_longlong_t)val[i]);
+
+                       c = fm_printf(d + 1, c, cols, "]");
+                       break;
+                       }
+
+               case DATA_TYPE_INT16_ARRAY: {
+                       int16_t *val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[ ");
+                       (void) nvpair_value_int16_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               c = fm_printf(d + 1, c, cols, "0x%llx ",
+                                             (u_longlong_t)val[i]);
+
+                       c = fm_printf(d + 1, c, cols, "]");
+                       break;
+                       }
+
+               case DATA_TYPE_UINT16_ARRAY: {
+                       uint16_t *val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[ ");
+                       (void) nvpair_value_uint16_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               c = fm_printf(d + 1, c, cols, "0x%llx ",
+                                             (u_longlong_t)val[i]);
+
+                       c = fm_printf(d + 1, c, cols, "]");
+                       break;
+                       }
+
+               case DATA_TYPE_INT32_ARRAY: {
+                       int32_t *val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[ ");
+                       (void) nvpair_value_int32_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               c = fm_printf(d + 1, c, cols, "0x%llx ",
+                                             (u_longlong_t)val[i]);
+
+                       c = fm_printf(d + 1, c, cols, "]");
+                       break;
+                       }
+
+               case DATA_TYPE_UINT32_ARRAY: {
+                       uint32_t *val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[ ");
+                       (void) nvpair_value_uint32_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               c = fm_printf(d + 1, c, cols, "0x%llx ",
+                                             (u_longlong_t)val[i]);
+
+                       c = fm_printf(d + 1, c, cols, "]");
+                       break;
+                       }
+
+               case DATA_TYPE_INT64_ARRAY: {
+                       int64_t *val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[ ");
+                       (void) nvpair_value_int64_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               c = fm_printf(d + 1, c, cols, "0x%llx ",
+                                             (u_longlong_t)val[i]);
+
+                       c = fm_printf(d + 1, c, cols, "]");
+                       break;
+                       }
+
+               case DATA_TYPE_UINT64_ARRAY: {
+                       uint64_t *val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[ ");
+                       (void) nvpair_value_uint64_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               c = fm_printf(d + 1, c, cols, "0x%llx ",
+                                             (u_longlong_t)val[i]);
+
+                       c = fm_printf(d + 1, c, cols, "]");
+                       break;
+                       }
+
+               case DATA_TYPE_STRING_ARRAY:
                case DATA_TYPE_BOOLEAN_ARRAY:
                case DATA_TYPE_BYTE_ARRAY:
-               case DATA_TYPE_INT8_ARRAY:
-               case DATA_TYPE_UINT8_ARRAY:
-               case DATA_TYPE_INT16_ARRAY:
-               case DATA_TYPE_UINT16_ARRAY:
-               case DATA_TYPE_INT32_ARRAY:
-               case DATA_TYPE_UINT32_ARRAY:
-               case DATA_TYPE_INT64_ARRAY:
-               case DATA_TYPE_UINT64_ARRAY:
-               case DATA_TYPE_STRING_ARRAY:
                        c = fm_printf(d + 1, c, cols, "[...]");
                        break;
+
                case DATA_TYPE_UNKNOWN:
                        c = fm_printf(d + 1, c, cols, "<unknown>");
                        break;
@@ -350,191 +402,255 @@ fm_nvprint(nvlist_t *nvl)
        char *class;
        int c = 0;
 
-       console_printf("\r");
+       console_printf("\n");
 
        if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0)
-               c = fm_printf(0, c, ereport_cols, "%s", class);
+               c = fm_printf(0, c, zevent_cols, "%s", class);
 
-       if (fm_nvprintr(nvl, 0, c, ereport_cols) != 0)
+       if (fm_nvprintr(nvl, 0, c, zevent_cols) != 0)
                console_printf("\n");
 
        console_printf("\n");
 }
 
-/*
- * Wrapper for panic() that first produces an FMA-style message for admins.
- * Normally such messages are generated by fmd(1M)'s syslog-msgs agent: this
- * is the one exception to that rule and the only error that gets messaged.
- * This function is intended for use by subsystems that have detected a fatal
- * error and enqueued appropriate ereports and wish to then force a panic.
- */
-/*PRINTFLIKE1*/
+static zevent_t *
+zfs_zevent_alloc(void)
+{
+       zevent_t *ev;
+
+       ev = kmem_zalloc(sizeof(zevent_t), KM_SLEEP);
+       if (ev == NULL)
+               return NULL;
+
+       list_create(&ev->ev_ze_list, sizeof(zfs_zevent_t),
+                   offsetof(zfs_zevent_t, ze_node));
+       list_link_init(&ev->ev_node);
+
+       return ev;
+}
+
+static void
+zfs_zevent_free(zevent_t *ev)
+{
+       /* Run provided cleanup callback */
+       ev->ev_cb(ev->ev_nvl, ev->ev_detector);
+
+       list_destroy(&ev->ev_ze_list);
+       kmem_free(ev, sizeof(zevent_t));
+}
+
+static void
+zfs_zevent_drain(zevent_t *ev)
+{
+       zfs_zevent_t *ze;
+
+       ASSERT(MUTEX_HELD(&zevent_lock));
+       list_remove(&zevent_list, ev);
+
+       /* Remove references to this event in all private file data */
+       while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
+               list_remove(&ev->ev_ze_list, ze);
+               ze->ze_zevent = NULL;
+               ze->ze_dropped++;
+       }
+
+       zfs_zevent_free(ev);
+}
+
 void
-fm_panic(const char *format, ...)
+zfs_zevent_drain_all(int *count)
 {
-       va_list ap;
+       zevent_t *ev;
 
-       (void) casptr((void *)&fm_panicstr, NULL, (void *)format);
-#if defined(__i386) || defined(__amd64)
-       fastreboot_disable_highpil();
-#endif /* __i386 || __amd64 */
-       va_start(ap, format);
-       vpanic(format, ap);
-       va_end(ap);
+       mutex_enter(&zevent_lock);
+       while ((ev = list_head(&zevent_list)) != NULL)
+               zfs_zevent_drain(ev);
+
+       *count = zevent_len_cur;
+       zevent_len_cur = 0;
+       mutex_exit(&zevent_lock);
 }
 
 /*
- * Simply tell the caller if fm_panicstr is set, ie. an fma event has
- * caused the panic. If so, something other than the default panic
- * diagnosis method will diagnose the cause of the panic.
+ * New zevents are inserted at the head.  If the maximum queue
+ * length is exceeded a zevent will be drained from the tail.
+ * As part of this any user space processes which currently have
+ * a reference to this zevent_t in their private data will have
+ * this reference set to NULL.
  */
-int
-is_fm_panic()
+static void
+zfs_zevent_insert(zevent_t *ev)
 {
-       if (fm_panicstr)
-               return (1);
+       mutex_enter(&zevent_lock);
+       list_insert_head(&zevent_list, ev);
+       if (zevent_len_cur >= zevent_len_max)
+               zfs_zevent_drain(list_tail(&zevent_list));
        else
-               return (0);
+               zevent_len_cur++;
+
+       mutex_exit(&zevent_lock);
 }
 
 /*
- * Print any appropriate FMA banner message before the panic message.  This
- * function is called by panicsys() and prints the message for fm_panic().
- * We print the message here so that it comes after the system is quiesced.
- * A one-line summary is recorded in the log only (cmn_err(9F) with "!" prefix).
- * The rest of the message is for the console only and not needed in the log,
- * so it is printed using console_printf().  We break it up into multiple
- * chunks so as to avoid overflowing any small legacy prom_printf() buffers.
+ * Post a zevent
  */
 void
-fm_banner(void)
+zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
 {
-       timespec_t tod;
-       hrtime_t now;
+       int64_t tv_array[2];
+       timestruc_t tv;
+       size_t nvl_size = 0;
+       zevent_t *ev;
 
-       if (!fm_panicstr)
-               return; /* panic was not initiated by fm_panic(); do nothing */
+       gethrestime(&tv);
+       tv_array[0] = tv.tv_sec;
+       tv_array[1] = tv.tv_nsec;
+       if (nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2)) {
+               atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
+               return;
+       }
 
-       if (panicstr) {
-               tod = panic_hrestime;
-               now = panic_hrtime;
-       } else {
-               gethrestime(&tod);
-               now = gethrtime_waitfree();
+       (void) nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
+       if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
+               atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
+               return;
        }
 
-       cmn_err(CE_NOTE, "!SUNW-MSG-ID: %s, "
-           "TYPE: Error, VER: 1, SEVERITY: Major\n", fm_msgid);
+       if (zevent_console)
+               fm_nvprint(nvl);
 
-       console_printf(
-"\n\rSUNW-MSG-ID: %s, TYPE: Error, VER: 1, SEVERITY: Major\n"
-"EVENT-TIME: 0x%lx.0x%lx (0x%llx)\n",
-           fm_msgid, tod.tv_sec, tod.tv_nsec, (u_longlong_t)now);
+       ev = zfs_zevent_alloc();
+       if (ev == NULL) {
+               atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
+               return;
+       }
 
-       console_printf(
-"PLATFORM: %s, CSN: -, HOSTNAME: %s\n"
-"SOURCE: %s, REV: %s %s\n",
-           platform, utsname.nodename, utsname.sysname,
-           utsname.release, utsname.version);
+        ev->ev_nvl = nvl;
+       ev->ev_detector = detector;
+       ev->ev_cb = cb;
+       zfs_zevent_insert(ev);
+       cv_broadcast(&zevent_cv);
+}
 
-       console_printf(
-"DESC: Errors have been detected that require a reboot to ensure system\n"
-"integrity.  See %s/%s for more information.\n",
-           fm_url, fm_msgid);
+static int
+zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
+{
+       *ze = zfsdev_get_state(minor, ZST_ZEVENT);
+       if (*ze == NULL)
+               return (EBADF);
 
-       console_printf(
-"AUTO-RESPONSE: Solaris will attempt to save and diagnose the error telemetry\n"
-"IMPACT: The system will sync files, save a crash dump if needed, and reboot\n"
-"REC-ACTION: Save the error summary below in case telemetry cannot be saved\n");
+       return (0);
+}
 
-       console_printf("\n");
+int
+zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
+{
+       file_t *fp;
+       int error;
+
+        fp = getf(fd);
+        if (fp == NULL)
+                return (EBADF);
+
+        *minorp = zfsdev_getminor(fp->f_file);
+        error = zfs_zevent_minor_to_state(*minorp, ze);
+
+       if (error)
+               zfs_zevent_fd_rele(fd);
+
+       return (error);
+}
+
+void
+zfs_zevent_fd_rele(int fd)
+{
+       releasef(fd);
 }
 
 /*
- * Utility function to write all of the pending ereports to the dump device.
- * This function is called at either normal reboot or panic time, and simply
- * iterates over the in-transit messages in the ereport sysevent channel.
+ * Get the next zevent in the stream and place a copy in 'event'.
  */
-void
-fm_ereport_dump(void)
+int
+zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *dropped)
 {
-       evchanq_t *chq;
-       sysevent_t *sep;
-       erpt_dump_t ed;
-
-       timespec_t tod;
-       hrtime_t now;
-       char *buf;
-       size_t len;
-
-       if (panicstr) {
-               tod = panic_hrestime;
-               now = panic_hrtime;
+       zevent_t *ev;
+       int error;
+
+       mutex_enter(&zevent_lock);
+       if (ze->ze_zevent == NULL) {
+               /* New stream start at the beginning/tail */
+               ev = list_tail(&zevent_list);
+               if (ev == NULL) {
+                       error = ENOENT;
+                       goto out;
+               }
        } else {
-               if (ereport_errorq != NULL)
-                       errorq_drain(ereport_errorq);
-               gethrestime(&tod);
-               now = gethrtime_waitfree();
+               /* Existing stream continue with the next element and remove
+                * ourselves from the wait queue for the previous element */
+               ev = list_prev(&zevent_list, ze->ze_zevent);
+               if (ev == NULL) {
+                       error = ENOENT;
+                       goto out;
+               }
+
+               list_remove(&ze->ze_zevent->ev_ze_list, ze);
        }
 
-       /*
-        * In the panic case, sysevent_evc_walk_init() will return NULL.
-        */
-       if ((chq = sysevent_evc_walk_init(ereport_chan, NULL)) == NULL &&
-           !panicstr)
-               return; /* event channel isn't initialized yet */
+       ze->ze_zevent = ev;
+       list_insert_head(&ev->ev_ze_list, ze);
+       nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
+       *dropped = ze->ze_dropped;
+       ze->ze_dropped = 0;
+out:
+       mutex_exit(&zevent_lock);
 
-       while ((sep = sysevent_evc_walk_step(chq)) != NULL) {
-               if ((buf = sysevent_evc_event_attr(sep, &len)) == NULL)
-                       break;
+       return error;
+}
+
+int
+zfs_zevent_wait(zfs_zevent_t *ze)
+{
+       int error = 0;
+
+       mutex_enter(&zevent_lock);
 
-               ed.ed_magic = ERPT_MAGIC;
-               ed.ed_chksum = checksum32(buf, len);
-               ed.ed_size = (uint32_t)len;
-               ed.ed_pad = 0;
-               ed.ed_hrt_nsec = SE_TIME(sep);
-               ed.ed_hrt_base = now;
-               ed.ed_tod_base.sec = tod.tv_sec;
-               ed.ed_tod_base.nsec = tod.tv_nsec;
-
-               dumpvp_write(&ed, sizeof (ed));
-               dumpvp_write(buf, len);
+       if (zevent_flags & ZEVENT_SHUTDOWN) {
+               error = ESHUTDOWN;
+               goto out;
        }
 
-       sysevent_evc_walk_fini(chq);
+       zevent_waiters++;
+       cv_wait_interruptible(&zevent_cv, &zevent_lock);
+       if (issig(JUSTLOOKING))
+               error = EINTR;
+
+       zevent_waiters--;
+out:
+       mutex_exit(&zevent_lock);
+
+       return error;
 }
 
-/*
- * Post an error report (ereport) to the sysevent error channel.  The error
- * channel must be established with a prior call to sysevent_evc_create()
- * before publication may occur.
- */
 void
-fm_ereport_post(nvlist_t *ereport, int evc_flag)
+zfs_zevent_init(zfs_zevent_t **zep)
 {
-       size_t nvl_size = 0;
-       evchan_t *error_chan;
+       zfs_zevent_t *ze;
 
-       (void) nvlist_size(ereport, &nvl_size, NV_ENCODE_NATIVE);
-       if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
-               atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
-               return;
-       }
+       ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
+       list_link_init(&ze->ze_node);
+}
 
-       if (sysevent_evc_bind(FM_ERROR_CHAN, &error_chan,
-           EVCH_CREAT|EVCH_HOLD_PEND) != 0) {
-               atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
-               return;
-       }
+void
+zfs_zevent_destroy(zfs_zevent_t *ze)
+{
+       mutex_enter(&zevent_lock);
+       if (ze->ze_zevent)
+               list_remove(&ze->ze_zevent->ev_ze_list, ze);
+       mutex_exit(&zevent_lock);
 
-       if (sysevent_evc_publish(error_chan, EC_FM, ESC_FM_ERROR,
-           SUNW_VENDOR, FM_PUB, ereport, evc_flag) != 0) {
-               atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
-               (void) sysevent_evc_unbind(error_chan);
-               return;
-       }
-       (void) sysevent_evc_unbind(error_chan);
+       kmem_free(ze, sizeof (zfs_zevent_t));
 }
+#endif /* _KERNEL */
 
 /*
  * Wrapppers for FM nvlist allocators
@@ -938,6 +1054,105 @@ fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
        }
 }
 
+void
+fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
+    nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
+{
+       nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
+       nvlist_t *pairs[HC_MAXPAIRS];
+       nvlist_t **hcl;
+       uint_t n;
+       int i, j;
+       va_list ap;
+       char *hcname, *hcid;
+
+       if (!fm_fmri_hc_set_common(fmri, version, auth))
+               return;
+
+       /*
+        * copy the bboard nvpairs to the pairs array
+        */
+       if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
+           != 0) {
+               atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+               return;
+       }
+
+       for (i = 0; i < n; i++) {
+               if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
+                   &hcname) != 0) {
+                       atomic_add_64(
+                           &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+                       return;
+               }
+               if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
+                       atomic_add_64(
+                           &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+                       return;
+               }
+
+               pairs[i] = fm_nvlist_create(nva);
+               if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
+                   nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
+                       for (j = 0; j <= i; j++) {
+                               if (pairs[j] != NULL)
+                                       fm_nvlist_destroy(pairs[j],
+                                           FM_NVA_RETAIN);
+                       }
+                       atomic_add_64(
+                           &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+                       return;
+               }
+       }
+
+       /*
+        * create the pairs from passed in pairs
+        */
+       npairs = MIN(npairs, HC_MAXPAIRS);
+
+       va_start(ap, npairs);
+       for (i = n; i < npairs + n; i++) {
+               const char *name = va_arg(ap, const char *);
+               uint32_t id = va_arg(ap, uint32_t);
+               char idstr[11];
+               (void) snprintf(idstr, sizeof (idstr), "%u", id);
+               pairs[i] = fm_nvlist_create(nva);
+               if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
+                   nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
+                       for (j = 0; j <= i; j++) {
+                               if (pairs[j] != NULL)
+                                       fm_nvlist_destroy(pairs[j],
+                                           FM_NVA_RETAIN);
+                       }
+                       atomic_add_64(
+                           &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+                       return;
+               }
+       }
+       va_end(ap);
+
+       /*
+        * Create the fmri hc list
+        */
+       if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
+           npairs + n) != 0) {
+               atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+               return;
+       }
+
+       for (i = 0; i < npairs + n; i++) {
+                       fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
+       }
+
+       if (snvl != NULL) {
+               if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
+                       atomic_add_64(
+                           &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+                       return;
+               }
+       }
+}
+
 /*
  * Set-up and validate the members of an dev fmri according to:
  *
@@ -1167,7 +1382,7 @@ fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
                        ena = (uint64_t)((format & ENA_FORMAT_MASK) |
                            ((cpuid << ENA_FMT1_CPUID_SHFT) &
                            ENA_FMT1_CPUID_MASK) |
-                           ((gethrtime_waitfree() << ENA_FMT1_TIME_SHFT) &
+                           ((gethrtime() << ENA_FMT1_TIME_SHFT) &
                            ENA_FMT1_TIME_MASK));
                }
                break;
@@ -1185,7 +1400,7 @@ fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
 uint64_t
 fm_ena_generate(uint64_t timestamp, uchar_t format)
 {
-       return (fm_ena_generate_cpu(timestamp, CPU->cpu_id, format));
+       return (fm_ena_generate_cpu(timestamp, getcpuid(), format));
 }
 
 uint64_t
@@ -1253,134 +1468,67 @@ fm_ena_time_get(uint64_t ena)
        return (time);
 }
 
-/*
- * Convert a getpcstack() trace to symbolic name+offset, and add the resulting
- * string array to a Fault Management ereport as FM_EREPORT_PAYLOAD_NAME_STACK.
- */
+#ifdef _KERNEL
 void
-fm_payload_stack_add(nvlist_t *payload, const pc_t *stack, int depth)
+fm_init(void)
 {
-       int i;
-       char *sym;
-       ulong_t off;
-       char *stkpp[FM_STK_DEPTH];
-       char buf[FM_STK_DEPTH * FM_SYM_SZ];
-       char *stkp = buf;
-
-       for (i = 0; i < depth && i != FM_STK_DEPTH; i++, stkp += FM_SYM_SZ) {
-               if ((sym = kobj_getsymname(stack[i], &off)) != NULL)
-                       (void) snprintf(stkp, FM_SYM_SZ, "%s+%lx", sym, off);
-               else
-                       (void) snprintf(stkp, FM_SYM_SZ, "%lx", (long)stack[i]);
-               stkpp[i] = stkp;
-       }
+       zevent_len_cur = 0;
+       zevent_flags = 0;
 
-       fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_STACK,
-           DATA_TYPE_STRING_ARRAY, depth, stkpp, NULL);
-}
+       if (zevent_len_max == 0)
+               zevent_len_max = ERPT_MAX_ERRS * MAX(max_ncpus, 4);
 
-void
-print_msg_hwerr(ctid_t ct_id, proc_t *p)
-{
-       uprintf("Killed process %d (%s) in contract id %d "
-           "due to hardware error\n", p->p_pid, p->p_user.u_comm, ct_id);
+       /* Initialize zevent allocation and generation kstats */
+       fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
+           sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
+           KSTAT_FLAG_VIRTUAL);
+
+       if (fm_ksp != NULL) {
+               fm_ksp->ks_data = &erpt_kstat_data;
+               kstat_install(fm_ksp);
+       } else {
+               cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
+       }
+
+       mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
+       list_create(&zevent_list, sizeof(zevent_t), offsetof(zevent_t, ev_node));
+       cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);
 }
 
 void
-fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
-    nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
+fm_fini(void)
 {
-       nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
-       nvlist_t *pairs[HC_MAXPAIRS];
-       nvlist_t **hcl;
-       uint_t n;
-       int i, j;
-       va_list ap;
-       char *hcname, *hcid;
+       int count;
 
-       if (!fm_fmri_hc_set_common(fmri, version, auth))
-               return;
+       zfs_zevent_drain_all(&count);
+       cv_broadcast(&zevent_cv);
 
-       /*
-        * copy the bboard nvpairs to the pairs array
-        */
-       if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
-           != 0) {
-               atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
-               return;
+       mutex_enter(&zevent_lock);
+       zevent_flags |= ZEVENT_SHUTDOWN;
+       while (zevent_waiters > 0) {
+               mutex_exit(&zevent_lock);
+               schedule();
+               mutex_enter(&zevent_lock);
        }
+       mutex_exit(&zevent_lock);
 
-       for (i = 0; i < n; i++) {
-               if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
-                   &hcname) != 0) {
-                       atomic_add_64(
-                           &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
-                       return;
-               }
-               if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
-                       atomic_add_64(
-                           &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
-                       return;
-               }
+       cv_destroy(&zevent_cv);
+       list_destroy(&zevent_list);
+       mutex_destroy(&zevent_lock);
 
-               pairs[i] = fm_nvlist_create(nva);
-               if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
-                   nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
-                       for (j = 0; j <= i; j++) {
-                               if (pairs[j] != NULL)
-                                       fm_nvlist_destroy(pairs[j],
-                                           FM_NVA_RETAIN);
-                       }
-                       atomic_add_64(
-                           &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
-                       return;
-               }
+       if (fm_ksp != NULL) {
+               kstat_delete(fm_ksp);
+               fm_ksp = NULL;
        }
+}
 
-       /*
-        * create the pairs from passed in pairs
-        */
-       npairs = MIN(npairs, HC_MAXPAIRS);
-
-       va_start(ap, npairs);
-       for (i = n; i < npairs + n; i++) {
-               const char *name = va_arg(ap, const char *);
-               uint32_t id = va_arg(ap, uint32_t);
-               char idstr[11];
-               (void) snprintf(idstr, sizeof (idstr), "%u", id);
-               pairs[i] = fm_nvlist_create(nva);
-               if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
-                   nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
-                       for (j = 0; j <= i; j++) {
-                               if (pairs[j] != NULL)
-                                       fm_nvlist_destroy(pairs[j],
-                                           FM_NVA_RETAIN);
-                       }
-                       atomic_add_64(
-                           &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
-                       return;
-               }
-       }
-       va_end(ap);
+module_param(zevent_len_max, int, 0644);
+MODULE_PARM_DESC(zevent_len_max, "Maximum event queue length");
 
-       /*
-        * Create the fmri hc list
-        */
-       if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
-           npairs + n) != 0) {
-               atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
-               return;
-       }
+module_param(zevent_cols, int, 0644);
+MODULE_PARM_DESC(zevent_cols, "Maximum event column width");
 
-       for (i = 0; i < npairs + n; i++) {
-                       fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
-       }
+module_param(zevent_console, int, 0644);
+MODULE_PARM_DESC(zevent_console, "Log events to the console");
 
-       if (snvl != NULL) {
-               if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
-                       atomic_add_64(
-                           &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
-                       return;
-               }
-       }
-}
+#endif /* _KERNEL */
index c752edc99bbd986f8350a5b36778b24c055bdc73..d5c71d1744185df3061f3a6dbf252001dfa4bbc5 100644 (file)
@@ -35,7 +35,9 @@ extern "C" {
 #define        FM_EREPORT_ZFS_CHECKSUM                 "checksum"
 #define        FM_EREPORT_ZFS_IO                       "io"
 #define        FM_EREPORT_ZFS_DATA                     "data"
+#define        FM_EREPORT_ZFS_CONFIG_SYNC              "config.sync"
 #define        FM_EREPORT_ZFS_POOL                     "zpool"
+#define        FM_EREPORT_ZFS_POOL_DESTROY             "zpool.destroy"
 #define        FM_EREPORT_ZFS_DEVICE_UNKNOWN           "vdev.unknown"
 #define        FM_EREPORT_ZFS_DEVICE_OPEN_FAILED       "vdev.open_failed"
 #define        FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA      "vdev.corrupt_data"
@@ -43,9 +45,19 @@ extern "C" {
 #define        FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM      "vdev.bad_guid_sum"
 #define        FM_EREPORT_ZFS_DEVICE_TOO_SMALL         "vdev.too_small"
 #define        FM_EREPORT_ZFS_DEVICE_BAD_LABEL         "vdev.bad_label"
+#define        FM_EREPORT_ZFS_DEVICE_REMOVE            "vdev.remove"
+#define        FM_EREPORT_ZFS_DEVICE_CLEAR             "vdev.clear"
+#define        FM_EREPORT_ZFS_DEVICE_CHECK             "vdev.check"
+#define        FM_EREPORT_ZFS_DEVICE_SPARE             "vdev.spare"
+#define        FM_EREPORT_ZFS_DEVICE_AUTOEXPAND        "vdev.autoexpand"
 #define        FM_EREPORT_ZFS_IO_FAILURE               "io_failure"
 #define        FM_EREPORT_ZFS_PROBE_FAILURE            "probe_failure"
 #define        FM_EREPORT_ZFS_LOG_REPLAY               "log_replay"
+#define        FM_EREPORT_ZFS_RESILVER_START           "resilver.start"
+#define        FM_EREPORT_ZFS_RESILVER_FINISH          "resilver.finish"
+#define        FM_EREPORT_ZFS_SCRUB_START              "scrub.start"
+#define        FM_EREPORT_ZFS_SCRUB_FINISH             "scrub.finish"
+#define        FM_EREPORT_ZFS_BOOTFS_VDEV_ATTACH       "bootfs.vdev.attach"
 
 #define        FM_EREPORT_PAYLOAD_ZFS_POOL             "pool"
 #define        FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE    "pool_failmode"
@@ -56,6 +68,7 @@ extern "C" {
 #define        FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH        "vdev_path"
 #define        FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID       "vdev_devid"
 #define        FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU         "vdev_fru"
+#define        FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE       "vdev_state"
 #define        FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID      "parent_guid"
 #define        FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE      "parent_type"
 #define        FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH      "parent_path"
@@ -85,9 +98,9 @@ extern "C" {
 #define        FM_EREPORT_FAILMODE_CONTINUE            "continue"
 #define        FM_EREPORT_FAILMODE_PANIC               "panic"
 
-#define        FM_RESOURCE_REMOVED                     "removed"
-#define        FM_RESOURCE_AUTOREPLACE                 "autoreplace"
-#define        FM_RESOURCE_STATECHANGE                 "statechange"
+#define        FM_EREPORT_RESOURCE_REMOVED             "removed"
+#define        FM_EREPORT_RESOURCE_AUTOREPLACE         "autoreplace"
+#define        FM_EREPORT_RESOURCE_STATECHANGE         "statechange"
 
 #ifdef __cplusplus
 }
index 5eca760dadc50313580071f64e004fb4decb1108..1ee221286cefaf363dfdcea9cd84533fdd71d366 100644 (file)
@@ -69,6 +69,7 @@ extern "C" {
 /* ereport payload member names */
 #define        FM_EREPORT_DETECTOR             "detector"
 #define        FM_EREPORT_ENA                  "ena"
+#define        FM_EREPORT_TIME                 "time"
 
 /* list.* event payload member names */
 #define        FM_LIST_EVENT_SIZE              "list-sz"
@@ -327,16 +328,13 @@ extern "C" {
 #define        FM_FMRI_SW_CTXT_ZONE                    "zone"
 #define        FM_FMRI_SW_CTXT_CTID                    "ctid"
 #define        FM_FMRI_SW_CTXT_STACK                   "stack"
+#define        FM_NVA_FREE             0       /* free allocator on nvlist_destroy */
+#define        FM_NVA_RETAIN           1       /* keep allocator on nvlist_destroy */
 
 extern nv_alloc_t *fm_nva_xcreate(char *, size_t);
 extern void fm_nva_xdestroy(nv_alloc_t *);
-
 extern nvlist_t *fm_nvlist_create(nv_alloc_t *);
 extern void fm_nvlist_destroy(nvlist_t *, int);
-
-#define        FM_NVA_FREE     0               /* free allocator on nvlist_destroy */
-#define        FM_NVA_RETAIN   1               /* keep allocator on nvlist_destroy */
-
 extern void fm_ereport_set(nvlist_t *, int, const char *, uint64_t,
     const nvlist_t *, ...);
 extern void fm_payload_set(nvlist_t *, ...);
@@ -350,8 +348,6 @@ extern void fm_fmri_cpu_set(nvlist_t *, int, const nvlist_t *, uint32_t,
     uint8_t *, const char *);
 extern void fm_fmri_mem_set(nvlist_t *, int, const nvlist_t *, const char *,
     const char *, uint64_t);
-extern void fm_authority_set(nvlist_t *, int, const char *, const char *,
-    const char *, const char *);
 extern void fm_fmri_zfs_set(nvlist_t *, int, uint64_t, uint64_t);
 extern void fm_fmri_hc_create(nvlist_t *, int, const nvlist_t *, nvlist_t *,
     nvlist_t *, int, ...);
index 37334101b3cfc69808741553d205d3a3d6049a8d..94947d67c6dd574ef87d6be204334bb993cb08a2 100644 (file)
@@ -31,7 +31,6 @@ extern "C" {
 #endif
 
 #include <sys/nvpair.h>
-#include <sys/errorq.h>
 
 /*
  * Shared user/kernel definitions for class length, error channel name,
@@ -71,29 +70,42 @@ typedef struct erpt_dump {
 } erpt_dump_t;
 
 #ifdef _KERNEL
-#include <sys/systm.h>
 
-#define        FM_STK_DEPTH    20      /* maximum stack depth */
-#define        FM_SYM_SZ       64      /* maximum symbol size */
-#define        FM_ERR_PIL      2       /* PIL for ereport_errorq drain processing */
+#define ZEVENT_SHUTDOWN        0x1
 
-#define        FM_EREPORT_PAYLOAD_NAME_STACK           "stack"
+typedef void zevent_cb_t(nvlist_t *, nvlist_t *);
 
-extern errorq_t *ereport_errorq;
-extern void *ereport_dumpbuf;
-extern size_t ereport_dumplen;
+typedef struct zevent_s {
+       nvlist_t        *ev_nvl;       /* protected by the zevent_lock */
+       nvlist_t        *ev_detector;  /* protected by the zevent_lock */
+       list_t          ev_ze_list;    /* protected by the zevent_lock */
+       list_node_t     ev_node;       /* protected by the zevent_lock */
+       zevent_cb_t     *ev_cb;        /* protected by the zevent_lock */
+} zevent_t;
+
+typedef struct zfs_zevent {
+       zevent_t        *ze_zevent;    /* protected by the zevent_lock */
+       list_node_t     ze_node;       /* protected by the zevent_lock */
+       uint64_t        ze_dropped;    /* protected by the zevent_lock */
+} zfs_zevent_t;
 
 extern void fm_init(void);
+extern void fm_fini(void);
 extern void fm_nvprint(nvlist_t *);
-extern void fm_panic(const char *, ...);
-extern void fm_banner(void);
+extern void zfs_zevent_post(nvlist_t *, nvlist_t *, zevent_cb_t *);
+extern void zfs_zevent_drain_all(int *);
+extern int zfs_zevent_fd_hold(int, minor_t *, zfs_zevent_t **);
+extern void zfs_zevent_fd_rele(int);
+extern int zfs_zevent_next(zfs_zevent_t *, nvlist_t **, uint64_t *);
+extern int zfs_zevent_wait(zfs_zevent_t *);
+extern void zfs_zevent_init(zfs_zevent_t **);
+extern void zfs_zevent_destroy(zfs_zevent_t *);
 
-extern void fm_ereport_dump(void);
-extern void fm_ereport_post(nvlist_t *, int);
+#else
 
-extern void fm_payload_stack_add(nvlist_t *, const pc_t *, int);
+static inline void fm_init(void) { }
+static inline void fm_fini(void) { }
 
-extern int is_fm_panic();
 #endif  /* _KERNEL */
 
 #ifdef __cplusplus
index 558e9e1884e37297c3cb7a95c9b6999941fc5503..af9275b1904cd3b9c6c07d14929eab9df4dee321 100644 (file)
@@ -58,14 +58,9 @@ extern "C" {
 #include <sys/zone.h>
 #include <sys/uio.h>
 #include <sys/zfs_debug.h>
-#include <sys/sysevent.h>
-#include <sys/sysevent/eventdefs.h>
-#include <sys/sysevent/dev.h>
-#include <sys/fm/util.h>
+#include <sys/fm/fs/zfs.h>
 #include <sys/sunddi.h>
 
-#define        CPU_SEQID       (CPU->cpu_seqid)
-
 #ifdef __cplusplus
 }
 #endif
index 84bf794fe5f027fb2874d12ebf64029d10acb400..ad41561ad6f3ae88e63a23383dfb5b1d8d52d840 100644 (file)
@@ -236,6 +236,9 @@ typedef struct zinject_record {
 #define        ZINJECT_FLUSH_ARC       0x2
 #define        ZINJECT_UNLOAD_SPA      0x4
 
+#define        ZEVENT_NONBLOCK         0x1
+#define        ZEVENT_SIZE             1024
+
 typedef struct zfs_share {
        uint64_t        z_exportdata;
        uint64_t        z_sharedata;
index 606138a3eeef475b6ffc18257a2aacdd2524adda..e037f4133ff5cb5cf6b56e781507b148f46c8126 100644 (file)
@@ -1293,8 +1293,9 @@ spa_check_removed(vdev_t *vd)
                spa_check_removed(vd->vdev_child[c]);
 
        if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
-               zfs_post_autoreplace(vd->vdev_spa, vd);
-               spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
+               zfs_ereport_post(FM_EREPORT_RESOURCE_AUTOREPLACE,
+                   vd->vdev_spa, vd, NULL, 0, 0);
+               spa_event_notify(vd->vdev_spa, vd, FM_EREPORT_ZFS_DEVICE_CHECK);
        }
 }
 
@@ -3639,7 +3640,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
                }
        }
 
-       spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY);
+       spa_event_notify(spa, NULL, FM_EREPORT_ZFS_POOL_DESTROY);
 
        if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
                spa_unload(spa);
@@ -3970,7 +3971,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
 
        if (newvd->vdev_isspare) {
                spa_spare_activate(newvd);
-               spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE);
+               spa_event_notify(spa, newvd, FM_EREPORT_ZFS_DEVICE_SPARE);
        }
 
        oldvdpath = spa_strdup(oldvd->vdev_path);
@@ -4002,7 +4003,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
        spa_strfree(newvdpath);
 
        if (spa->spa_bootfs)
-               spa_event_notify(spa, newvd, ESC_ZFS_BOOTFS_VDEV_ATTACH);
+               spa_event_notify(spa, newvd, FM_EREPORT_ZFS_BOOTFS_VDEV_ATTACH);
 
        return (0);
 }
@@ -4203,7 +4204,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
        vd->vdev_detached = B_TRUE;
        vdev_dirty(tvd, VDD_DTL, vd, txg);
 
-       spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);
+       spa_event_notify(spa, vd, FM_EREPORT_ZFS_DEVICE_REMOVE);
 
        /* hang on to the spa before we release the lock */
        spa_open_ref(spa, FTAG);
@@ -5034,9 +5035,6 @@ spa_async_probe(spa_t *spa, vdev_t *vd)
 static void
 spa_async_autoexpand(spa_t *spa, vdev_t *vd)
 {
-       sysevent_id_t eid;
-       nvlist_t *attr;
-       char *physpath;
        int c;
 
        if (!spa->spa_autoexpand)
@@ -5050,17 +5048,7 @@ spa_async_autoexpand(spa_t *spa, vdev_t *vd)
        if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL)
                return;
 
-       physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
-       (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath);
-
-       VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-       VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);
-
-       (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
-           ESC_DEV_DLE, attr, &eid, DDI_SLEEP);
-
-       nvlist_free(attr);
-       kmem_free(physpath, MAXPATHLEN);
+       spa_event_notify(vd->vdev_spa, vd, FM_EREPORT_ZFS_DEVICE_AUTOEXPAND);
 }
 
 static void
@@ -5858,8 +5846,7 @@ spa_has_active_shared_spare(spa_t *spa)
 }
 
 /*
- * Post a sysevent corresponding to the given event.  The 'name' must be one of
- * the event definitions in sys/sysevent/eventdefs.h.  The payload will be
+ * Post a FM_EREPORT_ZFS_* event from sys/fm/fs/zfs.h.  The payload will be
  * filled in from the spa and (optionally) the vdev.  This doesn't do anything
  * in the userland libzpool, as we don't want consumers to misinterpret ztest
  * or zdb as real changes.
@@ -5868,49 +5855,6 @@ void
 spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
 {
 #ifdef _KERNEL
-       sysevent_t              *ev;
-       sysevent_attr_list_t    *attr = NULL;
-       sysevent_value_t        value;
-       sysevent_id_t           eid;
-
-       ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs",
-           SE_SLEEP);
-
-       value.value_type = SE_DATA_TYPE_STRING;
-       value.value.sv_string = spa_name(spa);
-       if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0)
-               goto done;
-
-       value.value_type = SE_DATA_TYPE_UINT64;
-       value.value.sv_uint64 = spa_guid(spa);
-       if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0)
-               goto done;
-
-       if (vd) {
-               value.value_type = SE_DATA_TYPE_UINT64;
-               value.value.sv_uint64 = vd->vdev_guid;
-               if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value,
-                   SE_SLEEP) != 0)
-                       goto done;
-
-               if (vd->vdev_path) {
-                       value.value_type = SE_DATA_TYPE_STRING;
-                       value.value.sv_string = vd->vdev_path;
-                       if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH,
-                           &value, SE_SLEEP) != 0)
-                               goto done;
-               }
-       }
-
-       if (sysevent_attach_attributes(ev, attr) != 0)
-               goto done;
-       attr = NULL;
-
-       (void) log_sysevent(ev, SE_SLEEP, &eid);
-
-done:
-       if (attr)
-               sysevent_free_attr(attr);
-       sysevent_free(ev);
+       zfs_ereport_post(name, spa, vd, NULL, 0, 0);
 #endif
 }
index 69d57f66dbb6c89154c35c94833a20ccede273c5..1cf3950d450d78fd8fbb222fe1e9cc49f1193a93 100644 (file)
@@ -258,7 +258,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
        spa_config_generation++;
 
        if (postsysevent)
-               spa_event_notify(target, NULL, ESC_ZFS_CONFIG_SYNC);
+               spa_event_notify(target, NULL, FM_EREPORT_ZFS_CONFIG_SYNC);
 }
 
 /*
index 32ef51db15688e197de22beb4c37f81c5bc47dc0..4027d0f4f7de09ed9ee4930b60a582f52f34c345 100644 (file)
@@ -40,6 +40,7 @@
 #include <sys/dsl_pool.h>
 #include <sys/dsl_dir.h>
 #include <sys/dsl_prop.h>
+#include <sys/fm/util.h>
 #include <sys/dsl_scan.h>
 #include <sys/fs/zfs.h>
 #include <sys/metaslab_impl.h>
@@ -1540,6 +1541,7 @@ spa_init(int mode)
 
        spa_mode_global = mode;
 
+       fm_init();
        refcount_init();
        unique_init();
        zio_init();
@@ -1565,6 +1567,7 @@ spa_fini(void)
        zio_fini();
        unique_fini();
        refcount_fini();
+       fm_fini();
 
        avl_destroy(&spa_namespace_avl);
        avl_destroy(&spa_spare_avl);
index 17b45b0e828dd88627ff6e64da26d30189102dd0..4613e951a57f543028695bc3b1b6afaf7f4b3bb9 100644 (file)
@@ -2388,7 +2388,7 @@ vdev_clear(spa_t *spa, vdev_t *vd)
                if (vd->vdev_aux == NULL && !vdev_is_dead(vd))
                        spa_async_request(spa, SPA_ASYNC_RESILVER);
 
-               spa_event_notify(spa, vd, ESC_ZFS_VDEV_CLEAR);
+               spa_event_notify(spa, vd, FM_EREPORT_ZFS_DEVICE_CLEAR);
        }
 
        /*
index 0b4812666442de49edd3bc310ce124285b877387..c93057e8e8d1654f2f75b9e1110c07f75f9f5095 100644 (file)
  * ereport with information about the differences.
  */
 #ifdef _KERNEL
+/*
+ * Cleanup callback passed to zfs_zevent_post().  Destroys the event
+ * nvlist and the detector nvlist; either argument may be NULL, in
+ * which case it is skipped.
+ */
+static void
+zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector)
+{
+       if (nvl != NULL)
+               fm_nvlist_destroy(nvl, FM_NVA_FREE);
+
+       if (detector != NULL)
+               fm_nvlist_destroy(detector, FM_NVA_FREE);
+}
+
+
 static void
 zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
     const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
@@ -410,7 +420,7 @@ update_histogram(uint64_t value_arg, uint16_t *hist, uint32_t *count)
  * to the new smallest gap, to prepare for our next invocation.
  */
 static void
-shrink_ranges(zfs_ecksum_info_t *eip)
+zei_shrink_ranges(zfs_ecksum_info_t *eip)
 {
        uint32_t mingap = UINT32_MAX;
        uint32_t new_allowed_gap = eip->zei_mingap + 1;
@@ -429,12 +439,13 @@ shrink_ranges(zfs_ecksum_info_t *eip)
                uint32_t end = r[idx].zr_end;
 
                while (idx < max - 1) {
-                       idx++;
+                       uint32_t nstart, nend, gap;
 
-                       uint32_t nstart = r[idx].zr_start;
-                       uint32_t nend = r[idx].zr_end;
+                       idx++;
+                       nstart = r[idx].zr_start;
+                       nend = r[idx].zr_end;
 
-                       uint32_t gap = nstart - end;
+                       gap = nstart - end;
                        if (gap < new_allowed_gap) {
                                end = nend;
                                continue;
@@ -454,13 +465,13 @@ shrink_ranges(zfs_ecksum_info_t *eip)
 }
 
 static void
-add_range(zfs_ecksum_info_t *eip, int start, int end)
+zei_add_range(zfs_ecksum_info_t *eip, int start, int end)
 {
        struct zei_ranges *r = eip->zei_ranges;
        size_t count = eip->zei_range_count;
 
        if (count >= MAX_RANGES) {
-               shrink_ranges(eip);
+               zei_shrink_ranges(eip);
                count = eip->zei_range_count;
        }
        if (count == 0) {
@@ -482,7 +493,7 @@ add_range(zfs_ecksum_info_t *eip, int start, int end)
 }
 
 static size_t
-range_total_size(zfs_ecksum_info_t *eip)
+zei_range_total_size(zfs_ecksum_info_t *eip)
 {
        struct zei_ranges *r = eip->zei_ranges;
        size_t count = eip->zei_range_count;
@@ -559,7 +570,7 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
                        if (start == -1)
                                continue;
 
-                       add_range(eip, start, idx);
+                       zei_add_range(eip, start, idx);
                        start = -1;
                } else {
                        if (start != -1)
@@ -569,10 +580,10 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
                }
        }
        if (start != -1)
-               add_range(eip, start, idx);
+               zei_add_range(eip, start, idx);
 
        /* See if it will fit in our inline buffers */
-       inline_size = range_total_size(eip);
+       inline_size = zei_range_total_size(eip);
        if (inline_size > ZFM_MAX_INLINE)
                no_inline = 1;
 
@@ -675,10 +686,8 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
        if (ereport == NULL)
                return;
 
-       fm_ereport_post(ereport, EVCH_SLEEP);
-
-       fm_nvlist_destroy(ereport, FM_NVA_FREE);
-       fm_nvlist_destroy(detector, FM_NVA_FREE);
+       /* Cleanup is handled by the callback function */
+       zfs_zevent_post(ereport, detector, zfs_zevent_post_cb);
 #endif
 }
 
@@ -730,12 +739,10 @@ zfs_ereport_finish_checksum(zio_cksum_report_t *report,
            good_data, bad_data, report->zcr_length, drop_if_identical);
 
        if (info != NULL)
-               fm_ereport_post(report->zcr_ereport, EVCH_SLEEP);
+               zfs_zevent_post(report->zcr_ereport,
+                   report->zcr_detector, zfs_zevent_post_cb);
 
-       fm_nvlist_destroy(report->zcr_ereport, FM_NVA_FREE);
-       fm_nvlist_destroy(report->zcr_detector, FM_NVA_FREE);
        report->zcr_ereport = report->zcr_detector = NULL;
-
        if (info != NULL)
                kmem_free(info, sizeof (*info));
 #endif
@@ -764,7 +771,7 @@ void
 zfs_ereport_send_interim_checksum(zio_cksum_report_t *report)
 {
 #ifdef _KERNEL
-       fm_ereport_post(report->zcr_ereport, EVCH_SLEEP);
+       zfs_zevent_post(report->zcr_ereport, report->zcr_detector, NULL);
 #endif
 }
 
@@ -787,14 +794,10 @@ zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
        info = annotate_ecksum(ereport, zbc, good_data, bad_data, length,
            B_FALSE);
 
-       if (info != NULL)
-               fm_ereport_post(ereport, EVCH_SLEEP);
-
-       fm_nvlist_destroy(ereport, FM_NVA_FREE);
-       fm_nvlist_destroy(detector, FM_NVA_FREE);
-
-       if (info != NULL)
+       if (info != NULL) {
+               /* Cleanup is handled by the callback function */
+               zfs_zevent_post(ereport, detector, zfs_zevent_post_cb);
                kmem_free(info, sizeof (*info));
+       } else {
+               /*
+                * The event was not posted, so the callback will never
+                * run; free the nvlists directly rather than leak them.
+                */
+               zfs_zevent_post_cb(ereport, detector);
+       }
 #endif
 }
 
@@ -817,13 +820,14 @@ zfs_post_common(spa_t *spa, vdev_t *vd, const char *name)
        VERIFY(nvlist_add_string(resource, FM_CLASS, class) == 0);
        VERIFY(nvlist_add_uint64(resource,
            FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)) == 0);
-       if (vd)
+       if (vd) {
                VERIFY(nvlist_add_uint64(resource,
                    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid) == 0);
+               VERIFY(nvlist_add_uint64(resource,
+                   FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, vd->vdev_state) == 0);
+       }
 
-       fm_ereport_post(resource, EVCH_SLEEP);
-
-       fm_nvlist_destroy(resource, FM_NVA_FREE);
+       zfs_zevent_post(resource, NULL, zfs_zevent_post_cb);
 #endif
 }
 
@@ -836,7 +840,7 @@ zfs_post_common(spa_t *spa, vdev_t *vd, const char *name)
 void
 zfs_post_remove(spa_t *spa, vdev_t *vd)
 {
-       zfs_post_common(spa, vd, FM_RESOURCE_REMOVED);
+       zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_REMOVED);
 }
 
 /*
@@ -847,7 +851,7 @@ zfs_post_remove(spa_t *spa, vdev_t *vd)
 void
 zfs_post_autoreplace(spa_t *spa, vdev_t *vd)
 {
-       zfs_post_common(spa, vd, FM_RESOURCE_AUTOREPLACE);
+       zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_AUTOREPLACE);
 }
 
 /*
@@ -859,5 +863,13 @@ zfs_post_autoreplace(spa_t *spa, vdev_t *vd)
 void
 zfs_post_state_change(spa_t *spa, vdev_t *vd)
 {
-       zfs_post_common(spa, vd, FM_RESOURCE_STATECHANGE);
+       zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_STATECHANGE);
 }
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(zfs_ereport_post);
+EXPORT_SYMBOL(zfs_ereport_post_checksum);
+EXPORT_SYMBOL(zfs_post_remove);
+EXPORT_SYMBOL(zfs_post_autoreplace);
+EXPORT_SYMBOL(zfs_post_state_change);
+#endif /* _KERNEL */
index 3e149ab33472e7ea91c3dbdce6b9b48be75f818c..bcafcfbf6610dccfced8a7f36ce58d4963b1a619 100644 (file)
@@ -1798,7 +1798,7 @@ zfs_ioc_objset_stats(zfs_cmd_t *zc)
  * local property values.
  */
 static int
-zfs_ioc_objset_recvd_props(struct file *filp, zfs_cmd_t *zc)
+zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
 {
        objset_t *os = NULL;
        int error;
@@ -4626,6 +4626,67 @@ zfs_ioc_get_holds(zfs_cmd_t *zc)
        return (error);
 }
 
+/*
+ * inputs:
+ * zc_guid             flags (ZEVENT_NONBLOCK)
+ * zc_cleanup_fd       zevent file descriptor
+ *
+ * outputs:
+ * zc_nvlist_dst       next nvlist event
+ * zc_cookie           dropped events since last get
+ *
+ * Fetch the next event for the zevent state held via zc_cleanup_fd and
+ * copy it out with put_nvlist().  When ZEVENT_NONBLOCK is set in zc_guid
+ * the result of the first attempt is returned as is.  Otherwise the call
+ * waits in zfs_zevent_wait() and retries for as long as zfs_zevent_next()
+ * reports ENOENT; any other result, including success, ends the loop.
+ */
+static int
+zfs_ioc_events_next(zfs_cmd_t *zc)
+{
+       zfs_zevent_t *ze;
+       nvlist_t *event = NULL;
+       minor_t minor;
+       uint64_t dropped = 0;
+       int error;
+
+       error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
+       if (error != 0)
+               return (error);
+
+       do {
+               error = zfs_zevent_next(ze, &event, &dropped);
+               if (event != NULL) {
+                       zc->zc_cookie = dropped;
+                       error = put_nvlist(zc, event);
+                       nvlist_free(event);
+               }
+
+               if (zc->zc_guid & ZEVENT_NONBLOCK)
+                       break;
+
+               /* Only ENOENT is retried; success or any other error ends */
+               if (error != ENOENT)
+                       break;
+
+               error = zfs_zevent_wait(ze);
+               if (error != 0)
+                       break;
+       } while (1);
+
+       /* Drop the hold taken by zfs_zevent_fd_hold() on every exit path */
+       zfs_zevent_fd_rele(zc->zc_cleanup_fd);
+
+       return (error);
+}
+
+/*
+ * outputs:
+ * zc_cookie           cleared events count
+ *
+ * Drain every queued event and report how many were cleared.  Always
+ * returns 0.
+ */
+static int
+zfs_ioc_events_clear(zfs_cmd_t *zc)
+{
+       /* Start from zero so zc_cookie never carries stack garbage */
+       int count = 0;
+
+       zfs_zevent_drain_all(&count);
+       zc->zc_cookie = count;
+
+       return (0);
+}
+
 /*
  * pool create, destroy, and export don't log the history as part of
  * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
@@ -4747,7 +4808,11 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = {
        { zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME,
            B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
-           POOL_CHECK_SUSPENDED }
+           POOL_CHECK_SUSPENDED },
+       { zfs_ioc_events_next, zfs_secpolicy_config, NO_NAME, B_FALSE,
+           POOL_CHECK_NONE },
+       { zfs_ioc_events_clear, zfs_secpolicy_config, NO_NAME, B_FALSE,
+           POOL_CHECK_NONE },
 };
 
 int