4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
26 * Fault Management Architecture (FMA) Resource and Protocol Support
28 * The routines contained herein provide services to support kernel subsystems
29 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
31 * Name-Value Pair Lists
33 * The embodiment of an FMA protocol element (event, fmri or authority) is a
34 * name-value pair list (nvlist_t). FMA-specific nvlist constructor and
35 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
36 * to create an nvpair list using custom allocators. Callers may choose to
37 * allocate either from the kernel memory allocator, or from a preallocated
38 * buffer, useful in constrained contexts like high-level interrupt routines.
40 * Protocol Event and FMRI Construction
42 * Convenience routines are provided to construct nvlist events according to
43 * the FMA Event Protocol and Naming Schema specification for ereports and
44 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
48 * Routines to generate ENA formats 0, 1 and 2 are available as well as
49 * routines to increment formats 1 and 2. Individual fields within the
50 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
51 * fm_ena_format_get() and fm_ena_gen_get().
54 #include <sys/types.h>
57 #include <sys/nvpair.h>
58 #include <sys/cmn_err.h>
59 #include <sys/sysmacros.h>
60 #include <sys/compress.h>
61 #include <sys/sunddi.h>
62 #include <sys/systeminfo.h>
63 #include <sys/fm/util.h>
64 #include <sys/fm/protocol.h>
65 #include <sys/kstat.h>
66 #include <sys/zfs_context.h>
68 #include <sys/atomic.h>
69 #include <sys/condvar.h>
70 #include <sys/cpuvar.h>
71 #include <sys/systm.h>
72 #include <sys/dumphdr.h>
73 #include <sys/cpuvar.h>
74 #include <sys/console.h>
77 #include <sys/zfs_ioctl.h>
/* Tunables (exposed as Linux module parameters at the bottom of the file). */
/* Max queued zevents; 0 = auto-size in fm_init() from max_ncpus. */
79 int zfs_zevent_len_max
= 0;
/* Console column width used by fm_nvprint()/fm_nvprintr(). */
80 int zfs_zevent_cols
= 80;
/* Non-zero: also log posted events to the console (checked in zfs_zevent_post). */
81 int zfs_zevent_console
= 0;
/* Internal queue state, all protected by zevent_lock. */
83 static int zevent_len_cur
= 0;
84 static int zevent_waiters
= 0;
85 static int zevent_flags
= 0;
87 static kmutex_t zevent_lock
;
88 static list_t zevent_list
;
89 static kcondvar_t zevent_cv
;
92 extern void fastreboot_disable_highpil(void);
95 * Common fault management kstats to record event generation failures
/*
 * NOTE(review): the enclosing "struct erpt_kstat { ... }" header lines are
 * missing from this extraction; only the four named-kstat fields and the
 * static initializer survive. Field order must match erpt_kstat_data below.
 */
99 kstat_named_t erpt_dropped
; /* num erpts dropped on post */
100 kstat_named_t erpt_set_failed
; /* num erpt set failures */
101 kstat_named_t fmri_set_failed
; /* num fmri set failures */
102 kstat_named_t payload_set_failed
; /* num payload set failures */
/* Backing data for the "zfs/fm" kstat installed in fm_init(). */
105 static struct erpt_kstat erpt_kstat_data
= {
106 { "erpt-dropped", KSTAT_DATA_UINT64
},
107 { "erpt-set-failed", KSTAT_DATA_UINT64
},
108 { "fmri-set-failed", KSTAT_DATA_UINT64
},
109 { "payload-set-failed", KSTAT_DATA_UINT64
}
117 * Formatting utility function for fm_nvprintr. We attempt to wrap chunks of
118 * output so they aren't split across console lines, and return the end column.
/*
 * NOTE(review): interior lines are missing from this extraction (return type,
 * local declarations such as the va_list and the vsnprintf buffer c1, the
 * va_end calls, and the non-wrapping output path); treat as incomplete text.
 */
122 fm_printf(int depth
, int c
, int cols
, const char *format
, ...)
/* First pass: measure the formatted width without printing. */
128 va_start(ap
, format
);
129 width
= vsnprintf(&c1
, sizeof (c1
), format
, ap
);
/* Wrap to a new console line when the chunk would cross 'cols'. */
132 if (c
+ width
>= cols
) {
133 console_printf("\n");
135 if (format
[0] != ' ' && depth
> 0) {
/* Second pass: actually emit the formatted chunk. */
141 va_start(ap
, format
);
142 console_vprintf(format
, ap
);
/* Return the column we ended in, modulo the console width. */
145 return ((c
+ width
) % cols
);
149 * Recursively print a nvlist in the specified column width and return the
150 * column we end up in. This function is called recursively by fm_nvprint(),
151 * below. We generically format the entire nvpair using hexadecimal
152 * integers and strings, and elide any integer arrays. Arrays are basically
153 * used for cache dumps right now, so we suppress them so as not to overwhelm
154 * the amount of console output we produce at panic time. This can be further
155 * enhanced as FMA technology grows based upon the needs of consumers. All
156 * FMA telemetry is logged using the dump device transport, so the console
157 * output serves only as a fallback in case this procedure is unsuccessful.
/*
 * NOTE(review): this extraction has dropped many interior lines (the return
 * type, local declarations for nvp/b/i8/i16/i32/i64/str/cnv/val/nelem/i,
 * the switch header on the nvpair type, every break statement, and the
 * per-case block braces). Only the per-type formatting calls survive.
 */
160 fm_nvprintr(nvlist_t
*nvl
, int d
, int c
, int cols
)
/* Walk every nvpair in the list in order. */
164 for (nvp
= nvlist_next_nvpair(nvl
, NULL
);
165 nvp
!= NULL
; nvp
= nvlist_next_nvpair(nvl
, nvp
)) {
167 data_type_t type
= nvpair_type(nvp
);
168 const char *name
= nvpair_name(nvp
);
/* The event class is printed by fm_nvprint() before recursing here. */
178 if (strcmp(name
, FM_CLASS
) == 0)
179 continue; /* already printed by caller */
181 c
= fm_printf(d
, c
, cols
, " %s=", name
);
184 case DATA_TYPE_BOOLEAN
:
185 c
= fm_printf(d
+ 1, c
, cols
, " 1");
188 case DATA_TYPE_BOOLEAN_VALUE
:
189 (void) nvpair_value_boolean_value(nvp
, &b
);
190 c
= fm_printf(d
+ 1, c
, cols
, b
? "1" : "0");
/* Scalar integers are rendered in hex. */
194 (void) nvpair_value_byte(nvp
, &i8
);
195 c
= fm_printf(d
+ 1, c
, cols
, "0x%x", i8
);
199 (void) nvpair_value_int8(nvp
, (void *)&i8
);
200 c
= fm_printf(d
+ 1, c
, cols
, "0x%x", i8
);
203 case DATA_TYPE_UINT8
:
204 (void) nvpair_value_uint8(nvp
, &i8
);
205 c
= fm_printf(d
+ 1, c
, cols
, "0x%x", i8
);
208 case DATA_TYPE_INT16
:
209 (void) nvpair_value_int16(nvp
, (void *)&i16
);
210 c
= fm_printf(d
+ 1, c
, cols
, "0x%x", i16
);
213 case DATA_TYPE_UINT16
:
214 (void) nvpair_value_uint16(nvp
, &i16
);
215 c
= fm_printf(d
+ 1, c
, cols
, "0x%x", i16
);
218 case DATA_TYPE_INT32
:
219 (void) nvpair_value_int32(nvp
, (void *)&i32
);
220 c
= fm_printf(d
+ 1, c
, cols
, "0x%x", i32
);
223 case DATA_TYPE_UINT32
:
224 (void) nvpair_value_uint32(nvp
, &i32
);
225 c
= fm_printf(d
+ 1, c
, cols
, "0x%x", i32
);
228 case DATA_TYPE_INT64
:
229 (void) nvpair_value_int64(nvp
, (void *)&i64
);
230 c
= fm_printf(d
+ 1, c
, cols
, "0x%llx",
234 case DATA_TYPE_UINT64
:
235 (void) nvpair_value_uint64(nvp
, &i64
);
236 c
= fm_printf(d
+ 1, c
, cols
, "0x%llx",
240 case DATA_TYPE_HRTIME
:
241 (void) nvpair_value_hrtime(nvp
, (void *)&i64
);
242 c
= fm_printf(d
+ 1, c
, cols
, "0x%llx",
246 case DATA_TYPE_STRING
:
247 (void) nvpair_value_string(nvp
, &str
);
248 c
= fm_printf(d
+ 1, c
, cols
, "\"%s\"",
249 str
? str
: "<NULL>");
/* Nested nvlists recurse one indent level deeper, wrapped in [ ]. */
252 case DATA_TYPE_NVLIST
:
253 c
= fm_printf(d
+ 1, c
, cols
, "[");
254 (void) nvpair_value_nvlist(nvp
, &cnv
);
255 c
= fm_nvprintr(cnv
, d
+ 1, c
, cols
);
256 c
= fm_printf(d
+ 1, c
, cols
, " ]");
259 case DATA_TYPE_NVLIST_ARRAY
: {
263 c
= fm_printf(d
+ 1, c
, cols
, "[");
264 (void) nvpair_value_nvlist_array(nvp
, &val
, &nelem
);
265 for (i
= 0; i
< nelem
; i
++) {
266 c
= fm_nvprintr(val
[i
], d
+ 1, c
, cols
);
268 c
= fm_printf(d
+ 1, c
, cols
, " ]");
/* Integer arrays: each element printed in hex, space separated. */
272 case DATA_TYPE_INT8_ARRAY
: {
276 c
= fm_printf(d
+ 1, c
, cols
, "[ ");
277 (void) nvpair_value_int8_array(nvp
, &val
, &nelem
);
278 for (i
= 0; i
< nelem
; i
++)
279 c
= fm_printf(d
+ 1, c
, cols
, "0x%llx ",
280 (u_longlong_t
)val
[i
]);
282 c
= fm_printf(d
+ 1, c
, cols
, "]");
286 case DATA_TYPE_UINT8_ARRAY
: {
290 c
= fm_printf(d
+ 1, c
, cols
, "[ ");
291 (void) nvpair_value_uint8_array(nvp
, &val
, &nelem
);
292 for (i
= 0; i
< nelem
; i
++)
293 c
= fm_printf(d
+ 1, c
, cols
, "0x%llx ",
294 (u_longlong_t
)val
[i
]);
296 c
= fm_printf(d
+ 1, c
, cols
, "]");
300 case DATA_TYPE_INT16_ARRAY
: {
304 c
= fm_printf(d
+ 1, c
, cols
, "[ ");
305 (void) nvpair_value_int16_array(nvp
, &val
, &nelem
);
306 for (i
= 0; i
< nelem
; i
++)
307 c
= fm_printf(d
+ 1, c
, cols
, "0x%llx ",
308 (u_longlong_t
)val
[i
]);
310 c
= fm_printf(d
+ 1, c
, cols
, "]");
314 case DATA_TYPE_UINT16_ARRAY
: {
318 c
= fm_printf(d
+ 1, c
, cols
, "[ ");
319 (void) nvpair_value_uint16_array(nvp
, &val
, &nelem
);
320 for (i
= 0; i
< nelem
; i
++)
321 c
= fm_printf(d
+ 1, c
, cols
, "0x%llx ",
322 (u_longlong_t
)val
[i
]);
324 c
= fm_printf(d
+ 1, c
, cols
, "]");
328 case DATA_TYPE_INT32_ARRAY
: {
332 c
= fm_printf(d
+ 1, c
, cols
, "[ ");
333 (void) nvpair_value_int32_array(nvp
, &val
, &nelem
);
334 for (i
= 0; i
< nelem
; i
++)
335 c
= fm_printf(d
+ 1, c
, cols
, "0x%llx ",
336 (u_longlong_t
)val
[i
]);
338 c
= fm_printf(d
+ 1, c
, cols
, "]");
342 case DATA_TYPE_UINT32_ARRAY
: {
346 c
= fm_printf(d
+ 1, c
, cols
, "[ ");
347 (void) nvpair_value_uint32_array(nvp
, &val
, &nelem
);
348 for (i
= 0; i
< nelem
; i
++)
349 c
= fm_printf(d
+ 1, c
, cols
, "0x%llx ",
350 (u_longlong_t
)val
[i
]);
352 c
= fm_printf(d
+ 1, c
, cols
, "]");
356 case DATA_TYPE_INT64_ARRAY
: {
360 c
= fm_printf(d
+ 1, c
, cols
, "[ ");
361 (void) nvpair_value_int64_array(nvp
, &val
, &nelem
);
362 for (i
= 0; i
< nelem
; i
++)
363 c
= fm_printf(d
+ 1, c
, cols
, "0x%llx ",
364 (u_longlong_t
)val
[i
]);
366 c
= fm_printf(d
+ 1, c
, cols
, "]");
370 case DATA_TYPE_UINT64_ARRAY
: {
374 c
= fm_printf(d
+ 1, c
, cols
, "[ ");
375 (void) nvpair_value_uint64_array(nvp
, &val
, &nelem
);
376 for (i
= 0; i
< nelem
; i
++)
377 c
= fm_printf(d
+ 1, c
, cols
, "0x%llx ",
378 (u_longlong_t
)val
[i
]);
380 c
= fm_printf(d
+ 1, c
, cols
, "]");
/* Remaining array types are elided to keep panic-time output small. */
384 case DATA_TYPE_STRING_ARRAY
:
385 case DATA_TYPE_BOOLEAN_ARRAY
:
386 case DATA_TYPE_BYTE_ARRAY
:
387 c
= fm_printf(d
+ 1, c
, cols
, "[...]");
390 case DATA_TYPE_UNKNOWN
:
391 c
= fm_printf(d
+ 1, c
, cols
, "<unknown>");
/*
 * Print an FMA nvlist to the console: the FM_CLASS string first, then the
 * remaining pairs via fm_nvprintr() at zfs_zevent_cols width.
 * NOTE(review): local declarations (the 'class' string and initial column
 * 'c') are missing from this extraction.
 */
400 fm_nvprint(nvlist_t
*nvl
)
405 console_printf("\n");
407 if (nvlist_lookup_string(nvl
, FM_CLASS
, &class) == 0)
408 c
= fm_printf(0, c
, zfs_zevent_cols
, "%s", class)
;
410 if (fm_nvprintr(nvl
, 0, c
, zfs_zevent_cols
) != 0)
411 console_printf("\n");
413 console_printf("\n");
/*
 * Allocate and initialize a zevent_t: zeroed, with an empty list of
 * per-stream references (ev_ze_list) and an unlinked queue node.
 * NOTE(review): the return type, braces, NULL check and return statement
 * are missing from this extraction.
 */
417 zfs_zevent_alloc(void)
421 ev
= kmem_zalloc(sizeof (zevent_t
), KM_PUSHPAGE
);
425 list_create(&ev
->ev_ze_list
, sizeof (zfs_zevent_t
),
426 offsetof(zfs_zevent_t
, ze_node
));
427 list_link_init(&ev
->ev_node
);
/*
 * Release a zevent: invoke its cleanup callback (which owns ev_nvl and
 * ev_detector) and free the event itself.
 */
433 zfs_zevent_free(zevent_t
*ev
)
435 /* Run provided cleanup callback */
436 ev
->ev_cb(ev
->ev_nvl
, ev
->ev_detector
);
438 list_destroy(&ev
->ev_ze_list
);
439 kmem_free(ev
, sizeof (zevent_t
));
/*
 * Remove one event from the queue and detach every per-stream reference
 * to it. Caller must hold zevent_lock. NOTE(review): the trailing
 * zfs_zevent_free() call and zevent_len_cur decrement visible in upstream
 * are not present in this extraction — confirm against the full source.
 */
443 zfs_zevent_drain(zevent_t
*ev
)
447 ASSERT(MUTEX_HELD(&zevent_lock
));
448 list_remove(&zevent_list
, ev
);
450 /* Remove references to this event in all private file data */
451 while ((ze
= list_head(&ev
->ev_ze_list
)) != NULL
) {
452 list_remove(&ev
->ev_ze_list
, ze
);
453 ze
->ze_zevent
= NULL
;
/*
 * Drain every queued event under zevent_lock and report, via *count, the
 * queue length observed after draining (expected to be the drop count).
 */
461 zfs_zevent_drain_all(int *count
)
465 mutex_enter(&zevent_lock
);
466 while ((ev
= list_head(&zevent_list
)) != NULL
)
467 zfs_zevent_drain(ev
);
469 *count
= zevent_len_cur
;
471 mutex_exit(&zevent_lock
);
475 * New zevents are inserted at the head. If the maximum queue
476 * length is exceeded a zevent will be drained from the tail.
477 * As part of this any user space processes which currently have
478 * a reference to this zevent_t in their private data will have
479 * this reference set to NULL.
/* NOTE(review): the else-branch incrementing zevent_len_cur is missing here. */
482 zfs_zevent_insert(zevent_t
*ev
)
484 ASSERT(MUTEX_HELD(&zevent_lock
));
485 list_insert_head(&zevent_list
, ev
);
487 if (zevent_len_cur
>= zfs_zevent_len_max
)
488 zfs_zevent_drain(list_tail(&zevent_list
));
/*
 * Post a new event: stamp it with the current time, size-check the encoded
 * nvlist against ERPT_DATA_SZ, optionally echo it to the console, then
 * queue it and wake any zfs_zevent_wait() sleepers.
 * NOTE(review): local declarations (tv, tv_array, nvl_size, ev), the
 * gethrestime() call, the error returns, the fm_nvprint() console call and
 * the ev_nvl/ev_cb assignments are missing from this extraction.
 */
497 zfs_zevent_post(nvlist_t
*nvl
, nvlist_t
*detector
, zevent_cb_t
*cb
)
/* Record the post time as a two-element {sec, nsec} int64 array. */
505 tv_array
[0] = tv
.tv_sec
;
506 tv_array
[1] = tv
.tv_nsec
;
507 if (nvlist_add_int64_array(nvl
, FM_EREPORT_TIME
, tv_array
, 2)) {
508 atomic_add_64(&erpt_kstat_data
.erpt_set_failed
.value
.ui64
, 1);
/* Reject events whose native encoding is empty or oversized. */
512 (void) nvlist_size(nvl
, &nvl_size
, NV_ENCODE_NATIVE
);
513 if (nvl_size
> ERPT_DATA_SZ
|| nvl_size
== 0) {
514 atomic_add_64(&erpt_kstat_data
.erpt_dropped
.value
.ui64
, 1);
518 if (zfs_zevent_console
)
521 ev
= zfs_zevent_alloc();
/* Allocation failure counts as a dropped ereport. */
523 atomic_add_64(&erpt_kstat_data
.erpt_dropped
.value
.ui64
, 1);
528 ev
->ev_detector
= detector
;
531 mutex_enter(&zevent_lock
);
532 zfs_zevent_insert(ev
);
533 cv_broadcast(&zevent_cv
);
534 mutex_exit(&zevent_lock
);
/*
 * Resolve a device minor to its zfs_zevent_t private state.
 * NOTE(review): the NULL check / error return after the lookup is missing
 * from this extraction.
 */
538 zfs_zevent_minor_to_state(minor_t minor
, zfs_zevent_t
**ze
)
540 *ze
= zfsdev_get_state(minor
, ZST_ZEVENT
);
/*
 * Take a hold on the zevent device file descriptor, returning its minor
 * and per-stream state. On lookup failure the hold is released again.
 * NOTE(review): the fget/hold logic and error-path structure preceding
 * these lines are missing from this extraction.
 */
548 zfs_zevent_fd_hold(int fd
, minor_t
*minorp
, zfs_zevent_t
**ze
)
557 *minorp
= zfsdev_getminor(fp
->f_file
);
558 error
= zfs_zevent_minor_to_state(*minorp
, ze
);
561 zfs_zevent_fd_rele(fd
);
/*
 * Release the hold taken by zfs_zevent_fd_hold().
 * NOTE(review): the entire function body is missing from this extraction.
 */
567 zfs_zevent_fd_rele(int fd
)
573 * Get the next zevent in the stream and place a copy in 'event'. This
574 * may fail with ENOMEM if the encoded nvlist size exceeds the passed
575 * 'event_size'. In this case the stream pointer is not advanced
576 * and 'event_size' is set to the minimum required buffer size.
/*
 * NOTE(review): the return type, the 'dropped' out-parameter in the
 * signature, local declarations (ev, size, error), the empty-queue/ENOENT
 * paths and the ENOMEM return are missing from this extraction.
 */
579 zfs_zevent_next(zfs_zevent_t
*ze
, nvlist_t
**event
, uint64_t *event_size
,
586 mutex_enter(&zevent_lock
);
587 if (ze
->ze_zevent
== NULL
) {
588 /* New stream start at the beginning/tail */
589 ev
= list_tail(&zevent_list
);
596 * Existing stream continue with the next element and remove
597 * ourselves from the wait queue for the previous element
599 ev
= list_prev(&zevent_list
, ze
->ze_zevent
);
/* Fail with ENOMEM (without advancing) if the caller's buffer is small. */
606 VERIFY(nvlist_size(ev
->ev_nvl
, &size
, NV_ENCODE_NATIVE
) == 0);
607 if (size
> *event_size
) {
614 list_remove(&ze
->ze_zevent
->ev_ze_list
, ze
);
/* Advance the stream: register on the new event and hand back a copy. */
617 list_insert_head(&ev
->ev_ze_list
, ze
);
618 nvlist_dup(ev
->ev_nvl
, event
, KM_SLEEP
);
619 *dropped
= ze
->ze_dropped
;
622 mutex_exit(&zevent_lock
);
/*
 * Block until a new event is posted (cv_broadcast in zfs_zevent_post) or
 * the module is shutting down; interruptible by signals.
 * NOTE(review): the zevent_waiters bookkeeping, error returns and loop
 * structure are missing from this extraction.
 */
628 zfs_zevent_wait(zfs_zevent_t
*ze
)
632 mutex_enter(&zevent_lock
);
634 if (zevent_flags
& ZEVENT_SHUTDOWN
) {
640 cv_wait_interruptible(&zevent_cv
, &zevent_lock
);
641 if (issig(JUSTLOOKING
))
646 mutex_exit(&zevent_lock
);
/*
 * Allocate a zeroed per-consumer stream state and return it via *zep.
 */
652 zfs_zevent_init(zfs_zevent_t
**zep
)
656 ze
= *zep
= kmem_zalloc(sizeof (zfs_zevent_t
), KM_SLEEP
);
657 list_link_init(&ze
->ze_node
);
/*
 * Tear down a consumer stream: drop its registration on the current event
 * (under zevent_lock) and free the state.
 * NOTE(review): the "if (ze->ze_zevent)" guard around the list_remove is
 * missing from this extraction.
 */
661 zfs_zevent_destroy(zfs_zevent_t
*ze
)
663 mutex_enter(&zevent_lock
);
665 list_remove(&ze
->ze_zevent
->ev_ze_list
, ze
);
666 mutex_exit(&zevent_lock
);
668 kmem_free(ze
, sizeof (zfs_zevent_t
));
673 * Wrappers for FM nvlist allocators
/* Allocator op backing fm_mem_alloc_ops; 'nva' is unused. */
677 i_fm_alloc(nv_alloc_t
*nva
, size_t size
)
679 return (kmem_zalloc(size
, KM_PUSHPAGE
));
/* Free op backing fm_mem_alloc_ops; 'nva' is unused. */
684 i_fm_free(nv_alloc_t
*nva
, void *buf
, size_t size
)
686 kmem_free(buf
, size
);
/*
 * nv_alloc ops vector wiring i_fm_alloc/i_fm_free into nvlist allocation.
 * NOTE(review): the initializer body is missing from this extraction.
 */
689 const nv_alloc_ops_t fm_mem_alloc_ops
= {
698 * Create and initialize a new nv_alloc_t for a fixed buffer, buf. A pointer
699 * to the newly allocated nv_alloc_t structure is returned upon success or NULL
700 * is returned to indicate that the nv_alloc structure could not be created.
/* NOTE(review): the NULL/success returns are missing from this extraction. */
703 fm_nva_xcreate(char *buf
, size_t bufsz
)
705 nv_alloc_t
*nvhdl
= kmem_zalloc(sizeof (nv_alloc_t
), KM_SLEEP
);
707 if (bufsz
== 0 || nv_alloc_init(nvhdl
, nv_fixed_ops
, buf
, bufsz
) != 0) {
708 kmem_free(nvhdl
, sizeof (nv_alloc_t
));
716 * Destroy a previously allocated nv_alloc structure. The fixed buffer
717 * associated with nva must be freed by the caller.
/* NOTE(review): the nv_alloc_fini(nva) call is missing from this extraction. */
720 fm_nva_xdestroy(nv_alloc_t
*nva
)
723 kmem_free(nva
, sizeof (nv_alloc_t
));
727 * Create a new nv list. A pointer to a new nv list structure is returned
728 * upon success or NULL is returned to indicate that the structure could
729 * not be created. The newly created nv list is created and managed by the
730 * operations installed in nva. If nva is NULL, the default FMA nva
731 * operations are installed and used.
733 * When called from the kernel and nva == NULL, this function must be called
734 * from passive kernel context with no locks held that can prevent a
735 * sleeping memory allocation from occurring. Otherwise, this function may
736 * be called from other kernel contexts as long a valid nva created via
737 * fm_nva_create() is supplied.
/*
 * NOTE(review): local declarations (nvl, the hdl_alloced flag), the
 * nva != NULL branch and the final returns are missing from this extraction.
 */
740 fm_nvlist_create(nv_alloc_t
*nva
)
747 nvhdl
= kmem_zalloc(sizeof (nv_alloc_t
), KM_PUSHPAGE
);
749 if (nv_alloc_init(nvhdl
, &fm_mem_alloc_ops
, NULL
, 0) != 0) {
750 kmem_free(nvhdl
, sizeof (nv_alloc_t
));
/* Allocation of the nvlist itself failed: unwind the handle we created. */
758 if (nvlist_xalloc(&nvl
, NV_UNIQUE_NAME
, nvhdl
) != 0) {
760 nv_alloc_fini(nvhdl
);
761 kmem_free(nvhdl
, sizeof (nv_alloc_t
));
770 * Destroy a previously allocated nvlist structure. flag indicates whether
771 * or not the associated nva structure should be freed (FM_NVA_FREE) or
772 * retained (FM_NVA_RETAIN). Retaining the nv alloc structure allows
773 * it to be re-used for future nvlist creation operations.
/* NOTE(review): the nvlist_free() call and the nva != NULL / default-ops
 * handling are missing from this extraction. */
776 fm_nvlist_destroy(nvlist_t
*nvl
, int flag
)
778 nv_alloc_t
*nva
= nvlist_lookup_nv_alloc(nvl
);
783 if (flag
== FM_NVA_FREE
)
784 fm_nva_xdestroy(nva
);
/*
 * Consume a (name, DATA_TYPE_*, value[, nelem]) varargs sequence and add
 * each entry to 'payload' with the matching nvlist_add_* call, stopping on
 * the first error or a NULL name. Array types are preceded by an int count.
 * NOTE(review): the return type, local declarations (ret, nelem, type), the
 * switch header, all break statements and the default/error cases are
 * missing from this extraction.
 */
789 i_fm_payload_set(nvlist_t
*payload
, const char *name
, va_list ap
)
794 while (ret
== 0 && name
!= NULL
) {
795 type
= va_arg(ap
, data_type_t
);
798 ret
= nvlist_add_byte(payload
, name
,
801 case DATA_TYPE_BYTE_ARRAY
:
802 nelem
= va_arg(ap
, int);
803 ret
= nvlist_add_byte_array(payload
, name
,
804 va_arg(ap
, uchar_t
*), nelem
);
806 case DATA_TYPE_BOOLEAN_VALUE
:
807 ret
= nvlist_add_boolean_value(payload
, name
,
808 va_arg(ap
, boolean_t
));
810 case DATA_TYPE_BOOLEAN_ARRAY
:
811 nelem
= va_arg(ap
, int);
812 ret
= nvlist_add_boolean_array(payload
, name
,
813 va_arg(ap
, boolean_t
*), nelem
);
816 ret
= nvlist_add_int8(payload
, name
,
819 case DATA_TYPE_INT8_ARRAY
:
820 nelem
= va_arg(ap
, int);
821 ret
= nvlist_add_int8_array(payload
, name
,
822 va_arg(ap
, int8_t *), nelem
);
824 case DATA_TYPE_UINT8
:
825 ret
= nvlist_add_uint8(payload
, name
,
828 case DATA_TYPE_UINT8_ARRAY
:
829 nelem
= va_arg(ap
, int);
830 ret
= nvlist_add_uint8_array(payload
, name
,
831 va_arg(ap
, uint8_t *), nelem
);
833 case DATA_TYPE_INT16
:
834 ret
= nvlist_add_int16(payload
, name
,
837 case DATA_TYPE_INT16_ARRAY
:
838 nelem
= va_arg(ap
, int);
839 ret
= nvlist_add_int16_array(payload
, name
,
840 va_arg(ap
, int16_t *), nelem
);
842 case DATA_TYPE_UINT16
:
843 ret
= nvlist_add_uint16(payload
, name
,
846 case DATA_TYPE_UINT16_ARRAY
:
847 nelem
= va_arg(ap
, int);
848 ret
= nvlist_add_uint16_array(payload
, name
,
849 va_arg(ap
, uint16_t *), nelem
);
851 case DATA_TYPE_INT32
:
852 ret
= nvlist_add_int32(payload
, name
,
853 va_arg(ap
, int32_t));
855 case DATA_TYPE_INT32_ARRAY
:
856 nelem
= va_arg(ap
, int);
857 ret
= nvlist_add_int32_array(payload
, name
,
858 va_arg(ap
, int32_t *), nelem
);
860 case DATA_TYPE_UINT32
:
861 ret
= nvlist_add_uint32(payload
, name
,
862 va_arg(ap
, uint32_t));
864 case DATA_TYPE_UINT32_ARRAY
:
865 nelem
= va_arg(ap
, int);
866 ret
= nvlist_add_uint32_array(payload
, name
,
867 va_arg(ap
, uint32_t *), nelem
);
869 case DATA_TYPE_INT64
:
870 ret
= nvlist_add_int64(payload
, name
,
871 va_arg(ap
, int64_t));
873 case DATA_TYPE_INT64_ARRAY
:
874 nelem
= va_arg(ap
, int);
875 ret
= nvlist_add_int64_array(payload
, name
,
876 va_arg(ap
, int64_t *), nelem
);
878 case DATA_TYPE_UINT64
:
879 ret
= nvlist_add_uint64(payload
, name
,
880 va_arg(ap
, uint64_t));
882 case DATA_TYPE_UINT64_ARRAY
:
883 nelem
= va_arg(ap
, int);
884 ret
= nvlist_add_uint64_array(payload
, name
,
885 va_arg(ap
, uint64_t *), nelem
);
887 case DATA_TYPE_STRING
:
888 ret
= nvlist_add_string(payload
, name
,
891 case DATA_TYPE_STRING_ARRAY
:
892 nelem
= va_arg(ap
, int);
893 ret
= nvlist_add_string_array(payload
, name
,
894 va_arg(ap
, char **), nelem
);
896 case DATA_TYPE_NVLIST
:
897 ret
= nvlist_add_nvlist(payload
, name
,
898 va_arg(ap
, nvlist_t
*));
900 case DATA_TYPE_NVLIST_ARRAY
:
901 nelem
= va_arg(ap
, int);
902 ret
= nvlist_add_nvlist_array(payload
, name
,
903 va_arg(ap
, nvlist_t
**), nelem
);
/* Fetch the next name; NULL terminates the sequence. */
909 name
= va_arg(ap
, char *);
/*
 * Public varargs front-end for i_fm_payload_set(); bumps the
 * payload-set-failed kstat on error.
 * NOTE(review): local declarations, va_end and the atomic_add_64 call's
 * opening line are missing from this extraction.
 */
915 fm_payload_set(nvlist_t
*payload
, ...)
921 va_start(ap
, payload
);
922 name
= va_arg(ap
, char *);
923 ret
= i_fm_payload_set(payload
, name
, ap
);
928 &erpt_kstat_data
.payload_set_failed
.value
.ui64
, 1);
932 * Set-up and validate the members of an ereport event according to:
934 * Member name Type Value
935 * ====================================================
936 * class string ereport
939 * detector nvlist_t <detector>
940 * ereport-payload nvlist_t <var args>
942 * We don't actually add a 'version' member to the payload. Really,
943 * the version quoted to us by our caller is that of the category 1
944 * "ereport" event class (and we require FM_EREPORT_VERS0) but
945 * the payload version of the actual leaf class event under construction
946 * may be something else. Callers should supply a version in the varargs,
947 * or (better) we could take two version arguments - one for the
948 * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
949 * for the leaf class.
/*
 * NOTE(review): local declarations (ap, name, ret), the early returns after
 * each kstat bump, and va_end are missing from this extraction.
 */
952 fm_ereport_set(nvlist_t
*ereport
, int version
, const char *erpt_class
,
953 uint64_t ena
, const nvlist_t
*detector
, ...)
955 char ereport_class
[FM_MAX_CLASS
];
960 if (version
!= FM_EREPORT_VERS0
) {
961 atomic_add_64(&erpt_kstat_data
.erpt_set_failed
.value
.ui64
, 1);
/* class = "ereport.<erpt_class>" */
965 (void) snprintf(ereport_class
, FM_MAX_CLASS
, "%s.%s",
966 FM_EREPORT_CLASS
, erpt_class
);
967 if (nvlist_add_string(ereport
, FM_CLASS
, ereport_class
) != 0) {
968 atomic_add_64(&erpt_kstat_data
.erpt_set_failed
.value
.ui64
, 1);
972 if (nvlist_add_uint64(ereport
, FM_EREPORT_ENA
, ena
)) {
973 atomic_add_64(&erpt_kstat_data
.erpt_set_failed
.value
.ui64
, 1);
976 if (nvlist_add_nvlist(ereport
, FM_EREPORT_DETECTOR
,
977 (nvlist_t
*)detector
) != 0) {
978 atomic_add_64(&erpt_kstat_data
.erpt_set_failed
.value
.ui64
, 1);
/* Remaining varargs form the (name, type, value) payload sequence. */
981 va_start(ap
, detector
);
982 name
= va_arg(ap
, const char *);
983 ret
= i_fm_payload_set(ereport
, name
, ap
);
987 atomic_add_64(&erpt_kstat_data
.erpt_set_failed
.value
.ui64
, 1);
991 * Set-up and validate the members of an hc fmri according to;
993 * Member name Type Value
994 * ===================================================
996 * auth nvlist_t <auth>
997 * hc-name string <name>
1000 * Note that auth and hc-id are optional members.
1003 #define HC_MAXPAIRS 20
1004 #define HC_MAXNAMELEN 50
/*
 * Add the version/scheme/authority members shared by all hc FMRIs.
 * NOTE(review): the return type (appears boolean-like given callers use
 * "if (!fm_fmri_hc_set_common(...))" — confirm), the failure returns and
 * final success return are missing from this extraction.
 */
1007 fm_fmri_hc_set_common(nvlist_t
*fmri
, int version
, const nvlist_t
*auth
)
1009 if (version
!= FM_HC_SCHEME_VERSION
) {
1010 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1014 if (nvlist_add_uint8(fmri
, FM_VERSION
, version
) != 0 ||
1015 nvlist_add_string(fmri
, FM_FMRI_SCHEME
, FM_FMRI_SCHEME_HC
) != 0) {
1016 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1020 if (auth
!= NULL
&& nvlist_add_nvlist(fmri
, FM_FMRI_AUTHORITY
,
1021 (nvlist_t
*)auth
) != 0) {
1022 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
/*
 * Build an hc-scheme FMRI from up to HC_MAXPAIRS (name, id) varargs pairs,
 * attach the pair list as FM_FMRI_HC_LIST, and optionally add snvl as
 * FM_FMRI_HC_SPECIFIC.
 * NOTE(review): local declarations (ap, i, idstr), va_end, the error-path
 * cleanup structure and the "if (snvl != NULL)" guard are missing from
 * this extraction.
 */
1030 fm_fmri_hc_set(nvlist_t
*fmri
, int version
, const nvlist_t
*auth
,
1031 nvlist_t
*snvl
, int npairs
, ...)
1033 nv_alloc_t
*nva
= nvlist_lookup_nv_alloc(fmri
);
1034 nvlist_t
*pairs
[HC_MAXPAIRS
];
1038 if (!fm_fmri_hc_set_common(fmri
, version
, auth
))
/* Clamp to the fixed-size pairs array. */
1041 npairs
= MIN(npairs
, HC_MAXPAIRS
);
1043 va_start(ap
, npairs
);
1044 for (i
= 0; i
< npairs
; i
++) {
1045 const char *name
= va_arg(ap
, const char *);
1046 uint32_t id
= va_arg(ap
, uint32_t);
1049 (void) snprintf(idstr
, sizeof (idstr
), "%u", id
);
1051 pairs
[i
] = fm_nvlist_create(nva
);
1052 if (nvlist_add_string(pairs
[i
], FM_FMRI_HC_NAME
, name
) != 0 ||
1053 nvlist_add_string(pairs
[i
], FM_FMRI_HC_ID
, idstr
) != 0) {
1055 &erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1060 if (nvlist_add_nvlist_array(fmri
, FM_FMRI_HC_LIST
, pairs
, npairs
) != 0)
1061 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
/* Pair nvlists were copied into the fmri; release the temporaries. */
1063 for (i
= 0; i
< npairs
; i
++)
1064 fm_nvlist_destroy(pairs
[i
], FM_NVA_RETAIN
);
1067 if (nvlist_add_nvlist(fmri
, FM_FMRI_HC_SPECIFIC
, snvl
) != 0) {
1069 &erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
/*
 * Like fm_fmri_hc_set(), but first copies the hc-list pairs from an
 * existing 'bboard' FMRI and then appends the varargs (name, id) pairs.
 * NOTE(review): local declarations (ap, hcl, n, i, j, idstr), va_end,
 * several error returns and loop-closing braces are missing from this
 * extraction.
 */
1075 fm_fmri_hc_create(nvlist_t
*fmri
, int version
, const nvlist_t
*auth
,
1076 nvlist_t
*snvl
, nvlist_t
*bboard
, int npairs
, ...)
1078 nv_alloc_t
*nva
= nvlist_lookup_nv_alloc(fmri
);
1079 nvlist_t
*pairs
[HC_MAXPAIRS
];
1084 char *hcname
, *hcid
;
1086 if (!fm_fmri_hc_set_common(fmri
, version
, auth
))
1090 * copy the bboard nvpairs to the pairs array
1092 if (nvlist_lookup_nvlist_array(bboard
, FM_FMRI_HC_LIST
, &hcl
, &n
)
1094 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1098 for (i
= 0; i
< n
; i
++) {
1099 if (nvlist_lookup_string(hcl
[i
], FM_FMRI_HC_NAME
,
1102 &erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1105 if (nvlist_lookup_string(hcl
[i
], FM_FMRI_HC_ID
, &hcid
) != 0) {
1107 &erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1111 pairs
[i
] = fm_nvlist_create(nva
);
1112 if (nvlist_add_string(pairs
[i
], FM_FMRI_HC_NAME
, hcname
) != 0 ||
1113 nvlist_add_string(pairs
[i
], FM_FMRI_HC_ID
, hcid
) != 0) {
/* On failure destroy every pair created so far. */
1114 for (j
= 0; j
<= i
; j
++) {
1115 if (pairs
[j
] != NULL
)
1116 fm_nvlist_destroy(pairs
[j
],
1120 &erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1126 * create the pairs from passed in pairs
1128 npairs
= MIN(npairs
, HC_MAXPAIRS
);
1130 va_start(ap
, npairs
);
1131 for (i
= n
; i
< npairs
+ n
; i
++) {
1132 const char *name
= va_arg(ap
, const char *);
1133 uint32_t id
= va_arg(ap
, uint32_t);
1135 (void) snprintf(idstr
, sizeof (idstr
), "%u", id
);
1136 pairs
[i
] = fm_nvlist_create(nva
);
1137 if (nvlist_add_string(pairs
[i
], FM_FMRI_HC_NAME
, name
) != 0 ||
1138 nvlist_add_string(pairs
[i
], FM_FMRI_HC_ID
, idstr
) != 0) {
1139 for (j
= 0; j
<= i
; j
++) {
1140 if (pairs
[j
] != NULL
)
1141 fm_nvlist_destroy(pairs
[j
],
1145 &erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1152 * Create the fmri hc list
1154 if (nvlist_add_nvlist_array(fmri
, FM_FMRI_HC_LIST
, pairs
,
1156 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1160 for (i
= 0; i
< npairs
+ n
; i
++) {
1161 fm_nvlist_destroy(pairs
[i
], FM_NVA_RETAIN
);
1165 if (nvlist_add_nvlist(fmri
, FM_FMRI_HC_SPECIFIC
, snvl
) != 0) {
1167 &erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1174 * Set-up and validate the members of an dev fmri according to:
1176 * Member name Type Value
1177 * ====================================================
1179 * auth nvlist_t <auth>
1180 * devpath string <devpath>
1181 * [devid] string <devid>
1182 * [target-port-l0id] string <target-port-lun0-id>
1184 * Note that auth and devid are optional members.
/*
 * NOTE(review): the 'err' declaration/initialization and the NULL guards
 * around the optional auth/devid/tpl0 additions are missing from this
 * extraction; errors are accumulated with |= and reported once at the end.
 */
1187 fm_fmri_dev_set(nvlist_t
*fmri_dev
, int version
, const nvlist_t
*auth
,
1188 const char *devpath
, const char *devid
, const char *tpl0
)
1192 if (version
!= DEV_SCHEME_VERSION0
) {
1193 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1197 err
|= nvlist_add_uint8(fmri_dev
, FM_VERSION
, version
);
1198 err
|= nvlist_add_string(fmri_dev
, FM_FMRI_SCHEME
, FM_FMRI_SCHEME_DEV
);
1201 err
|= nvlist_add_nvlist(fmri_dev
, FM_FMRI_AUTHORITY
,
1205 err
|= nvlist_add_string(fmri_dev
, FM_FMRI_DEV_PATH
, devpath
);
1208 err
|= nvlist_add_string(fmri_dev
, FM_FMRI_DEV_ID
, devid
);
1211 err
|= nvlist_add_string(fmri_dev
, FM_FMRI_DEV_TGTPTLUN0
, tpl0
);
1214 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1219 * Set-up and validate the members of an cpu fmri according to:
1221 * Member name Type Value
1222 * ====================================================
1224 * auth nvlist_t <auth>
1225 * cpuid uint32_t <cpu_id>
1226 * cpumask uint8_t <cpu_mask>
1227 * serial uint64_t <serial_id>
1229 * Note that auth, cpumask, serial are optional members.
/*
 * NOTE(review): the early returns after the first three failure paths and
 * the *cpu_maskp dereference in the cpumask addition are missing from this
 * extraction.
 */
1233 fm_fmri_cpu_set(nvlist_t
*fmri_cpu
, int version
, const nvlist_t
*auth
,
1234 uint32_t cpu_id
, uint8_t *cpu_maskp
, const char *serial_idp
)
/* All failure paths bump the same fmri-set-failed kstat counter. */
1236 uint64_t *failedp
= &erpt_kstat_data
.fmri_set_failed
.value
.ui64
;
1238 if (version
< CPU_SCHEME_VERSION1
) {
1239 atomic_add_64(failedp
, 1);
1243 if (nvlist_add_uint8(fmri_cpu
, FM_VERSION
, version
) != 0) {
1244 atomic_add_64(failedp
, 1);
1248 if (nvlist_add_string(fmri_cpu
, FM_FMRI_SCHEME
,
1249 FM_FMRI_SCHEME_CPU
) != 0) {
1250 atomic_add_64(failedp
, 1);
1254 if (auth
!= NULL
&& nvlist_add_nvlist(fmri_cpu
, FM_FMRI_AUTHORITY
,
1255 (nvlist_t
*)auth
) != 0)
1256 atomic_add_64(failedp
, 1);
1258 if (nvlist_add_uint32(fmri_cpu
, FM_FMRI_CPU_ID
, cpu_id
) != 0)
1259 atomic_add_64(failedp
, 1);
1261 if (cpu_maskp
!= NULL
&& nvlist_add_uint8(fmri_cpu
, FM_FMRI_CPU_MASK
,
1263 atomic_add_64(failedp
, 1);
1265 if (serial_idp
== NULL
|| nvlist_add_string(fmri_cpu
,
1266 FM_FMRI_CPU_SERIAL_ID
, (char *)serial_idp
) != 0)
1267 atomic_add_64(failedp
, 1);
1271 * Set-up and validate the members of a mem according to:
1273 * Member name Type Value
1274 * ====================================================
1276 * auth nvlist_t <auth> [optional]
1277 * unum string <unum>
1278 * serial string <serial> [optional*]
1279 * offset uint64_t <offset> [optional]
1281 * * serial is required if offset is present
/*
 * NOTE(review): the early returns after each kstat bump, the "if (auth !=
 * NULL)" guard before the authority addition, and the closing braces of the
 * serial/offset blocks are missing from this extraction.
 */
1284 fm_fmri_mem_set(nvlist_t
*fmri
, int version
, const nvlist_t
*auth
,
1285 const char *unum
, const char *serial
, uint64_t offset
)
1287 if (version
!= MEM_SCHEME_VERSION0
) {
1288 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
/* An offset without a serial is invalid per the header comment above. */
1292 if (!serial
&& (offset
!= (uint64_t)-1)) {
1293 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1297 if (nvlist_add_uint8(fmri
, FM_VERSION
, version
) != 0) {
1298 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1302 if (nvlist_add_string(fmri
, FM_FMRI_SCHEME
, FM_FMRI_SCHEME_MEM
) != 0) {
1303 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1308 if (nvlist_add_nvlist(fmri
, FM_FMRI_AUTHORITY
,
1309 (nvlist_t
*)auth
) != 0) {
1311 &erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1315 if (nvlist_add_string(fmri
, FM_FMRI_MEM_UNUM
, unum
) != 0) {
1316 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1319 if (serial
!= NULL
) {
1320 if (nvlist_add_string_array(fmri
, FM_FMRI_MEM_SERIAL_ID
,
1321 (char **)&serial
, 1) != 0) {
1323 &erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1325 if (offset
!= (uint64_t)-1) {
1326 if (nvlist_add_uint64(fmri
, FM_FMRI_MEM_OFFSET
,
1328 atomic_add_64(&erpt_kstat_data
.
1329 fmri_set_failed
.value
.ui64
, 1);
/*
 * Build a zfs-scheme FMRI from a pool GUID and (optionally, when non-zero)
 * a vdev GUID, bumping fmri-set-failed on any nvlist error.
 * NOTE(review): the vdev_guid parameter in the signature, early returns and
 * closing braces are missing from this extraction.
 */
1336 fm_fmri_zfs_set(nvlist_t
*fmri
, int version
, uint64_t pool_guid
,
1339 if (version
!= ZFS_SCHEME_VERSION0
) {
1340 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1344 if (nvlist_add_uint8(fmri
, FM_VERSION
, version
) != 0) {
1345 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1349 if (nvlist_add_string(fmri
, FM_FMRI_SCHEME
, FM_FMRI_SCHEME_ZFS
) != 0) {
1350 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1354 if (nvlist_add_uint64(fmri
, FM_FMRI_ZFS_POOL
, pool_guid
) != 0) {
1355 atomic_add_64(&erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
1358 if (vdev_guid
!= 0) {
1359 if (nvlist_add_uint64(fmri
, FM_FMRI_ZFS_VDEV
, vdev_guid
) != 0) {
1361 &erpt_kstat_data
.fmri_set_failed
.value
.ui64
, 1);
/*
 * Return the ENA with its generation field incremented (format-dependent
 * shift). NOTE(review): the case labels, default case, break statements
 * and the final return are missing from this extraction; also note the
 * increments use a plain int 1 rather than 1ULL — verify the shift widths
 * against the full source before relying on this text.
 */
1367 fm_ena_increment(uint64_t ena
)
1371 switch (ENA_FORMAT(ena
)) {
1373 new_ena
= ena
+ (1 << ENA_FMT1_GEN_SHFT
);
1376 new_ena
= ena
+ (1 << ENA_FMT2_GEN_SHFT
);
/*
 * Assemble an ENA for the given cpu in the requested format. For format 1,
 * a zero timestamp falls through to a gethrtime()-based value; format 2
 * encodes only the timestamp. NOTE(review): the switch/case/if structure
 * and the final return are missing from this extraction.
 */
1386 fm_ena_generate_cpu(uint64_t timestamp
, processorid_t cpuid
, uchar_t format
)
/* Format 1 with a caller-supplied timestamp. */
1393 ena
= (uint64_t)((format
& ENA_FORMAT_MASK
) |
1394 ((cpuid
<< ENA_FMT1_CPUID_SHFT
) &
1395 ENA_FMT1_CPUID_MASK
) |
1396 ((timestamp
<< ENA_FMT1_TIME_SHFT
) &
1397 ENA_FMT1_TIME_MASK
));
/* Format 1 without a timestamp: use the current hrtime. */
1399 ena
= (uint64_t)((format
& ENA_FORMAT_MASK
) |
1400 ((cpuid
<< ENA_FMT1_CPUID_SHFT
) &
1401 ENA_FMT1_CPUID_MASK
) |
1402 ((gethrtime() << ENA_FMT1_TIME_SHFT
) &
1403 ENA_FMT1_TIME_MASK
));
/* Format 2: timestamp only (no cpuid field). */
1407 ena
= (uint64_t)((format
& ENA_FORMAT_MASK
) |
1408 ((timestamp
<< ENA_FMT2_TIME_SHFT
) & ENA_FMT2_TIME_MASK
));
/*
 * Convenience wrapper around fm_ena_generate_cpu() using the current cpu.
 */
1418 fm_ena_generate(uint64_t timestamp
, uchar_t format
)
1423 ena
= fm_ena_generate_cpu(timestamp
, getcpuid(), format
);
/*
 * Extract the generation field from an ENA (format-dependent mask/shift).
 * NOTE(review): case labels, default case and the return are missing from
 * this extraction.
 */
1430 fm_ena_generation_get(uint64_t ena
)
1434 switch (ENA_FORMAT(ena
)) {
1436 gen
= (ena
& ENA_FMT1_GEN_MASK
) >> ENA_FMT1_GEN_SHFT
;
1439 gen
= (ena
& ENA_FMT2_GEN_MASK
) >> ENA_FMT2_GEN_SHFT
;
/* Extract the format field from an ENA. */
1450 fm_ena_format_get(uint64_t ena
)
1453 return (ENA_FORMAT(ena
));
/*
 * Extract the ID field from an ENA (format-dependent mask/shift).
 * NOTE(review): case labels, default case and the return are missing from
 * this extraction.
 */
1457 fm_ena_id_get(uint64_t ena
)
1461 switch (ENA_FORMAT(ena
)) {
1463 id
= (ena
& ENA_FMT1_ID_MASK
) >> ENA_FMT1_ID_SHFT
;
1466 id
= (ena
& ENA_FMT2_ID_MASK
) >> ENA_FMT2_ID_SHFT
;
/*
 * Extract the timestamp field from an ENA (format-dependent mask/shift).
 * NOTE(review): case labels, default case and the return are missing from
 * this extraction.
 */
1476 fm_ena_time_get(uint64_t ena
)
1480 switch (ENA_FORMAT(ena
)) {
1482 time
= (ena
& ENA_FMT1_TIME_MASK
) >> ENA_FMT1_TIME_SHFT
;
1485 time
= (ena
& ENA_FMT2_TIME_MASK
) >> ENA_FMT2_TIME_SHFT
;
/*
 * NOTE(review): this appears to be the body of the module init routine
 * (presumably fm_init — its signature line is missing from this
 * extraction). It auto-sizes the zevent queue, installs the "zfs/fm"
 * kstat, and initializes the zevent lock/list/condvar.
 */
1501 if (zfs_zevent_len_max
== 0)
1502 zfs_zevent_len_max
= ERPT_MAX_ERRS
* MAX(max_ncpus
, 4);
1504 /* Initialize zevent allocation and generation kstats */
1505 fm_ksp
= kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED
,
1506 sizeof (struct erpt_kstat
) / sizeof (kstat_named_t
),
1507 KSTAT_FLAG_VIRTUAL
);
1509 if (fm_ksp
!= NULL
) {
1510 fm_ksp
->ks_data
= &erpt_kstat_data
;
1511 kstat_install(fm_ksp
);
1513 cmn_err(CE_NOTE
, "failed to create fm/misc kstat\n");
1516 mutex_init(&zevent_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
1517 list_create(&zevent_list
, sizeof (zevent_t
),
1518 offsetof(zevent_t
, ev_node
));
1519 cv_init(&zevent_cv
, NULL
, CV_DEFAULT
, NULL
);
/*
 * NOTE(review): this appears to be the body of the module teardown routine
 * (presumably fm_fini — its signature and the 'count' declaration are
 * missing from this extraction). It drains the queue, signals shutdown,
 * waits for sleepers to exit, then destroys the synchronization objects
 * and the kstat.
 */
1527 zfs_zevent_drain_all(&count
);
1529 mutex_enter(&zevent_lock
);
1530 cv_broadcast(&zevent_cv
);
/* Wake waiters and let them observe ZEVENT_SHUTDOWN before teardown. */
1532 zevent_flags
|= ZEVENT_SHUTDOWN
;
1533 while (zevent_waiters
> 0) {
1534 mutex_exit(&zevent_lock
);
1536 mutex_enter(&zevent_lock
);
1538 mutex_exit(&zevent_lock
);
1540 cv_destroy(&zevent_cv
);
1541 list_destroy(&zevent_list
);
1542 mutex_destroy(&zevent_lock
);
1544 if (fm_ksp
!= NULL
) {
1545 kstat_delete(fm_ksp
);
/* Linux module parameters exposing the tunables declared at the top. */
1550 module_param(zfs_zevent_len_max
, int, 0644);
1551 MODULE_PARM_DESC(zfs_zevent_len_max
, "Max event queue length");
1553 module_param(zfs_zevent_cols
, int, 0644);
1554 MODULE_PARM_DESC(zfs_zevent_cols
, "Max event column width");
1556 module_param(zfs_zevent_console
, int, 0644);
1557 MODULE_PARM_DESC(zfs_zevent_console
, "Log events to the console");
1559 #endif /* _KERNEL */