/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Fault Management Architecture (FMA) Resource and Protocol Support
 *
 * The routines contained herein provide services to support kernel subsystems
 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
 *
 * Name-Value Pair Lists
 *
 * The embodiment of an FMA protocol element (event, fmri or authority) is a
 * name-value pair list (nvlist_t). FMA-specific nvlist constructor and
 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
 * to create an nvpair list using custom allocators. Callers may choose to
 * allocate either from the kernel memory allocator, or from a preallocated
 * buffer, useful in constrained contexts like high-level interrupt routines.
 *
 * Protocol Event and FMRI Construction
 *
 * Convenience routines are provided to construct nvlist events according to
 * the FMA Event Protocol and Naming Schema specification for ereports and
 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
 *
 * ENA Manipulation
 *
 * Routines to generate ENA formats 0, 1 and 2 are available as well as
 * routines to increment formats 1 and 2. Individual fields within the
 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
 * fm_ena_format_get() and fm_ena_generation_get().
 */

#include <sys/types.h>
#include <sys/time.h>
#include <sys/list.h>
#include <sys/nvpair.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/systeminfo.h>
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
#include <sys/kstat.h>
#include <sys/zfs_context.h>
#ifdef _KERNEL
#include <sys/atomic.h>
#include <sys/condvar.h>
#include <sys/console.h>
#include <sys/zfs_ioctl.h>

int zfs_zevent_len_max = 512;
int zfs_zevent_cols = 80;
int zfs_zevent_console = 0;

static int zevent_len_cur = 0;
static int zevent_waiters = 0;
static int zevent_flags = 0;

/* Num events rate limited since the last time zfs_zevent_next() was called */
static uint64_t ratelimit_dropped = 0;

/*
 * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
 * posted. The posted EIDs are monotonically increasing but not persistent.
 * They will be reset to the initial value (1) each time the kernel module is
 * loaded.
 */
static uint64_t zevent_eid = 0;

static kmutex_t zevent_lock;
static list_t zevent_list;
static kcondvar_t zevent_cv;
#endif /* _KERNEL */


/*
 * Common fault management kstats to record event generation failures
 */

struct erpt_kstat {
	kstat_named_t	erpt_dropped;		/* num erpts dropped on post */
	kstat_named_t	erpt_set_failed;	/* num erpt set failures */
	kstat_named_t	fmri_set_failed;	/* num fmri set failures */
	kstat_named_t	payload_set_failed;	/* num payload set failures */
	kstat_named_t	erpt_duplicates;	/* num duplicate erpts */
};

static struct erpt_kstat erpt_kstat_data = {
	{ "erpt-dropped", KSTAT_DATA_UINT64 },
	{ "erpt-set-failed", KSTAT_DATA_UINT64 },
	{ "fmri-set-failed", KSTAT_DATA_UINT64 },
	{ "payload-set-failed", KSTAT_DATA_UINT64 },
	{ "erpt-duplicates", KSTAT_DATA_UINT64 }
};

kstat_t *fm_ksp;

#ifdef _KERNEL

/*
 * Formatting utility function for fm_nvprintr. We attempt to wrap chunks of
 * output so they aren't split across console lines, and return the end column.
 */
/*PRINTFLIKE4*/
static int
fm_printf(int depth, int c, int cols, const char *format, ...)
{
	va_list ap;
	int width;
	char c1;

	va_start(ap, format);
	width = vsnprintf(&c1, sizeof (c1), format, ap);
	va_end(ap);

	if (c + width >= cols) {
		console_printf("\n");
		c = 0;
		if (format[0] != ' ' && depth > 0) {
			console_printf(" ");
			c++;
		}
	}

	va_start(ap, format);
	console_vprintf(format, ap);
	va_end(ap);

	return ((c + width) % cols);
}

/*
 * Recursively print an nvlist in the specified column width and return the
 * column we end up in. This function is called recursively by fm_nvprint(),
 * below. We generically format the entire nvpair using hexadecimal
 * integers and strings, and elide any integer arrays. Arrays are basically
 * used for cache dumps right now, so we suppress them so as not to overwhelm
 * the amount of console output we produce at panic time. This can be further
 * enhanced as FMA technology grows based upon the needs of consumers. All
 * FMA telemetry is logged using the dump device transport, so the console
 * output serves only as a fallback in case this procedure is unsuccessful.
 */
static int
fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
{
	nvpair_t *nvp;

	for (nvp = nvlist_next_nvpair(nvl, NULL);
	    nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) {

		data_type_t type = nvpair_type(nvp);
		const char *name = nvpair_name(nvp);

		boolean_t b;
		uint8_t i8;
		uint16_t i16;
		uint32_t i32;
		uint64_t i64;
		char *str;
		nvlist_t *cnv;

		if (strcmp(name, FM_CLASS) == 0)
			continue;	/* already printed by caller */

		c = fm_printf(d, c, cols, " %s=", name);

		switch (type) {
		case DATA_TYPE_BOOLEAN:
			c = fm_printf(d + 1, c, cols, " 1");
			break;

		case DATA_TYPE_BOOLEAN_VALUE:
			(void) nvpair_value_boolean_value(nvp, &b);
			c = fm_printf(d + 1, c, cols, b ? "1" : "0");
			break;

		case DATA_TYPE_BYTE:
			(void) nvpair_value_byte(nvp, &i8);
			c = fm_printf(d + 1, c, cols, "0x%x", i8);
			break;

		case DATA_TYPE_INT8:
			(void) nvpair_value_int8(nvp, (void *)&i8);
			c = fm_printf(d + 1, c, cols, "0x%x", i8);
			break;

		case DATA_TYPE_UINT8:
			(void) nvpair_value_uint8(nvp, &i8);
			c = fm_printf(d + 1, c, cols, "0x%x", i8);
			break;

		case DATA_TYPE_INT16:
			(void) nvpair_value_int16(nvp, (void *)&i16);
			c = fm_printf(d + 1, c, cols, "0x%x", i16);
			break;

		case DATA_TYPE_UINT16:
			(void) nvpair_value_uint16(nvp, &i16);
			c = fm_printf(d + 1, c, cols, "0x%x", i16);
			break;

		case DATA_TYPE_INT32:
			(void) nvpair_value_int32(nvp, (void *)&i32);
			c = fm_printf(d + 1, c, cols, "0x%x", i32);
			break;

		case DATA_TYPE_UINT32:
			(void) nvpair_value_uint32(nvp, &i32);
			c = fm_printf(d + 1, c, cols, "0x%x", i32);
			break;

		case DATA_TYPE_INT64:
			(void) nvpair_value_int64(nvp, (void *)&i64);
			c = fm_printf(d + 1, c, cols, "0x%llx",
			    (u_longlong_t)i64);
			break;

		case DATA_TYPE_UINT64:
			(void) nvpair_value_uint64(nvp, &i64);
			c = fm_printf(d + 1, c, cols, "0x%llx",
			    (u_longlong_t)i64);
			break;

		case DATA_TYPE_HRTIME:
			(void) nvpair_value_hrtime(nvp, (void *)&i64);
			c = fm_printf(d + 1, c, cols, "0x%llx",
			    (u_longlong_t)i64);
			break;

		case DATA_TYPE_STRING:
			(void) nvpair_value_string(nvp, &str);
			c = fm_printf(d + 1, c, cols, "\"%s\"",
			    str ? str : "<NULL>");
			break;

		case DATA_TYPE_NVLIST:
			c = fm_printf(d + 1, c, cols, "[");
			(void) nvpair_value_nvlist(nvp, &cnv);
			c = fm_nvprintr(cnv, d + 1, c, cols);
			c = fm_printf(d + 1, c, cols, " ]");
			break;

		case DATA_TYPE_NVLIST_ARRAY: {
			nvlist_t **val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[");
			(void) nvpair_value_nvlist_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++) {
				c = fm_nvprintr(val[i], d + 1, c, cols);
			}
			c = fm_printf(d + 1, c, cols, " ]");
		}
			break;

		case DATA_TYPE_INT8_ARRAY: {
			int8_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_int8_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
		}

		case DATA_TYPE_UINT8_ARRAY: {
			uint8_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_uint8_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
		}

		case DATA_TYPE_INT16_ARRAY: {
			int16_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_int16_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
		}

		case DATA_TYPE_UINT16_ARRAY: {
			uint16_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_uint16_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
		}

		case DATA_TYPE_INT32_ARRAY: {
			int32_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_int32_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
		}

		case DATA_TYPE_UINT32_ARRAY: {
			uint32_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_uint32_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
		}

		case DATA_TYPE_INT64_ARRAY: {
			int64_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_int64_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
		}

		case DATA_TYPE_UINT64_ARRAY: {
			uint64_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_uint64_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
		}

		case DATA_TYPE_STRING_ARRAY:
		case DATA_TYPE_BOOLEAN_ARRAY:
		case DATA_TYPE_BYTE_ARRAY:
			c = fm_printf(d + 1, c, cols, "[...]");
			break;

		case DATA_TYPE_UNKNOWN:
		case DATA_TYPE_DONTCARE:
			c = fm_printf(d + 1, c, cols, "<unknown>");
			break;
		}
	}

	return (c);
}

void
fm_nvprint(nvlist_t *nvl)
{
	char *class;
	int c = 0;

	console_printf("\n");

	if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0)
		c = fm_printf(0, c, zfs_zevent_cols, "%s", class);

	if (fm_nvprintr(nvl, 0, c, zfs_zevent_cols) != 0)
		console_printf("\n");

	console_printf("\n");
}

static zevent_t *
zfs_zevent_alloc(void)
{
	zevent_t *ev;

	ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);

	list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t),
	    offsetof(zfs_zevent_t, ze_node));
	list_link_init(&ev->ev_node);

	return (ev);
}

static void
zfs_zevent_free(zevent_t *ev)
{
	/* Run provided cleanup callback */
	ev->ev_cb(ev->ev_nvl, ev->ev_detector);

	list_destroy(&ev->ev_ze_list);
	kmem_free(ev, sizeof (zevent_t));
}

static void
zfs_zevent_drain(zevent_t *ev)
{
	zfs_zevent_t *ze;

	ASSERT(MUTEX_HELD(&zevent_lock));
	list_remove(&zevent_list, ev);

	/* Remove references to this event in all private file data */
	while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
		list_remove(&ev->ev_ze_list, ze);
		ze->ze_zevent = NULL;
		ze->ze_dropped++;
	}

	zfs_zevent_free(ev);
}

void
zfs_zevent_drain_all(int *count)
{
	zevent_t *ev;

	mutex_enter(&zevent_lock);
	while ((ev = list_head(&zevent_list)) != NULL)
		zfs_zevent_drain(ev);

	*count = zevent_len_cur;
	zevent_len_cur = 0;
	mutex_exit(&zevent_lock);
}

/*
 * New zevents are inserted at the head. If the maximum queue
 * length is exceeded a zevent will be drained from the tail.
 * As part of this any user space processes which currently have
 * a reference to this zevent_t in their private data will have
 * this reference set to NULL.
 */
static void
zfs_zevent_insert(zevent_t *ev)
{
	ASSERT(MUTEX_HELD(&zevent_lock));
	list_insert_head(&zevent_list, ev);

	if (zevent_len_cur >= zfs_zevent_len_max)
		zfs_zevent_drain(list_tail(&zevent_list));
	else
		zevent_len_cur++;
}

/*
 * Post a zevent. The cb will be called when nvl and detector are no longer
 * needed, i.e.:
 * - An error happened and a zevent can't be posted. In this case, cb is called
 *   before zfs_zevent_post() returns.
 * - The event is being drained and freed.
 */
int
zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
{
	inode_timespec_t tv;
	int64_t tv_array[2];
	uint64_t eid;
	size_t nvl_size = 0;
	zevent_t *ev;
	int error;

	ASSERT(cb != NULL);

	gethrestime(&tv);
	tv_array[0] = tv.tv_sec;
	tv_array[1] = tv.tv_nsec;

	error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		goto out;
	}

	eid = atomic_inc_64_nv(&zevent_eid);
	error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		goto out;
	}

	error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		goto out;
	}

	if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		error = EOVERFLOW;
		goto out;
	}

	if (zfs_zevent_console)
		fm_nvprint(nvl);

	ev = zfs_zevent_alloc();
	if (ev == NULL) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		error = ENOMEM;
		goto out;
	}

	ev->ev_nvl = nvl;
	ev->ev_detector = detector;
	ev->ev_cb = cb;
	ev->ev_eid = eid;

	mutex_enter(&zevent_lock);
	zfs_zevent_insert(ev);
	cv_broadcast(&zevent_cv);
	mutex_exit(&zevent_lock);

out:
	if (error)
		cb(nvl, detector);

	return (error);
}
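
/*
 * Example: an illustrative sketch of posting a zevent. The callback frees
 * the nvlists once the event is dropped, drained, or fails to post; the
 * callback name and the pool_guid/vdev_guid variables are hypothetical.
 *
 *	static void
 *	my_zevent_cb(nvlist_t *nvl, nvlist_t *detector)
 *	{
 *		fm_nvlist_destroy(nvl, FM_NVA_FREE);
 *		if (detector != NULL)
 *			fm_nvlist_destroy(detector, FM_NVA_FREE);
 *	}
 *
 *	nvlist_t *nvl = fm_nvlist_create(NULL);
 *	nvlist_t *detector = fm_nvlist_create(NULL);
 *	fm_fmri_zfs_set(detector, ZFS_SCHEME_VERSION0, pool_guid, vdev_guid);
 *	fm_ereport_set(nvl, FM_EREPORT_VERS0, "fs.zfs.checksum",
 *	    fm_ena_generate(0, FM_ENA_FMT1), detector, NULL);
 *	(void) zfs_zevent_post(nvl, detector, my_zevent_cb);
 */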

void
zfs_zevent_track_duplicate(void)
{
	atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64);
}

static int
zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
{
	*ze = zfsdev_get_state(minor, ZST_ZEVENT);
	if (*ze == NULL)
		return (SET_ERROR(EBADF));

	return (0);
}

int
zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
{
	int error;

	error = zfsdev_getminor(fd, minorp);
	if (error == 0)
		error = zfs_zevent_minor_to_state(*minorp, ze);

	if (error)
		zfs_zevent_fd_rele(fd);

	return (error);
}

void
zfs_zevent_fd_rele(int fd)
{
	zfs_file_put(fd);
}

/*
 * Get the next zevent in the stream and place a copy in 'event'. This
 * may fail with ENOMEM if the encoded nvlist size exceeds the passed
 * 'event_size'. In this case the stream pointer is not advanced and
 * 'event_size' is set to the minimum required buffer size.
 */
int
zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
    uint64_t *dropped)
{
	zevent_t *ev;
	size_t size;
	int error = 0;

	mutex_enter(&zevent_lock);
	if (ze->ze_zevent == NULL) {
		/* A new stream starts at the beginning/tail */
		ev = list_tail(&zevent_list);
		if (ev == NULL) {
			error = ENOENT;
			goto out;
		}
	} else {
		/*
		 * An existing stream continues with the next element;
		 * remove ourselves from the wait queue of the previous
		 * element.
		 */
		ev = list_prev(&zevent_list, ze->ze_zevent);
		if (ev == NULL) {
			error = ENOENT;
			goto out;
		}
	}

	VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
	if (size > *event_size) {
		*event_size = size;
		error = ENOMEM;
		goto out;
	}

	if (ze->ze_zevent)
		list_remove(&ze->ze_zevent->ev_ze_list, ze);

	ze->ze_zevent = ev;
	list_insert_head(&ev->ev_ze_list, ze);
	(void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
	*dropped = ze->ze_dropped;

#ifdef _KERNEL
	/* Include events dropped due to rate limiting */
	*dropped += atomic_swap_64(&ratelimit_dropped, 0);
#endif
	ze->ze_dropped = 0;
out:
	mutex_exit(&zevent_lock);

	return (error);
}
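
/*
 * Example: an illustrative sketch of a consumer loop built on
 * zfs_zevent_next() and zfs_zevent_wait(). On ENOMEM the cursor is not
 * advanced and 'size' holds the required buffer size, so the caller may
 * simply retry; 'ze' is assumed to come from zfs_zevent_fd_hold().
 *
 *	nvlist_t *event;
 *	uint64_t size = ERPT_DATA_SZ, dropped = 0;
 *	int error;
 *
 *	for (;;) {
 *		error = zfs_zevent_next(ze, &event, &size, &dropped);
 *		if (error == ENOENT) {
 *			if (zfs_zevent_wait(ze) != 0)
 *				break;		(EINTR or ESHUTDOWN)
 *			continue;
 *		}
 *		if (error == ENOMEM)
 *			continue;	(retry with the larger size)
 *		if (error != 0)
 *			break;
 *		... consume 'event' and 'dropped' ...
 *		nvlist_free(event);
 *	}
 */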

/*
 * Wait in an interruptible state for any new events.
 */
int
zfs_zevent_wait(zfs_zevent_t *ze)
{
	int error = EAGAIN;

	mutex_enter(&zevent_lock);
	zevent_waiters++;

	while (error == EAGAIN) {
		if (zevent_flags & ZEVENT_SHUTDOWN) {
			error = SET_ERROR(ESHUTDOWN);
			break;
		}

		error = cv_wait_sig(&zevent_cv, &zevent_lock);
		if (signal_pending(current)) {
			error = SET_ERROR(EINTR);
			break;
		} else if (!list_is_empty(&zevent_list)) {
			error = 0;
			continue;
		} else {
			error = EAGAIN;
		}
	}

	zevent_waiters--;
	mutex_exit(&zevent_lock);

	return (error);
}

/*
 * The caller may seek to a specific EID by passing that EID. If the EID
 * is still available in the posted list of events the cursor is positioned
 * there. Otherwise ENOENT is returned and the cursor is not moved.
 *
 * There are two reserved EIDs which may be passed and will never fail.
 * ZEVENT_SEEK_START positions the cursor at the start of the list, and
 * ZEVENT_SEEK_END positions the cursor at the end of the list.
 */
int
zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid)
{
	zevent_t *ev;
	int error = 0;

	mutex_enter(&zevent_lock);

	if (eid == ZEVENT_SEEK_START) {
		if (ze->ze_zevent)
			list_remove(&ze->ze_zevent->ev_ze_list, ze);

		ze->ze_zevent = NULL;
		goto out;
	}

	if (eid == ZEVENT_SEEK_END) {
		if (ze->ze_zevent)
			list_remove(&ze->ze_zevent->ev_ze_list, ze);

		ev = list_head(&zevent_list);
		if (ev) {
			ze->ze_zevent = ev;
			list_insert_head(&ev->ev_ze_list, ze);
		} else {
			ze->ze_zevent = NULL;
		}

		goto out;
	}

	for (ev = list_tail(&zevent_list); ev != NULL;
	    ev = list_prev(&zevent_list, ev)) {
		if (ev->ev_eid == eid) {
			if (ze->ze_zevent)
				list_remove(&ze->ze_zevent->ev_ze_list, ze);

			ze->ze_zevent = ev;
			list_insert_head(&ev->ev_ze_list, ze);
			break;
		}
	}

	if (ev == NULL)
		error = ENOENT;

out:
	mutex_exit(&zevent_lock);

	return (error);
}
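
/*
 * Example: an illustrative sketch of resuming a stream at the last EID a
 * consumer saw, falling back to the oldest event if that EID has already
 * been drained from the list; 'last_eid' is hypothetical.
 *
 *	if (zfs_zevent_seek(ze, last_eid) == ENOENT)
 *		(void) zfs_zevent_seek(ze, ZEVENT_SEEK_START);
 */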

void
zfs_zevent_init(zfs_zevent_t **zep)
{
	zfs_zevent_t *ze;

	ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
	list_link_init(&ze->ze_node);
}

void
zfs_zevent_destroy(zfs_zevent_t *ze)
{
	mutex_enter(&zevent_lock);
	if (ze->ze_zevent)
		list_remove(&ze->ze_zevent->ev_ze_list, ze);
	mutex_exit(&zevent_lock);

	kmem_free(ze, sizeof (zfs_zevent_t));
}
#endif /* _KERNEL */

/*
 * Wrappers for FM nvlist allocators
 */
/* ARGSUSED */
static void *
i_fm_alloc(nv_alloc_t *nva, size_t size)
{
	return (kmem_zalloc(size, KM_SLEEP));
}

/* ARGSUSED */
static void
i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
{
	kmem_free(buf, size);
}

const nv_alloc_ops_t fm_mem_alloc_ops = {
	.nv_ao_init = NULL,
	.nv_ao_fini = NULL,
	.nv_ao_alloc = i_fm_alloc,
	.nv_ao_free = i_fm_free,
	.nv_ao_reset = NULL
};

/*
 * Create and initialize a new nv_alloc_t for a fixed buffer, buf. A pointer
 * to the newly allocated nv_alloc_t structure is returned upon success or NULL
 * is returned to indicate that the nv_alloc structure could not be created.
 */
nv_alloc_t *
fm_nva_xcreate(char *buf, size_t bufsz)
{
	nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

	if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
		kmem_free(nvhdl, sizeof (nv_alloc_t));
		return (NULL);
	}

	return (nvhdl);
}

/*
 * Destroy a previously allocated nv_alloc structure. The fixed buffer
 * associated with nva must be freed by the caller.
 */
void
fm_nva_xdestroy(nv_alloc_t *nva)
{
	nv_alloc_fini(nva);
	kmem_free(nva, sizeof (nv_alloc_t));
}

/*
 * Create a new nv list. A pointer to a new nv list structure is returned
 * upon success or NULL is returned to indicate that the structure could
 * not be created. The newly created nv list is created and managed by the
 * operations installed in nva. If nva is NULL, the default FMA nva
 * operations are installed and used.
 *
 * When called from the kernel and nva == NULL, this function must be called
 * from passive kernel context with no locks held that can prevent a
 * sleeping memory allocation from occurring. Otherwise, this function may
 * be called from other kernel contexts as long as a valid nva created via
 * fm_nva_xcreate() is supplied.
 */
nvlist_t *
fm_nvlist_create(nv_alloc_t *nva)
{
	int hdl_alloced = 0;
	nvlist_t *nvl;
	nv_alloc_t *nvhdl;

	if (nva == NULL) {
		nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

		if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
			kmem_free(nvhdl, sizeof (nv_alloc_t));
			return (NULL);
		}
		hdl_alloced = 1;
	} else {
		nvhdl = nva;
	}

	if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
		if (hdl_alloced) {
			nv_alloc_fini(nvhdl);
			kmem_free(nvhdl, sizeof (nv_alloc_t));
		}
		return (NULL);
	}

	return (nvl);
}
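
/*
 * Example: an illustrative sketch of building an nvlist from a
 * caller-supplied fixed buffer, for contexts where sleeping allocations
 * must be avoided; the buffer name and size are hypothetical.
 *
 *	static char evbuf[ERPT_DATA_SZ];
 *	nv_alloc_t *nva = fm_nva_xcreate(evbuf, sizeof (evbuf));
 *	if (nva != NULL) {
 *		nvlist_t *nvl = fm_nvlist_create(nva);
 *		...
 *		fm_nvlist_destroy(nvl, FM_NVA_FREE);
 *	}
 *
 * FM_NVA_FREE also frees 'nva' itself, but not the fixed buffer, which
 * remains owned by the caller.
 */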

/*
 * Destroy a previously allocated nvlist structure. flag indicates whether
 * or not the associated nva structure should be freed (FM_NVA_FREE) or
 * retained (FM_NVA_RETAIN). Retaining the nv alloc structure allows
 * it to be re-used for future nvlist creation operations.
 */
void
fm_nvlist_destroy(nvlist_t *nvl, int flag)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);

	nvlist_free(nvl);

	if (nva != NULL) {
		if (flag == FM_NVA_FREE)
			fm_nva_xdestroy(nva);
	}
}

int
i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
{
	int nelem, ret = 0;
	data_type_t type;

	while (ret == 0 && name != NULL) {
		type = va_arg(ap, data_type_t);
		switch (type) {
		case DATA_TYPE_BYTE:
			ret = nvlist_add_byte(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_BYTE_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_byte_array(payload, name,
			    va_arg(ap, uchar_t *), nelem);
			break;
		case DATA_TYPE_BOOLEAN_VALUE:
			ret = nvlist_add_boolean_value(payload, name,
			    va_arg(ap, boolean_t));
			break;
		case DATA_TYPE_BOOLEAN_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_boolean_array(payload, name,
			    va_arg(ap, boolean_t *), nelem);
			break;
		case DATA_TYPE_INT8:
			ret = nvlist_add_int8(payload, name,
			    va_arg(ap, int));
			break;
		case DATA_TYPE_INT8_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int8_array(payload, name,
			    va_arg(ap, int8_t *), nelem);
			break;
		case DATA_TYPE_UINT8:
			ret = nvlist_add_uint8(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_UINT8_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint8_array(payload, name,
			    va_arg(ap, uint8_t *), nelem);
			break;
		case DATA_TYPE_INT16:
			ret = nvlist_add_int16(payload, name,
			    va_arg(ap, int));
			break;
		case DATA_TYPE_INT16_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int16_array(payload, name,
			    va_arg(ap, int16_t *), nelem);
			break;
		case DATA_TYPE_UINT16:
			ret = nvlist_add_uint16(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_UINT16_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint16_array(payload, name,
			    va_arg(ap, uint16_t *), nelem);
			break;
		case DATA_TYPE_INT32:
			ret = nvlist_add_int32(payload, name,
			    va_arg(ap, int32_t));
			break;
		case DATA_TYPE_INT32_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int32_array(payload, name,
			    va_arg(ap, int32_t *), nelem);
			break;
		case DATA_TYPE_UINT32:
			ret = nvlist_add_uint32(payload, name,
			    va_arg(ap, uint32_t));
			break;
		case DATA_TYPE_UINT32_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint32_array(payload, name,
			    va_arg(ap, uint32_t *), nelem);
			break;
		case DATA_TYPE_INT64:
			ret = nvlist_add_int64(payload, name,
			    va_arg(ap, int64_t));
			break;
		case DATA_TYPE_INT64_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int64_array(payload, name,
			    va_arg(ap, int64_t *), nelem);
			break;
		case DATA_TYPE_UINT64:
			ret = nvlist_add_uint64(payload, name,
			    va_arg(ap, uint64_t));
			break;
		case DATA_TYPE_UINT64_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint64_array(payload, name,
			    va_arg(ap, uint64_t *), nelem);
			break;
		case DATA_TYPE_STRING:
			ret = nvlist_add_string(payload, name,
			    va_arg(ap, char *));
			break;
		case DATA_TYPE_STRING_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_string_array(payload, name,
			    va_arg(ap, char **), nelem);
			break;
		case DATA_TYPE_NVLIST:
			ret = nvlist_add_nvlist(payload, name,
			    va_arg(ap, nvlist_t *));
			break;
		case DATA_TYPE_NVLIST_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_nvlist_array(payload, name,
			    va_arg(ap, nvlist_t **), nelem);
			break;
		default:
			ret = EINVAL;
		}

		name = va_arg(ap, char *);
	}
	return (ret);
}

void
fm_payload_set(nvlist_t *payload, ...)
{
	int ret;
	const char *name;
	va_list ap;

	va_start(ap, payload);
	name = va_arg(ap, char *);
	ret = i_fm_payload_set(payload, name, ap);
	va_end(ap);

	if (ret)
		atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);
}
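
/*
 * Example: an illustrative sketch of the varargs convention above. Members
 * are passed as (name, DATA_TYPE_*, value) triples, with array types adding
 * an element count before the pointer, and the list is terminated by a NULL
 * name; the member names and variables shown are hypothetical.
 *
 *	fm_payload_set(payload,
 *	    "zio_err", DATA_TYPE_INT32, (int32_t)err,
 *	    "zio_offset", DATA_TYPE_UINT64, (uint64_t)offset,
 *	    NULL);
 */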

/*
 * Set-up and validate the members of an ereport event according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	class			string		ereport
 *	version			uint8_t		0
 *	ena			uint64_t	<ena>
 *	detector		nvlist_t	<detector>
 *	ereport-payload		nvlist_t	<var args>
 *
 * We don't actually add a 'version' member to the payload. Really,
 * the version quoted to us by our caller is that of the category 1
 * "ereport" event class (and we require FM_EREPORT_VERS0) but
 * the payload version of the actual leaf class event under construction
 * may be something else. Callers should supply a version in the varargs,
 * or (better) we could take two version arguments - one for the
 * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
 * for the leaf class.
 */
void
fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
    uint64_t ena, const nvlist_t *detector, ...)
{
	char ereport_class[FM_MAX_CLASS];
	const char *name;
	va_list ap;
	int ret;

	if (version != FM_EREPORT_VERS0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		return;
	}

	(void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
	    FM_EREPORT_CLASS, erpt_class);
	if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
	}

	if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
	    (nvlist_t *)detector) != 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
	}

	va_start(ap, detector);
	name = va_arg(ap, const char *);
	ret = i_fm_payload_set(ereport, name, ap);
	va_end(ap);

	if (ret)
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
}
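
/*
 * Example: an illustrative sketch of constructing a leaf ereport class with
 * an inline payload; the class string, member names and variables are
 * hypothetical (the "ereport." prefix is added by fm_ereport_set() itself).
 *
 *	fm_ereport_set(ereport, FM_EREPORT_VERS0, "fs.zfs.io",
 *	    fm_ena_generate(0, FM_ENA_FMT1), detector,
 *	    "pool", DATA_TYPE_STRING, poolname,
 *	    "zio_err", DATA_TYPE_INT32, (int32_t)err,
 *	    NULL);
 */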

/*
 * Set-up and validate the members of an hc fmri according to:
 *
 *	Member name		Type		Value
 *	===================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	hc-name			string		<name>
 *	hc-id			string		<id>
 *
 * Note that auth and hc-id are optional members.
 */

#define	HC_MAXPAIRS	20
#define	HC_MAXNAMELEN	50

static int
fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
{
	if (version != FM_HC_SCHEME_VERSION) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
	    nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
	    (nvlist_t *)auth) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	return (1);
}

void
fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, int npairs, ...)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
	nvlist_t *pairs[HC_MAXPAIRS];
	va_list ap;
	int i;

	if (!fm_fmri_hc_set_common(fmri, version, auth))
		return;

	npairs = MIN(npairs, HC_MAXPAIRS);

	va_start(ap, npairs);
	for (i = 0; i < npairs; i++) {
		const char *name = va_arg(ap, const char *);
		uint32_t id = va_arg(ap, uint32_t);
		char idstr[11];

		(void) snprintf(idstr, sizeof (idstr), "%u", id);

		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
	va_end(ap);

	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, npairs) != 0)
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

	for (i = 0; i < npairs; i++)
		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);

	if (snvl != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}
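
/*
 * Example: an illustrative sketch of an hc FMRI naming
 * "motherboard=0/chip=1" with two (name, id) pairs; the pair names and the
 * 'auth' nvlist are hypothetical.
 *
 *	fm_fmri_hc_set(fmri, FM_HC_SCHEME_VERSION, auth, NULL, 2,
 *	    "motherboard", (uint32_t)0,
 *	    "chip", (uint32_t)1);
 */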

void
fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
	nvlist_t *pairs[HC_MAXPAIRS];
	nvlist_t **hcl;
	uint_t n;
	int i, j;
	va_list ap;
	char *hcname, *hcid;

	if (!fm_fmri_hc_set_common(fmri, version, auth))
		return;

	/*
	 * copy the bboard nvpairs to the pairs array
	 */
	if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
	    != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	for (i = 0; i < n; i++) {
		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
		    &hcname) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}

		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
			for (j = 0; j <= i; j++) {
				if (pairs[j] != NULL)
					fm_nvlist_destroy(pairs[j],
					    FM_NVA_RETAIN);
			}
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}

	/*
	 * create the pairs from passed in pairs
	 */
	npairs = MIN(npairs, HC_MAXPAIRS);

	va_start(ap, npairs);
	for (i = n; i < npairs + n; i++) {
		const char *name = va_arg(ap, const char *);
		uint32_t id = va_arg(ap, uint32_t);
		char idstr[11];
		(void) snprintf(idstr, sizeof (idstr), "%u", id);
		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
			for (j = 0; j <= i; j++) {
				if (pairs[j] != NULL)
					fm_nvlist_destroy(pairs[j],
					    FM_NVA_RETAIN);
			}
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}
	va_end(ap);

	/*
	 * Create the fmri hc list
	 */
	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
	    npairs + n) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	for (i = 0; i < npairs + n; i++) {
		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
	}

	if (snvl != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}
}

/*
 * Set-up and validate the members of a dev fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	devpath			string		<devpath>
 *	[devid]			string		<devid>
 *	[target-port-l0id]	string		<target-port-lun0-id>
 *
 * Note that auth and devid are optional members.
 */
void
fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
    const char *devpath, const char *devid, const char *tpl0)
{
	int err = 0;

	if (version != DEV_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
	err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);

	if (auth != NULL) {
		err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth);
	}

	err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);

	if (devid != NULL)
		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);

	if (tpl0 != NULL)
		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);

	if (err)
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

}
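
/*
 * Example: an illustrative sketch of a dev FMRI carrying only the required
 * devpath member; the device path shown is hypothetical.
 *
 *	fm_fmri_dev_set(fmri, DEV_SCHEME_VERSION0, NULL,
 *	    "/pci@0,0/pci8086,2829@d/disk@1", NULL, NULL);
 */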

/*
 * Set-up and validate the members of a cpu fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	cpuid			uint32_t	<cpu_id>
 *	cpumask			uint8_t		<cpu_mask>
 *	serial			uint64_t	<serial_id>
 *
 * Note that auth, cpumask, serial are optional members.
 *
 */
void
fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
    uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
{
	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;

	if (version < CPU_SCHEME_VERSION1) {
		atomic_inc_64(failedp);
		return;
	}

	if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
		atomic_inc_64(failedp);
		return;
	}

	if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
	    FM_FMRI_SCHEME_CPU) != 0) {
		atomic_inc_64(failedp);
		return;
	}

	if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
	    (nvlist_t *)auth) != 0)
		atomic_inc_64(failedp);

	if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
		atomic_inc_64(failedp);

	if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
	    *cpu_maskp) != 0)
		atomic_inc_64(failedp);

	if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
	    FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
		atomic_inc_64(failedp);
}

/*
 * Set-up and validate the members of a mem fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>		[optional]
 *	unum			string		<unum>
 *	serial			string		<serial>	[optional*]
 *	offset			uint64_t	<offset>	[optional]
 *
 *	* serial is required if offset is present
 */
void
fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    const char *unum, const char *serial, uint64_t offset)
{
	if (version != MEM_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (!serial && (offset != (uint64_t)-1)) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (auth != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}

	if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	if (serial != NULL) {
		if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
		    (char **)&serial, 1) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
		if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri,
		    FM_FMRI_MEM_OFFSET, offset) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

void
fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
    uint64_t vdev_guid)
{
	if (version != ZFS_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	if (vdev_guid != 0) {
		if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}
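
/*
 * Example: an illustrative sketch of a zfs-scheme detector FMRI naming a
 * pool and one of its vdevs; 'pool_guid' and 'vdev_guid' are hypothetical,
 * and passing a vdev_guid of 0 omits the vdev member.
 *
 *	nvlist_t *detector = fm_nvlist_create(NULL);
 *	fm_fmri_zfs_set(detector, ZFS_SCHEME_VERSION0, pool_guid, vdev_guid);
 */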

uint64_t
fm_ena_increment(uint64_t ena)
{
	uint64_t new_ena;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
		break;
	case FM_ENA_FMT2:
		new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);
		break;
	default:
		new_ena = 0;
	}

	return (new_ena);
}

uint64_t
fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
{
	uint64_t ena = 0;

	switch (format) {
	case FM_ENA_FMT1:
		if (timestamp) {
			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
			    ENA_FMT1_CPUID_MASK) |
			    ((timestamp << ENA_FMT1_TIME_SHFT) &
			    ENA_FMT1_TIME_MASK));
		} else {
			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
			    ENA_FMT1_CPUID_MASK) |
			    ((gethrtime() << ENA_FMT1_TIME_SHFT) &
			    ENA_FMT1_TIME_MASK));
		}
		break;
	case FM_ENA_FMT2:
		ena = (uint64_t)((format & ENA_FORMAT_MASK) |
		    ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));
		break;
	default:
		break;
	}

	return (ena);
}

uint64_t
fm_ena_generate(uint64_t timestamp, uchar_t format)
{
	uint64_t ena;

	kpreempt_disable();
	ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);
	kpreempt_enable();

	return (ena);
}

uint64_t
fm_ena_generation_get(uint64_t ena)
{
	uint64_t gen;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
		break;
	case FM_ENA_FMT2:
		gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;
		break;
	default:
		gen = 0;
		break;
	}

	return (gen);
}

uchar_t
fm_ena_format_get(uint64_t ena)
{

	return (ENA_FORMAT(ena));
}

uint64_t
fm_ena_id_get(uint64_t ena)
{
	uint64_t id;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
		break;
	case FM_ENA_FMT2:
		id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;
		break;
	default:
		id = 0;
	}

	return (id);
}

uint64_t
fm_ena_time_get(uint64_t ena)
{
	uint64_t time;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
		break;
	case FM_ENA_FMT2:
		time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;
		break;
	default:
		time = 0;
	}

	return (time);
}
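
/*
 * Example: an illustrative sketch of a format 1 ENA round trip using the
 * routines above. A timestamp of 0 makes fm_ena_generate() capture the
 * current hrtime on the current CPU.
 *
 *	uint64_t ena = fm_ena_generate(0, FM_ENA_FMT1);
 *	ASSERT3U(fm_ena_format_get(ena), ==, FM_ENA_FMT1);
 *	uint64_t when = fm_ena_time_get(ena);		(time field)
 *	uint64_t gen = fm_ena_generation_get(ena);	(generation field)
 *	ena = fm_ena_increment(ena);			(bumps the generation)
 */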

#ifdef _KERNEL
/*
 * Helper function to increment ereport dropped count. Used by the event
 * rate limiting code to give feedback to the user about how many events were
 * rate limited by including them in the 'dropped' count.
 */
void
fm_erpt_dropped_increment(void)
{
	atomic_inc_64(&ratelimit_dropped);
}

void
fm_init(void)
{
	zevent_len_cur = 0;
	zevent_flags = 0;

	/* Initialize zevent allocation and generation kstats */
	fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
	    sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);

	if (fm_ksp != NULL) {
		fm_ksp->ks_data = &erpt_kstat_data;
		kstat_install(fm_ksp);
	} else {
		cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
	}

	mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zevent_list, sizeof (zevent_t),
	    offsetof(zevent_t, ev_node));
	cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);

	zfs_ereport_init();
}

void
fm_fini(void)
{
	int count;

	zfs_ereport_fini();

	zfs_zevent_drain_all(&count);

	mutex_enter(&zevent_lock);
	cv_broadcast(&zevent_cv);

	zevent_flags |= ZEVENT_SHUTDOWN;
	while (zevent_waiters > 0) {
		mutex_exit(&zevent_lock);
		schedule();
		mutex_enter(&zevent_lock);
	}
	mutex_exit(&zevent_lock);

	cv_destroy(&zevent_cv);
	list_destroy(&zevent_list);
	mutex_destroy(&zevent_lock);

	if (fm_ksp != NULL) {
		kstat_delete(fm_ksp);
		fm_ksp = NULL;
	}
}
#endif /* _KERNEL */

ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, INT, ZMOD_RW,
	"Max event queue length");

ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, cols, INT, ZMOD_RW,
	"Max event column width");

ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, console, INT, ZMOD_RW,
	"Log events to the console");