/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Fault Management Architecture (FMA) Resource and Protocol Support
 *
 * The routines contained herein provide services to support kernel subsystems
 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
 *
 * Name-Value Pair Lists
 *
 * The embodiment of an FMA protocol element (event, fmri or authority) is a
 * name-value pair list (nvlist_t). FMA-specific nvlist constructor and
 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
 * to create an nvpair list using custom allocators. Callers may choose to
 * allocate either from the kernel memory allocator, or from a preallocated
 * buffer, useful in constrained contexts like high-level interrupt routines.
 *
 * Protocol Event and FMRI Construction
 *
 * Convenience routines are provided to construct nvlist events according to
 * the FMA Event Protocol and Naming Schema specification for ereports and
 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
 *
 * ENA Manipulation
 *
 * Routines to generate ENA formats 0, 1 and 2 are available as well as
 * routines to increment formats 1 and 2. Individual fields within the
 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
 * fm_ena_format_get() and fm_ena_generation_get().
 */

#include <sys/types.h>
#include <sys/time.h>
#include <sys/list.h>
#include <sys/nvpair.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/systeminfo.h>
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
#include <sys/kstat.h>
#include <sys/zfs_context.h>
#ifdef _KERNEL
#include <sys/atomic.h>
#include <sys/condvar.h>
#include <sys/console.h>
#include <sys/kobj.h>
#include <sys/time.h>
#include <sys/zfs_ioctl.h>

int zfs_zevent_len_max = 0;
int zfs_zevent_cols = 80;
int zfs_zevent_console = 0;

static int zevent_len_cur = 0;
static int zevent_waiters = 0;
static int zevent_flags = 0;

/* Num events rate limited since the last time zfs_zevent_next() was called */
static uint64_t ratelimit_dropped = 0;

/*
 * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
 * posted. The posted EIDs are monotonically increasing but not persistent.
 * They will be reset to the initial value (1) each time the kernel module is
 * loaded.
 */
static uint64_t zevent_eid = 0;

static kmutex_t zevent_lock;
static list_t zevent_list;
static kcondvar_t zevent_cv;
#endif /* _KERNEL */


/*
 * Common fault management kstats to record event generation failures
 */

struct erpt_kstat {
	kstat_named_t	erpt_dropped;		/* num erpts dropped on post */
	kstat_named_t	erpt_set_failed;	/* num erpt set failures */
	kstat_named_t	fmri_set_failed;	/* num fmri set failures */
	kstat_named_t	payload_set_failed;	/* num payload set failures */
};

static struct erpt_kstat erpt_kstat_data = {
	{ "erpt-dropped", KSTAT_DATA_UINT64 },
	{ "erpt-set-failed", KSTAT_DATA_UINT64 },
	{ "fmri-set-failed", KSTAT_DATA_UINT64 },
	{ "payload-set-failed", KSTAT_DATA_UINT64 }
};

kstat_t *fm_ksp;

#ifdef _KERNEL

/*
 * Formatting utility function for fm_nvprintr. We attempt to wrap chunks of
 * output so they aren't split across console lines, and return the end column.
 */
/*PRINTFLIKE4*/
static int
fm_printf(int depth, int c, int cols, const char *format, ...)
{
	va_list ap;
	int width;
	char c1;

	va_start(ap, format);
	width = vsnprintf(&c1, sizeof (c1), format, ap);
	va_end(ap);

	if (c + width >= cols) {
		console_printf("\n");
		c = 0;
		if (format[0] != ' ' && depth > 0) {
			console_printf(" ");
			c++;
		}
	}

	va_start(ap, format);
	console_vprintf(format, ap);
	va_end(ap);

	return ((c + width) % cols);
}

/*
 * Recursively print an nvlist in the specified column width and return the
 * column we end up in. This function is called recursively by fm_nvprint(),
 * below. We generically format the entire nvpair using hexadecimal
 * integers and strings. Integer and nvlist arrays are printed element by
 * element, while string, boolean and byte arrays are elided so as not to
 * overwhelm the amount of console output we produce at panic time. This can
 * be further enhanced as FMA technology grows based upon the needs of
 * consumers. All FMA telemetry is logged using the dump device transport,
 * so the console output serves only as a fallback in case this procedure is
 * unsuccessful.
 */
static int
fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
{
	nvpair_t *nvp;

	for (nvp = nvlist_next_nvpair(nvl, NULL);
	    nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) {

		data_type_t type = nvpair_type(nvp);
		const char *name = nvpair_name(nvp);

		boolean_t b;
		uint8_t i8;
		uint16_t i16;
		uint32_t i32;
		uint64_t i64;
		char *str;
		nvlist_t *cnv;

		if (strcmp(name, FM_CLASS) == 0)
			continue; /* already printed by caller */

		c = fm_printf(d, c, cols, " %s=", name);

		switch (type) {
		case DATA_TYPE_BOOLEAN:
			c = fm_printf(d + 1, c, cols, " 1");
			break;

		case DATA_TYPE_BOOLEAN_VALUE:
			(void) nvpair_value_boolean_value(nvp, &b);
			c = fm_printf(d + 1, c, cols, b ? "1" : "0");
			break;

		case DATA_TYPE_BYTE:
			(void) nvpair_value_byte(nvp, &i8);
			c = fm_printf(d + 1, c, cols, "0x%x", i8);
			break;

		case DATA_TYPE_INT8:
			(void) nvpair_value_int8(nvp, (void *)&i8);
			c = fm_printf(d + 1, c, cols, "0x%x", i8);
			break;

		case DATA_TYPE_UINT8:
			(void) nvpair_value_uint8(nvp, &i8);
			c = fm_printf(d + 1, c, cols, "0x%x", i8);
			break;

		case DATA_TYPE_INT16:
			(void) nvpair_value_int16(nvp, (void *)&i16);
			c = fm_printf(d + 1, c, cols, "0x%x", i16);
			break;

		case DATA_TYPE_UINT16:
			(void) nvpair_value_uint16(nvp, &i16);
			c = fm_printf(d + 1, c, cols, "0x%x", i16);
			break;

		case DATA_TYPE_INT32:
			(void) nvpair_value_int32(nvp, (void *)&i32);
			c = fm_printf(d + 1, c, cols, "0x%x", i32);
			break;

		case DATA_TYPE_UINT32:
			(void) nvpair_value_uint32(nvp, &i32);
			c = fm_printf(d + 1, c, cols, "0x%x", i32);
			break;

		case DATA_TYPE_INT64:
			(void) nvpair_value_int64(nvp, (void *)&i64);
			c = fm_printf(d + 1, c, cols, "0x%llx",
			    (u_longlong_t)i64);
			break;

		case DATA_TYPE_UINT64:
			(void) nvpair_value_uint64(nvp, &i64);
			c = fm_printf(d + 1, c, cols, "0x%llx",
			    (u_longlong_t)i64);
			break;

		case DATA_TYPE_HRTIME:
			(void) nvpair_value_hrtime(nvp, (void *)&i64);
			c = fm_printf(d + 1, c, cols, "0x%llx",
			    (u_longlong_t)i64);
			break;

		case DATA_TYPE_STRING:
			(void) nvpair_value_string(nvp, &str);
			c = fm_printf(d + 1, c, cols, "\"%s\"",
			    str ? str : "<NULL>");
			break;

		case DATA_TYPE_NVLIST:
			c = fm_printf(d + 1, c, cols, "[");
			(void) nvpair_value_nvlist(nvp, &cnv);
			c = fm_nvprintr(cnv, d + 1, c, cols);
			c = fm_printf(d + 1, c, cols, " ]");
			break;

		case DATA_TYPE_NVLIST_ARRAY: {
			nvlist_t **val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[");
			(void) nvpair_value_nvlist_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++) {
				c = fm_nvprintr(val[i], d + 1, c, cols);
			}
			c = fm_printf(d + 1, c, cols, " ]");
			break;
		}

		case DATA_TYPE_INT8_ARRAY: {
			int8_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_int8_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
		}

		case DATA_TYPE_UINT8_ARRAY: {
			uint8_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_uint8_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
		}

		case DATA_TYPE_INT16_ARRAY: {
			int16_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_int16_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
		}

		case DATA_TYPE_UINT16_ARRAY: {
			uint16_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_uint16_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
		}

		case DATA_TYPE_INT32_ARRAY: {
			int32_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_int32_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
		}

		case DATA_TYPE_UINT32_ARRAY: {
			uint32_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_uint32_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
		}

		case DATA_TYPE_INT64_ARRAY: {
			int64_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_int64_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
		}

		case DATA_TYPE_UINT64_ARRAY: {
			uint64_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_uint64_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
		}

		case DATA_TYPE_STRING_ARRAY:
		case DATA_TYPE_BOOLEAN_ARRAY:
		case DATA_TYPE_BYTE_ARRAY:
			c = fm_printf(d + 1, c, cols, "[...]");
			break;

		case DATA_TYPE_UNKNOWN:
			c = fm_printf(d + 1, c, cols, "<unknown>");
			break;
		}
	}

	return (c);
}

void
fm_nvprint(nvlist_t *nvl)
{
	char *class;
	int c = 0;

	console_printf("\n");

	if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0)
		c = fm_printf(0, c, zfs_zevent_cols, "%s", class);

	if (fm_nvprintr(nvl, 0, c, zfs_zevent_cols) != 0)
		console_printf("\n");

	console_printf("\n");
}

static zevent_t *
zfs_zevent_alloc(void)
{
	zevent_t *ev;

	ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);

	list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t),
	    offsetof(zfs_zevent_t, ze_node));
	list_link_init(&ev->ev_node);

	return (ev);
}

static void
zfs_zevent_free(zevent_t *ev)
{
	/* Run provided cleanup callback */
	ev->ev_cb(ev->ev_nvl, ev->ev_detector);

	list_destroy(&ev->ev_ze_list);
	kmem_free(ev, sizeof (zevent_t));
}

static void
zfs_zevent_drain(zevent_t *ev)
{
	zfs_zevent_t *ze;

	ASSERT(MUTEX_HELD(&zevent_lock));
	list_remove(&zevent_list, ev);

	/* Remove references to this event in all private file data */
	while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
		list_remove(&ev->ev_ze_list, ze);
		ze->ze_zevent = NULL;
		ze->ze_dropped++;
	}

	zfs_zevent_free(ev);
}

void
zfs_zevent_drain_all(int *count)
{
	zevent_t *ev;

	mutex_enter(&zevent_lock);
	while ((ev = list_head(&zevent_list)) != NULL)
		zfs_zevent_drain(ev);

	*count = zevent_len_cur;
	zevent_len_cur = 0;
	mutex_exit(&zevent_lock);
}

/*
 * New zevents are inserted at the head. If the maximum queue
 * length is exceeded a zevent will be drained from the tail.
 * As part of this, any user space processes which currently have
 * a reference to this zevent_t in their private data will have
 * this reference set to NULL.
 */
static void
zfs_zevent_insert(zevent_t *ev)
{
	ASSERT(MUTEX_HELD(&zevent_lock));
	list_insert_head(&zevent_list, ev);

	if (zevent_len_cur >= zfs_zevent_len_max)
		zfs_zevent_drain(list_tail(&zevent_list));
	else
		zevent_len_cur++;
}

/*
 * Post a zevent. The cb will be called when nvl and detector are no longer
 * needed, i.e.:
 * - An error happened and a zevent can't be posted. In this case, cb is called
 *   before zfs_zevent_post() returns.
 * - The event is being drained and freed.
 */
int
zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
{
	int64_t tv_array[2];
	timestruc_t tv;
	uint64_t eid;
	size_t nvl_size = 0;
	zevent_t *ev;
	int error;

	ASSERT(cb != NULL);

	gethrestime(&tv);
	tv_array[0] = tv.tv_sec;
	tv_array[1] = tv.tv_nsec;

	error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		goto out;
	}

	eid = atomic_inc_64_nv(&zevent_eid);
	error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		goto out;
	}

	error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		goto out;
	}

	if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		error = EOVERFLOW;
		goto out;
	}

	if (zfs_zevent_console)
		fm_nvprint(nvl);

	ev = zfs_zevent_alloc();
	if (ev == NULL) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		error = ENOMEM;
		goto out;
	}

	ev->ev_nvl = nvl;
	ev->ev_detector = detector;
	ev->ev_cb = cb;
	ev->ev_eid = eid;

	mutex_enter(&zevent_lock);
	zfs_zevent_insert(ev);
	cv_broadcast(&zevent_cv);
	mutex_exit(&zevent_lock);

out:
	if (error)
		cb(nvl, detector);

	return (error);
}
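
/*
 * Illustrative usage sketch, not part of the original file: posting a
 * zevent with a cleanup callback that releases both nvlists once the
 * event has been drained. The callback name, event class, and pool
 * GUID below are hypothetical.
 *
 *	static void
 *	example_zevent_cb(nvlist_t *nvl, nvlist_t *detector)
 *	{
 *		fm_nvlist_destroy(nvl, FM_NVA_FREE);
 *		if (detector != NULL)
 *			fm_nvlist_destroy(detector, FM_NVA_FREE);
 *	}
 *
 *	nvlist_t *nvl = fm_nvlist_create(NULL);
 *	nvlist_t *detector = fm_nvlist_create(NULL);
 *	uint64_t pool_guid = 0x1234;
 *
 *	fm_fmri_zfs_set(detector, ZFS_SCHEME_VERSION0, pool_guid, 0);
 *	fm_ereport_set(nvl, FM_EREPORT_VERS0, "fs.zfs.example",
 *	    fm_ena_generate(0, FM_ENA_FMT1), detector, NULL);
 *	(void) zfs_zevent_post(nvl, detector, example_zevent_cb);
 */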

static int
zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
{
	*ze = zfsdev_get_state(minor, ZST_ZEVENT);
	if (*ze == NULL)
		return (SET_ERROR(EBADF));

	return (0);
}

int
zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
{
	file_t *fp;
	int error;

	fp = getf(fd);
	if (fp == NULL)
		return (SET_ERROR(EBADF));

	error = zfsdev_getminor(fp->f_file, minorp);
	if (error == 0)
		error = zfs_zevent_minor_to_state(*minorp, ze);

	if (error)
		zfs_zevent_fd_rele(fd);

	return (error);
}

void
zfs_zevent_fd_rele(int fd)
{
	releasef(fd);
}

/*
 * Get the next zevent in the stream and place a copy in 'event'. This
 * may fail with ENOMEM if the encoded nvlist size exceeds the passed
 * 'event_size'. In this case the stream pointer is not advanced and
 * 'event_size' is set to the minimum required buffer size.
 */
int
zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
    uint64_t *dropped)
{
	zevent_t *ev;
	size_t size;
	int error = 0;

	mutex_enter(&zevent_lock);
	if (ze->ze_zevent == NULL) {
		/* New stream, start at the beginning/tail */
		ev = list_tail(&zevent_list);
		if (ev == NULL) {
			error = ENOENT;
			goto out;
		}
	} else {
		/*
		 * Existing stream, continue with the next element and remove
		 * ourselves from the wait queue for the previous element
		 */
		ev = list_prev(&zevent_list, ze->ze_zevent);
		if (ev == NULL) {
			error = ENOENT;
			goto out;
		}
	}

	VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
	if (size > *event_size) {
		*event_size = size;
		error = ENOMEM;
		goto out;
	}

	if (ze->ze_zevent)
		list_remove(&ze->ze_zevent->ev_ze_list, ze);

	ze->ze_zevent = ev;
	list_insert_head(&ev->ev_ze_list, ze);
	(void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
	*dropped = ze->ze_dropped;

#ifdef _KERNEL
	/* Include events dropped due to rate limiting */
	*dropped += ratelimit_dropped;
	ratelimit_dropped = 0;
#endif
	ze->ze_dropped = 0;
out:
	mutex_exit(&zevent_lock);

	return (error);
}
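
/*
 * Illustrative consumer sketch, not part of the original file: because
 * ENOMEM updates *event_size to the required size without advancing the
 * stream, a caller can simply retry with the larger size. 'ze' is
 * assumed to be a zfs_zevent_t held via zfs_zevent_fd_hold().
 *
 *	uint64_t size = 1024, dropped;
 *	nvlist_t *event;
 *	int error;
 *
 *	do {
 *		error = zfs_zevent_next(ze, &event, &size, &dropped);
 *	} while (error == ENOMEM);
 *
 * On success the caller owns the duplicated 'event' and must
 * nvlist_free() it.
 */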

int
zfs_zevent_wait(zfs_zevent_t *ze)
{
	int error = 0;

	mutex_enter(&zevent_lock);

	if (zevent_flags & ZEVENT_SHUTDOWN) {
		error = ESHUTDOWN;
		goto out;
	}

	zevent_waiters++;
	cv_wait_sig(&zevent_cv, &zevent_lock);
	if (issig(JUSTLOOKING))
		error = EINTR;

	zevent_waiters--;
out:
	mutex_exit(&zevent_lock);

	return (error);
}

/*
 * The caller may seek to a specific EID by passing that EID. If the EID
 * is still available in the posted list of events the cursor is positioned
 * there. Otherwise ENOENT is returned and the cursor is not moved.
 *
 * There are two reserved EIDs which may be passed and will never fail.
 * ZEVENT_SEEK_START positions the cursor at the start of the list, and
 * ZEVENT_SEEK_END positions the cursor at the end of the list.
 */
int
zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid)
{
	zevent_t *ev;
	int error = 0;

	mutex_enter(&zevent_lock);

	if (eid == ZEVENT_SEEK_START) {
		if (ze->ze_zevent)
			list_remove(&ze->ze_zevent->ev_ze_list, ze);

		ze->ze_zevent = NULL;
		goto out;
	}

	if (eid == ZEVENT_SEEK_END) {
		if (ze->ze_zevent)
			list_remove(&ze->ze_zevent->ev_ze_list, ze);

		ev = list_head(&zevent_list);
		if (ev) {
			ze->ze_zevent = ev;
			list_insert_head(&ev->ev_ze_list, ze);
		} else {
			ze->ze_zevent = NULL;
		}

		goto out;
	}

	for (ev = list_tail(&zevent_list); ev != NULL;
	    ev = list_prev(&zevent_list, ev)) {
		if (ev->ev_eid == eid) {
			if (ze->ze_zevent)
				list_remove(&ze->ze_zevent->ev_ze_list, ze);

			ze->ze_zevent = ev;
			list_insert_head(&ev->ev_ze_list, ze);
			break;
		}
	}

	if (ev == NULL)
		error = ENOENT;

out:
	mutex_exit(&zevent_lock);

	return (error);
}
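
/*
 * Illustrative sketch, not part of the original file: resuming a stream
 * just past the last EID a consumer processed, falling back to the start
 * of the list when that event has already been drained. 'last_eid' is
 * hypothetical consumer state.
 *
 *	if (zfs_zevent_seek(ze, last_eid) == ENOENT)
 *		(void) zfs_zevent_seek(ze, ZEVENT_SEEK_START);
 */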

void
zfs_zevent_init(zfs_zevent_t **zep)
{
	zfs_zevent_t *ze;

	ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
	list_link_init(&ze->ze_node);
}

void
zfs_zevent_destroy(zfs_zevent_t *ze)
{
	mutex_enter(&zevent_lock);
	if (ze->ze_zevent)
		list_remove(&ze->ze_zevent->ev_ze_list, ze);
	mutex_exit(&zevent_lock);

	kmem_free(ze, sizeof (zfs_zevent_t));
}
#endif /* _KERNEL */

/*
 * Wrappers for FM nvlist allocators
 */
/* ARGSUSED */
static void *
i_fm_alloc(nv_alloc_t *nva, size_t size)
{
	return (kmem_zalloc(size, KM_SLEEP));
}

/* ARGSUSED */
static void
i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
{
	kmem_free(buf, size);
}

const nv_alloc_ops_t fm_mem_alloc_ops = {
	.nv_ao_init = NULL,
	.nv_ao_fini = NULL,
	.nv_ao_alloc = i_fm_alloc,
	.nv_ao_free = i_fm_free,
	.nv_ao_reset = NULL
};

/*
 * Create and initialize a new nv_alloc_t for a fixed buffer, buf. A pointer
 * to the newly allocated nv_alloc_t structure is returned upon success or NULL
 * is returned to indicate that the nv_alloc structure could not be created.
 */
nv_alloc_t *
fm_nva_xcreate(char *buf, size_t bufsz)
{
	nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

	if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
		kmem_free(nvhdl, sizeof (nv_alloc_t));
		return (NULL);
	}

	return (nvhdl);
}

/*
 * Destroy a previously allocated nv_alloc structure. The fixed buffer
 * associated with nva must be freed by the caller.
 */
void
fm_nva_xdestroy(nv_alloc_t *nva)
{
	nv_alloc_fini(nva);
	kmem_free(nva, sizeof (nv_alloc_t));
}
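
/*
 * Illustrative sketch, not part of the original file: carving nvlists out
 * of a preallocated buffer, as the fixed-buffer allocator above is meant
 * to be used in constrained contexts. The static buffer is hypothetical.
 *
 *	static char ebuf[ERPT_DATA_SZ];
 *	nv_alloc_t *nva = fm_nva_xcreate(ebuf, sizeof (ebuf));
 *
 *	if (nva != NULL) {
 *		nvlist_t *nvl = fm_nvlist_create(nva);
 *		...
 *		fm_nvlist_destroy(nvl, FM_NVA_RETAIN);
 *		fm_nva_xdestroy(nva);	(ebuf itself remains caller-owned)
 *	}
 */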

/*
 * Create a new nv list. A pointer to a new nv list structure is returned
 * upon success or NULL is returned to indicate that the structure could
 * not be created. The newly created nv list is created and managed by the
 * operations installed in nva. If nva is NULL, the default FMA nva
 * operations are installed and used.
 *
 * When called from the kernel and nva == NULL, this function must be called
 * from passive kernel context with no locks held that can prevent a
 * sleeping memory allocation from occurring. Otherwise, this function may
 * be called from other kernel contexts as long as a valid nva created via
 * fm_nva_xcreate() is supplied.
 */
nvlist_t *
fm_nvlist_create(nv_alloc_t *nva)
{
	int hdl_alloced = 0;
	nvlist_t *nvl;
	nv_alloc_t *nvhdl;

	if (nva == NULL) {
		nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

		if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
			kmem_free(nvhdl, sizeof (nv_alloc_t));
			return (NULL);
		}
		hdl_alloced = 1;
	} else {
		nvhdl = nva;
	}

	if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
		if (hdl_alloced) {
			nv_alloc_fini(nvhdl);
			kmem_free(nvhdl, sizeof (nv_alloc_t));
		}
		return (NULL);
	}

	return (nvl);
}

/*
 * Destroy a previously allocated nvlist structure. flag indicates whether
 * or not the associated nva structure should be freed (FM_NVA_FREE) or
 * retained (FM_NVA_RETAIN). Retaining the nv alloc structure allows
 * it to be re-used for future nvlist creation operations.
 */
void
fm_nvlist_destroy(nvlist_t *nvl, int flag)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);

	nvlist_free(nvl);

	if (nva != NULL) {
		if (flag == FM_NVA_FREE)
			fm_nva_xdestroy(nva);
	}
}
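
/*
 * Illustrative sketch, not part of the original file: the common
 * kernel-allocator lifecycle. Passing nva == NULL installs the default
 * FMA operations above, and FM_NVA_FREE releases that implicit allocator
 * together with the list. The payload member name is hypothetical.
 *
 *	nvlist_t *nvl = fm_nvlist_create(NULL);
 *
 *	if (nvl != NULL) {
 *		fm_payload_set(nvl, "example-count",
 *		    DATA_TYPE_UINT64, (uint64_t)123, NULL);
 *		fm_nvlist_destroy(nvl, FM_NVA_FREE);
 *	}
 */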

int
i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
{
	int nelem, ret = 0;
	data_type_t type;

	while (ret == 0 && name != NULL) {
		type = va_arg(ap, data_type_t);
		switch (type) {
		case DATA_TYPE_BYTE:
			ret = nvlist_add_byte(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_BYTE_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_byte_array(payload, name,
			    va_arg(ap, uchar_t *), nelem);
			break;
		case DATA_TYPE_BOOLEAN_VALUE:
			ret = nvlist_add_boolean_value(payload, name,
			    va_arg(ap, boolean_t));
			break;
		case DATA_TYPE_BOOLEAN_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_boolean_array(payload, name,
			    va_arg(ap, boolean_t *), nelem);
			break;
		case DATA_TYPE_INT8:
			ret = nvlist_add_int8(payload, name,
			    va_arg(ap, int));
			break;
		case DATA_TYPE_INT8_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int8_array(payload, name,
			    va_arg(ap, int8_t *), nelem);
			break;
		case DATA_TYPE_UINT8:
			ret = nvlist_add_uint8(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_UINT8_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint8_array(payload, name,
			    va_arg(ap, uint8_t *), nelem);
			break;
		case DATA_TYPE_INT16:
			ret = nvlist_add_int16(payload, name,
			    va_arg(ap, int));
			break;
		case DATA_TYPE_INT16_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int16_array(payload, name,
			    va_arg(ap, int16_t *), nelem);
			break;
		case DATA_TYPE_UINT16:
			ret = nvlist_add_uint16(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_UINT16_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint16_array(payload, name,
			    va_arg(ap, uint16_t *), nelem);
			break;
		case DATA_TYPE_INT32:
			ret = nvlist_add_int32(payload, name,
			    va_arg(ap, int32_t));
			break;
		case DATA_TYPE_INT32_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int32_array(payload, name,
			    va_arg(ap, int32_t *), nelem);
			break;
		case DATA_TYPE_UINT32:
			ret = nvlist_add_uint32(payload, name,
			    va_arg(ap, uint32_t));
			break;
		case DATA_TYPE_UINT32_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint32_array(payload, name,
			    va_arg(ap, uint32_t *), nelem);
			break;
		case DATA_TYPE_INT64:
			ret = nvlist_add_int64(payload, name,
			    va_arg(ap, int64_t));
			break;
		case DATA_TYPE_INT64_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int64_array(payload, name,
			    va_arg(ap, int64_t *), nelem);
			break;
		case DATA_TYPE_UINT64:
			ret = nvlist_add_uint64(payload, name,
			    va_arg(ap, uint64_t));
			break;
		case DATA_TYPE_UINT64_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint64_array(payload, name,
			    va_arg(ap, uint64_t *), nelem);
			break;
		case DATA_TYPE_STRING:
			ret = nvlist_add_string(payload, name,
			    va_arg(ap, char *));
			break;
		case DATA_TYPE_STRING_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_string_array(payload, name,
			    va_arg(ap, char **), nelem);
			break;
		case DATA_TYPE_NVLIST:
			ret = nvlist_add_nvlist(payload, name,
			    va_arg(ap, nvlist_t *));
			break;
		case DATA_TYPE_NVLIST_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_nvlist_array(payload, name,
			    va_arg(ap, nvlist_t **), nelem);
			break;
		default:
			ret = EINVAL;
		}

		name = va_arg(ap, char *);
	}
	return (ret);
}

void
fm_payload_set(nvlist_t *payload, ...)
{
	int ret;
	const char *name;
	va_list ap;

	va_start(ap, payload);
	name = va_arg(ap, char *);
	ret = i_fm_payload_set(payload, name, ap);
	va_end(ap);

	if (ret)
		atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);
}
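
/*
 * Illustrative sketch, not part of the original file: the varargs stream
 * consumed above is a sequence of (name, type, [nelem,] value) tuples
 * terminated by a NULL name; arrays pass their element count before the
 * pointer. The member names are hypothetical.
 *
 *	uint64_t ids[2] = { 1, 2 };
 *
 *	fm_payload_set(payload,
 *	    "example-count", DATA_TYPE_UINT32, (uint32_t)7,
 *	    "example-ids", DATA_TYPE_UINT64_ARRAY, 2, ids,
 *	    NULL);
 */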

/*
 * Set-up and validate the members of an ereport event according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	class			string		ereport
 *	version			uint8_t		0
 *	ena			uint64_t	<ena>
 *	detector		nvlist_t	<detector>
 *	ereport-payload		nvlist_t	<var args>
 *
 * We don't actually add a 'version' member to the payload. Really,
 * the version quoted to us by our caller is that of the category 1
 * "ereport" event class (and we require FM_EREPORT_VERS0) but
 * the payload version of the actual leaf class event under construction
 * may be something else. Callers should supply a version in the varargs,
 * or (better) we could take two version arguments - one for the
 * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
 * for the leaf class.
 */
void
fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
    uint64_t ena, const nvlist_t *detector, ...)
{
	char ereport_class[FM_MAX_CLASS];
	const char *name;
	va_list ap;
	int ret;

	if (version != FM_EREPORT_VERS0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		return;
	}

	(void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
	    FM_EREPORT_CLASS, erpt_class);
	if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
	}

	if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
	    (nvlist_t *)detector) != 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
	}

	va_start(ap, detector);
	name = va_arg(ap, const char *);
	ret = i_fm_payload_set(ereport, name, ap);
	va_end(ap);

	if (ret)
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
}
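
/*
 * Illustrative sketch, not part of the original file: assembling a leaf
 * ereport under the "ereport" category 1 class with a single payload
 * member. The class suffix and payload name are hypothetical; 'detector'
 * is assumed to be an FMRI built with one of the fm_fmri_*_set() routines.
 *
 *	fm_ereport_set(ereport, FM_EREPORT_VERS0, "io.example.err",
 *	    fm_ena_generate(0, FM_ENA_FMT1), detector,
 *	    "example-addr", DATA_TYPE_UINT64, (uint64_t)0x1000,
 *	    NULL);
 */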

/*
 * Set-up and validate the members of an hc fmri according to:
 *
 *	Member name		Type		Value
 *	===================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	hc-name			string		<name>
 *	hc-id			string		<id>
 *
 * Note that auth and hc-id are optional members.
 */

#define	HC_MAXPAIRS	20
#define	HC_MAXNAMELEN	50

static int
fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
{
	if (version != FM_HC_SCHEME_VERSION) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
	    nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
	    (nvlist_t *)auth) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	return (1);
}

void
fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, int npairs, ...)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
	nvlist_t *pairs[HC_MAXPAIRS];
	va_list ap;
	int i;

	if (!fm_fmri_hc_set_common(fmri, version, auth))
		return;

	npairs = MIN(npairs, HC_MAXPAIRS);

	va_start(ap, npairs);
	for (i = 0; i < npairs; i++) {
		const char *name = va_arg(ap, const char *);
		uint32_t id = va_arg(ap, uint32_t);
		char idstr[11];

		(void) snprintf(idstr, sizeof (idstr), "%u", id);

		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
	va_end(ap);

	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, npairs) != 0)
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

	for (i = 0; i < npairs; i++)
		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);

	if (snvl != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}
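
/*
 * Illustrative sketch, not part of the original file: a two-level hc FMRI
 * such as motherboard=0/cpu=1. The (name, id) pairs are read from the
 * varargs in order; 'auth' may be NULL.
 *
 *	fm_fmri_hc_set(fmri, FM_HC_SCHEME_VERSION, auth, NULL, 2,
 *	    "motherboard", (uint32_t)0,
 *	    "cpu", (uint32_t)1);
 */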

void
fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
	nvlist_t *pairs[HC_MAXPAIRS];
	nvlist_t **hcl;
	uint_t n;
	int i, j;
	va_list ap;
	char *hcname, *hcid;

	if (!fm_fmri_hc_set_common(fmri, version, auth))
		return;

	/*
	 * copy the bboard nvpairs to the pairs array
	 */
	if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
	    != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	for (i = 0; i < n; i++) {
		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
		    &hcname) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}

		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
			for (j = 0; j <= i; j++) {
				if (pairs[j] != NULL)
					fm_nvlist_destroy(pairs[j],
					    FM_NVA_RETAIN);
			}
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}

	/*
	 * create the pairs from passed in pairs
	 */
	npairs = MIN(npairs, HC_MAXPAIRS);

	va_start(ap, npairs);
	for (i = n; i < npairs + n; i++) {
		const char *name = va_arg(ap, const char *);
		uint32_t id = va_arg(ap, uint32_t);
		char idstr[11];
		(void) snprintf(idstr, sizeof (idstr), "%u", id);
		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
			for (j = 0; j <= i; j++) {
				if (pairs[j] != NULL)
					fm_nvlist_destroy(pairs[j],
					    FM_NVA_RETAIN);
			}
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}
	va_end(ap);

	/*
	 * Create the fmri hc list
	 */
	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
	    npairs + n) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	for (i = 0; i < npairs + n; i++) {
		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
	}

	if (snvl != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}
}

/*
 * Set-up and validate the members of a dev fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	devpath			string		<devpath>
 *	[devid]			string		<devid>
 *	[target-port-l0id]	string		<target-port-lun0-id>
 *
 * Note that auth and devid are optional members.
 */
void
fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
    const char *devpath, const char *devid, const char *tpl0)
{
	int err = 0;

	if (version != DEV_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
	err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);

	if (auth != NULL) {
		err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth);
	}

	err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);

	if (devid != NULL)
		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);

	if (tpl0 != NULL)
		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);

	if (err)
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
}
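
/*
 * Illustrative sketch, not part of the original file: a minimal dev
 * scheme FMRI carrying only the required devpath member. The device
 * path is hypothetical.
 *
 *	fm_fmri_dev_set(fmri_dev, DEV_SCHEME_VERSION0, NULL,
 *	    "/pci@0,0/disk@0", NULL, NULL);
 */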

/*
 * Set-up and validate the members of a cpu fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	cpuid			uint32_t	<cpu_id>
 *	cpumask			uint8_t		<cpu_mask>
 *	serial			uint64_t	<serial_id>
 *
 * Note that auth, cpumask, serial are optional members.
 */
void
fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
    uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
{
	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;

	if (version < CPU_SCHEME_VERSION1) {
		atomic_inc_64(failedp);
		return;
	}

	if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
		atomic_inc_64(failedp);
		return;
	}

	if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
	    FM_FMRI_SCHEME_CPU) != 0) {
		atomic_inc_64(failedp);
		return;
	}

	if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
	    (nvlist_t *)auth) != 0)
		atomic_inc_64(failedp);

	if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
		atomic_inc_64(failedp);

	if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
	    *cpu_maskp) != 0)
		atomic_inc_64(failedp);

	if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
	    FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
		atomic_inc_64(failedp);
}

/*
 * Set-up and validate the members of a mem fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>		[optional]
 *	unum			string		<unum>
 *	serial			string		<serial>	[optional*]
 *	offset			uint64_t	<offset>	[optional]
 *
 *	* serial is required if offset is present
 */
void
fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    const char *unum, const char *serial, uint64_t offset)
{
	if (version != MEM_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (!serial && (offset != (uint64_t)-1)) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (auth != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}

	if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	if (serial != NULL) {
		if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
		    (char **)&serial, 1) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
		if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri,
		    FM_FMRI_MEM_OFFSET, offset) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

void
fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
    uint64_t vdev_guid)
{
	if (version != ZFS_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	if (vdev_guid != 0) {
		if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}
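
/*
 * Illustrative sketch, not part of the original file: ZFS identifies a
 * detector by pool GUID and, optionally, vdev GUID; passing 0 omits the
 * vdev member. 'spa' and 'vd' are assumed pool and vdev handles.
 *
 *	fm_fmri_zfs_set(detector, ZFS_SCHEME_VERSION0,
 *	    spa_guid(spa), vd ? vd->vdev_guid : 0);
 */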

uint64_t
fm_ena_increment(uint64_t ena)
{
	uint64_t new_ena;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
		break;
	case FM_ENA_FMT2:
		new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);
		break;
	default:
		new_ena = 0;
	}

	return (new_ena);
}

uint64_t
fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
{
	uint64_t ena = 0;

	switch (format) {
	case FM_ENA_FMT1:
		if (timestamp) {
			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
			    ENA_FMT1_CPUID_MASK) |
			    ((timestamp << ENA_FMT1_TIME_SHFT) &
			    ENA_FMT1_TIME_MASK));
		} else {
			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
			    ENA_FMT1_CPUID_MASK) |
			    ((gethrtime() << ENA_FMT1_TIME_SHFT) &
			    ENA_FMT1_TIME_MASK));
		}
		break;
	case FM_ENA_FMT2:
		ena = (uint64_t)((format & ENA_FORMAT_MASK) |
		    ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));
		break;
	default:
		break;
	}

	return (ena);
}

uint64_t
fm_ena_generate(uint64_t timestamp, uchar_t format)
{
	uint64_t ena;

	kpreempt_disable();
	ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);
	kpreempt_enable();

	return (ena);
}

uint64_t
fm_ena_generation_get(uint64_t ena)
{
	uint64_t gen;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
		break;
	case FM_ENA_FMT2:
		gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;
		break;
	default:
		gen = 0;
		break;
	}

	return (gen);
}

uchar_t
fm_ena_format_get(uint64_t ena)
{
	return (ENA_FORMAT(ena));
}

uint64_t
fm_ena_id_get(uint64_t ena)
{
	uint64_t id;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
		break;
	case FM_ENA_FMT2:
		id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;
		break;
	default:
		id = 0;
	}

	return (id);
}

uint64_t
fm_ena_time_get(uint64_t ena)
{
	uint64_t time;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
		break;
	case FM_ENA_FMT2:
		time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;
		break;
	default:
		time = 0;
	}

	return (time);
}
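
/*
 * Illustrative sketch, not part of the original file: generating a
 * format-1 ENA and pulling its fields back out with the accessors above.
 *
 *	uint64_t ena = fm_ena_generate(0, FM_ENA_FMT1);
 *
 *	ASSERT3U(fm_ena_format_get(ena), ==, FM_ENA_FMT1);
 *	(void) fm_ena_time_get(ena);		(timestamp bits)
 *	(void) fm_ena_id_get(ena);		(posting CPU id for format 1)
 *	ena = fm_ena_increment(ena);		(bumps the generation field)
 */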

#ifdef _KERNEL
/*
 * Helper function to increment ereport dropped count. Used by the event
 * rate limiting code to give feedback to the user about how many events were
 * rate limited by including them in the 'dropped' count.
 */
void
fm_erpt_dropped_increment(void)
{
	atomic_inc_64(&ratelimit_dropped);
}
#endif

#ifdef _KERNEL
void
fm_init(void)
{
	zevent_len_cur = 0;
	zevent_flags = 0;

	if (zfs_zevent_len_max == 0)
		zfs_zevent_len_max = ERPT_MAX_ERRS * MAX(max_ncpus, 4);

	/* Initialize zevent allocation and generation kstats */
	fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
	    sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);

	if (fm_ksp != NULL) {
		fm_ksp->ks_data = &erpt_kstat_data;
		kstat_install(fm_ksp);
	} else {
		cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
	}

	mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zevent_list, sizeof (zevent_t),
	    offsetof(zevent_t, ev_node));
	cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);
}

void
fm_fini(void)
{
	int count;

	zfs_zevent_drain_all(&count);

	mutex_enter(&zevent_lock);
	cv_broadcast(&zevent_cv);

	zevent_flags |= ZEVENT_SHUTDOWN;
	while (zevent_waiters > 0) {
		mutex_exit(&zevent_lock);
		schedule();
		mutex_enter(&zevent_lock);
	}
	mutex_exit(&zevent_lock);

	cv_destroy(&zevent_cv);
	list_destroy(&zevent_list);
	mutex_destroy(&zevent_lock);

	if (fm_ksp != NULL) {
		kstat_delete(fm_ksp);
		fm_ksp = NULL;
	}
}

module_param(zfs_zevent_len_max, int, 0644);
MODULE_PARM_DESC(zfs_zevent_len_max, "Max event queue length");

module_param(zfs_zevent_cols, int, 0644);
MODULE_PARM_DESC(zfs_zevent_cols, "Max event column width");

module_param(zfs_zevent_console, int, 0644);
MODULE_PARM_DESC(zfs_zevent_console, "Log events to the console");

#endif /* _KERNEL */