/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Fault Management Architecture (FMA) Resource and Protocol Support
 *
 * The routines contained herein provide services to support kernel subsystems
 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
 *
 * Name-Value Pair Lists
 *
 * The embodiment of an FMA protocol element (event, fmri or authority) is a
 * name-value pair list (nvlist_t). FMA-specific nvlist constructor and
 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
 * to create an nvpair list using custom allocators. Callers may choose to
 * allocate either from the kernel memory allocator, or from a preallocated
 * buffer, useful in constrained contexts like high-level interrupt routines.
 *
 * Protocol Event and FMRI Construction
 *
 * Convenience routines are provided to construct nvlist events according to
 * the FMA Event Protocol and Naming Schema specification for ereports and
 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
 *
 * ENA Manipulation
 *
 * Routines to generate ENA formats 0, 1 and 2 are available as well as
 * routines to increment formats 1 and 2. Individual fields within the
 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
 * fm_ena_format_get() and fm_ena_generation_get().
 */
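
/*
 * A minimal usage sketch (illustrative only, not part of the original
 * source): allocate a protocol nvlist with the default FMA allocator,
 * populate it, and release it.
 *
 *      nvlist_t *nvl = fm_nvlist_create(NULL);
 *
 *      if (nvl != NULL) {
 *              ... populate via fm_ereport_set() / fm_payload_set() ...
 *              fm_nvlist_destroy(nvl, FM_NVA_FREE);
 *      }
 */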

#include <sys/types.h>
#include <sys/time.h>
#include <sys/list.h>
#include <sys/nvpair.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/compress.h>
#include <sys/sunddi.h>
#include <sys/systeminfo.h>
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
#include <sys/kstat.h>
#include <sys/zfs_context.h>
#ifdef _KERNEL
#include <sys/atomic.h>
#include <sys/condvar.h>
#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/dumphdr.h>
#include <sys/cpuvar.h>
#include <sys/console.h>
#include <sys/kobj.h>
#include <sys/time.h>
#include <sys/zfs_ioctl.h>

int zfs_zevent_len_max = 0;
int zfs_zevent_cols = 80;
int zfs_zevent_console = 0;

static int zevent_len_cur = 0;
static int zevent_waiters = 0;
static int zevent_flags = 0;

/* Num events rate limited since the last time zfs_zevent_next() was called */
static uint64_t ratelimit_dropped = 0;

/*
 * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
 * posted. The posted EIDs are monotonically increasing but not persistent.
 * They will be reset to the initial value (1) each time the kernel module is
 * loaded.
 */
static uint64_t zevent_eid = 0;

static kmutex_t zevent_lock;
static list_t zevent_list;
static kcondvar_t zevent_cv;
#endif /* _KERNEL */


/*
 * Common fault management kstats to record event generation failures
 */

struct erpt_kstat {
        kstat_named_t   erpt_dropped;           /* num erpts dropped on post */
        kstat_named_t   erpt_set_failed;        /* num erpt set failures */
        kstat_named_t   fmri_set_failed;        /* num fmri set failures */
        kstat_named_t   payload_set_failed;     /* num payload set failures */
};

static struct erpt_kstat erpt_kstat_data = {
        { "erpt-dropped", KSTAT_DATA_UINT64 },
        { "erpt-set-failed", KSTAT_DATA_UINT64 },
        { "fmri-set-failed", KSTAT_DATA_UINT64 },
        { "payload-set-failed", KSTAT_DATA_UINT64 }
};

kstat_t *fm_ksp;

#ifdef _KERNEL

/*
 * Formatting utility function for fm_nvprintr. We attempt to wrap chunks of
 * output so they aren't split across console lines, and return the end column.
 */
/*PRINTFLIKE4*/
static int
fm_printf(int depth, int c, int cols, const char *format, ...)
{
        va_list ap;
        int width;
        char c1;

        va_start(ap, format);
        width = vsnprintf(&c1, sizeof (c1), format, ap);
        va_end(ap);

        if (c + width >= cols) {
                console_printf("\n");
                c = 0;
                if (format[0] != ' ' && depth > 0) {
                        console_printf(" ");
                        c++;
                }
        }

        va_start(ap, format);
        console_vprintf(format, ap);
        va_end(ap);

        return ((c + width) % cols);
}

/*
 * Recursively print an nvlist in the specified column width and return the
 * column we end up in. This function is called recursively by fm_nvprint(),
 * below. We generically format the entire nvpair using hexadecimal
 * integers and strings, and elide string, boolean and byte arrays. Arrays
 * are basically used for cache dumps right now, so we suppress them so as
 * not to overwhelm the amount of console output we produce at panic time.
 * This can be further enhanced as FMA technology grows based upon the needs
 * of consumers. All FMA telemetry is logged using the dump device transport,
 * so the console output serves only as a fallback in case this procedure is
 * unsuccessful.
 */
static int
fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
{
        nvpair_t *nvp;

        for (nvp = nvlist_next_nvpair(nvl, NULL);
            nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) {

                data_type_t type = nvpair_type(nvp);
                const char *name = nvpair_name(nvp);

                boolean_t b;
                uint8_t i8;
                uint16_t i16;
                uint32_t i32;
                uint64_t i64;
                char *str;
                nvlist_t *cnv;

                if (strcmp(name, FM_CLASS) == 0)
                        continue; /* already printed by caller */

                c = fm_printf(d, c, cols, " %s=", name);

                switch (type) {
                case DATA_TYPE_BOOLEAN:
                        c = fm_printf(d + 1, c, cols, " 1");
                        break;

                case DATA_TYPE_BOOLEAN_VALUE:
                        (void) nvpair_value_boolean_value(nvp, &b);
                        c = fm_printf(d + 1, c, cols, b ? "1" : "0");
                        break;

                case DATA_TYPE_BYTE:
                        (void) nvpair_value_byte(nvp, &i8);
                        c = fm_printf(d + 1, c, cols, "0x%x", i8);
                        break;

                case DATA_TYPE_INT8:
                        (void) nvpair_value_int8(nvp, (void *)&i8);
                        c = fm_printf(d + 1, c, cols, "0x%x", i8);
                        break;

                case DATA_TYPE_UINT8:
                        (void) nvpair_value_uint8(nvp, &i8);
                        c = fm_printf(d + 1, c, cols, "0x%x", i8);
                        break;

                case DATA_TYPE_INT16:
                        (void) nvpair_value_int16(nvp, (void *)&i16);
                        c = fm_printf(d + 1, c, cols, "0x%x", i16);
                        break;

                case DATA_TYPE_UINT16:
                        (void) nvpair_value_uint16(nvp, &i16);
                        c = fm_printf(d + 1, c, cols, "0x%x", i16);
                        break;

                case DATA_TYPE_INT32:
                        (void) nvpair_value_int32(nvp, (void *)&i32);
                        c = fm_printf(d + 1, c, cols, "0x%x", i32);
                        break;

                case DATA_TYPE_UINT32:
                        (void) nvpair_value_uint32(nvp, &i32);
                        c = fm_printf(d + 1, c, cols, "0x%x", i32);
                        break;

                case DATA_TYPE_INT64:
                        (void) nvpair_value_int64(nvp, (void *)&i64);
                        c = fm_printf(d + 1, c, cols, "0x%llx",
                            (u_longlong_t)i64);
                        break;

                case DATA_TYPE_UINT64:
                        (void) nvpair_value_uint64(nvp, &i64);
                        c = fm_printf(d + 1, c, cols, "0x%llx",
                            (u_longlong_t)i64);
                        break;

                case DATA_TYPE_HRTIME:
                        (void) nvpair_value_hrtime(nvp, (void *)&i64);
                        c = fm_printf(d + 1, c, cols, "0x%llx",
                            (u_longlong_t)i64);
                        break;

                case DATA_TYPE_STRING:
                        (void) nvpair_value_string(nvp, &str);
                        c = fm_printf(d + 1, c, cols, "\"%s\"",
                            str ? str : "<NULL>");
                        break;

                case DATA_TYPE_NVLIST:
                        c = fm_printf(d + 1, c, cols, "[");
                        (void) nvpair_value_nvlist(nvp, &cnv);
                        c = fm_nvprintr(cnv, d + 1, c, cols);
                        c = fm_printf(d + 1, c, cols, " ]");
                        break;

                case DATA_TYPE_NVLIST_ARRAY: {
                        nvlist_t **val;
                        uint_t i, nelem;

                        c = fm_printf(d + 1, c, cols, "[");
                        (void) nvpair_value_nvlist_array(nvp, &val, &nelem);
                        for (i = 0; i < nelem; i++) {
                                c = fm_nvprintr(val[i], d + 1, c, cols);
                        }
                        c = fm_printf(d + 1, c, cols, " ]");
                }
                break;

                case DATA_TYPE_INT8_ARRAY: {
                        int8_t *val;
                        uint_t i, nelem;

                        c = fm_printf(d + 1, c, cols, "[ ");
                        (void) nvpair_value_int8_array(nvp, &val, &nelem);
                        for (i = 0; i < nelem; i++)
                                c = fm_printf(d + 1, c, cols, "0x%llx ",
                                    (u_longlong_t)val[i]);

                        c = fm_printf(d + 1, c, cols, "]");
                        break;
                }

                case DATA_TYPE_UINT8_ARRAY: {
                        uint8_t *val;
                        uint_t i, nelem;

                        c = fm_printf(d + 1, c, cols, "[ ");
                        (void) nvpair_value_uint8_array(nvp, &val, &nelem);
                        for (i = 0; i < nelem; i++)
                                c = fm_printf(d + 1, c, cols, "0x%llx ",
                                    (u_longlong_t)val[i]);

                        c = fm_printf(d + 1, c, cols, "]");
                        break;
                }

                case DATA_TYPE_INT16_ARRAY: {
                        int16_t *val;
                        uint_t i, nelem;

                        c = fm_printf(d + 1, c, cols, "[ ");
                        (void) nvpair_value_int16_array(nvp, &val, &nelem);
                        for (i = 0; i < nelem; i++)
                                c = fm_printf(d + 1, c, cols, "0x%llx ",
                                    (u_longlong_t)val[i]);

                        c = fm_printf(d + 1, c, cols, "]");
                        break;
                }

                case DATA_TYPE_UINT16_ARRAY: {
                        uint16_t *val;
                        uint_t i, nelem;

                        c = fm_printf(d + 1, c, cols, "[ ");
                        (void) nvpair_value_uint16_array(nvp, &val, &nelem);
                        for (i = 0; i < nelem; i++)
                                c = fm_printf(d + 1, c, cols, "0x%llx ",
                                    (u_longlong_t)val[i]);

                        c = fm_printf(d + 1, c, cols, "]");
                        break;
                }

                case DATA_TYPE_INT32_ARRAY: {
                        int32_t *val;
                        uint_t i, nelem;

                        c = fm_printf(d + 1, c, cols, "[ ");
                        (void) nvpair_value_int32_array(nvp, &val, &nelem);
                        for (i = 0; i < nelem; i++)
                                c = fm_printf(d + 1, c, cols, "0x%llx ",
                                    (u_longlong_t)val[i]);

                        c = fm_printf(d + 1, c, cols, "]");
                        break;
                }

                case DATA_TYPE_UINT32_ARRAY: {
                        uint32_t *val;
                        uint_t i, nelem;

                        c = fm_printf(d + 1, c, cols, "[ ");
                        (void) nvpair_value_uint32_array(nvp, &val, &nelem);
                        for (i = 0; i < nelem; i++)
                                c = fm_printf(d + 1, c, cols, "0x%llx ",
                                    (u_longlong_t)val[i]);

                        c = fm_printf(d + 1, c, cols, "]");
                        break;
                }

                case DATA_TYPE_INT64_ARRAY: {
                        int64_t *val;
                        uint_t i, nelem;

                        c = fm_printf(d + 1, c, cols, "[ ");
                        (void) nvpair_value_int64_array(nvp, &val, &nelem);
                        for (i = 0; i < nelem; i++)
                                c = fm_printf(d + 1, c, cols, "0x%llx ",
                                    (u_longlong_t)val[i]);

                        c = fm_printf(d + 1, c, cols, "]");
                        break;
                }

                case DATA_TYPE_UINT64_ARRAY: {
                        uint64_t *val;
                        uint_t i, nelem;

                        c = fm_printf(d + 1, c, cols, "[ ");
                        (void) nvpair_value_uint64_array(nvp, &val, &nelem);
                        for (i = 0; i < nelem; i++)
                                c = fm_printf(d + 1, c, cols, "0x%llx ",
                                    (u_longlong_t)val[i]);

                        c = fm_printf(d + 1, c, cols, "]");
                        break;
                }

                case DATA_TYPE_STRING_ARRAY:
                case DATA_TYPE_BOOLEAN_ARRAY:
                case DATA_TYPE_BYTE_ARRAY:
                        c = fm_printf(d + 1, c, cols, "[...]");
                        break;

                case DATA_TYPE_UNKNOWN:
                        c = fm_printf(d + 1, c, cols, "<unknown>");
                        break;
                }
        }

        return (c);
}

void
fm_nvprint(nvlist_t *nvl)
{
        char *class;
        int c = 0;

        console_printf("\n");

        if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0)
                c = fm_printf(0, c, zfs_zevent_cols, "%s", class);

        if (fm_nvprintr(nvl, 0, c, zfs_zevent_cols) != 0)
                console_printf("\n");

        console_printf("\n");
}
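
/*
 * Illustrative only: with zfs_zevent_console enabled, a posted ereport is
 * rendered by fm_nvprint() roughly as below (exact fields depend on the
 * event; strings are quoted, integers are printed in hex):
 *
 *      ereport.fs.zfs.checksum eid=0xc pool="tank" vdev_guid=0x1f54...
 */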

static zevent_t *
zfs_zevent_alloc(void)
{
        zevent_t *ev;

        ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);

        list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t),
            offsetof(zfs_zevent_t, ze_node));
        list_link_init(&ev->ev_node);

        return (ev);
}

static void
zfs_zevent_free(zevent_t *ev)
{
        /* Run provided cleanup callback */
        ev->ev_cb(ev->ev_nvl, ev->ev_detector);

        list_destroy(&ev->ev_ze_list);
        kmem_free(ev, sizeof (zevent_t));
}

static void
zfs_zevent_drain(zevent_t *ev)
{
        zfs_zevent_t *ze;

        ASSERT(MUTEX_HELD(&zevent_lock));
        list_remove(&zevent_list, ev);

        /* Remove references to this event in all private file data */
        while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
                list_remove(&ev->ev_ze_list, ze);
                ze->ze_zevent = NULL;
                ze->ze_dropped++;
        }

        zfs_zevent_free(ev);
}

void
zfs_zevent_drain_all(int *count)
{
        zevent_t *ev;

        mutex_enter(&zevent_lock);
        while ((ev = list_head(&zevent_list)) != NULL)
                zfs_zevent_drain(ev);

        *count = zevent_len_cur;
        zevent_len_cur = 0;
        mutex_exit(&zevent_lock);
}

/*
 * New zevents are inserted at the head. If the maximum queue
 * length is exceeded a zevent will be drained from the tail.
 * As part of this, any user-space processes which currently have
 * a reference to this zevent_t in their private data will have
 * that reference set to NULL.
 */
static void
zfs_zevent_insert(zevent_t *ev)
{
        ASSERT(MUTEX_HELD(&zevent_lock));
        list_insert_head(&zevent_list, ev);

        if (zevent_len_cur >= zfs_zevent_len_max)
                zfs_zevent_drain(list_tail(&zevent_list));
        else
                zevent_len_cur++;
}

/*
 * Post a zevent. The cb will be called when nvl and detector are no longer
 * needed, i.e.:
 * - An error happened and a zevent can't be posted. In this case, cb is called
 *   before zfs_zevent_post() returns.
 * - The event is being drained and freed.
 */
int
zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
{
        int64_t tv_array[2];
        timestruc_t tv;
        uint64_t eid;
        size_t nvl_size = 0;
        zevent_t *ev;
        int error;

        ASSERT(cb != NULL);

        gethrestime(&tv);
        tv_array[0] = tv.tv_sec;
        tv_array[1] = tv.tv_nsec;

        error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2);
        if (error) {
                atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
                goto out;
        }

        eid = atomic_inc_64_nv(&zevent_eid);
        error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
        if (error) {
                atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
                goto out;
        }

        error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
        if (error) {
                atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
                goto out;
        }

        if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
                atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
                error = EOVERFLOW;
                goto out;
        }

        if (zfs_zevent_console)
                fm_nvprint(nvl);

        ev = zfs_zevent_alloc();
        if (ev == NULL) {
                atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
                error = ENOMEM;
                goto out;
        }

        ev->ev_nvl = nvl;
        ev->ev_detector = detector;
        ev->ev_cb = cb;
        ev->ev_eid = eid;

        mutex_enter(&zevent_lock);
        zfs_zevent_insert(ev);
        cv_broadcast(&zevent_cv);
        mutex_exit(&zevent_lock);

out:
        if (error)
                cb(nvl, detector);

        return (error);
}
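
/*
 * Sketch of the callback contract described above (the callback name is
 * hypothetical): the posted lists must stay valid until cb fires, and cb
 * performs the final teardown on both the success and failure paths.
 *
 *      static void
 *      my_zevent_cb(nvlist_t *nvl, nvlist_t *detector)
 *      {
 *              fm_nvlist_destroy(nvl, FM_NVA_FREE);
 *              if (detector != NULL)
 *                      fm_nvlist_destroy(detector, FM_NVA_FREE);
 *      }
 *
 *      (void) zfs_zevent_post(nvl, detector, my_zevent_cb);
 */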

static int
zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
{
        *ze = zfsdev_get_state(minor, ZST_ZEVENT);
        if (*ze == NULL)
                return (EBADF);

        return (0);
}

int
zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
{
        file_t *fp;
        int error;

        fp = getf(fd);
        if (fp == NULL)
                return (EBADF);

        error = zfsdev_getminor(fp->f_file, minorp);
        if (error == 0)
                error = zfs_zevent_minor_to_state(*minorp, ze);

        if (error)
                zfs_zevent_fd_rele(fd);

        return (error);
}

void
zfs_zevent_fd_rele(int fd)
{
        releasef(fd);
}

/*
 * Get the next zevent in the stream and place a copy in 'event'. This
 * may fail with ENOMEM if the encoded nvlist size exceeds the passed
 * 'event_size'. In this case the stream pointer is not advanced and
 * 'event_size' is set to the minimum required buffer size.
 */
int
zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
    uint64_t *dropped)
{
        zevent_t *ev;
        size_t size;
        int error = 0;

        mutex_enter(&zevent_lock);
        if (ze->ze_zevent == NULL) {
                /* New stream, start at the beginning/tail */
                ev = list_tail(&zevent_list);
                if (ev == NULL) {
                        error = ENOENT;
                        goto out;
                }
        } else {
                /*
                 * Existing stream, continue with the next element and remove
                 * ourselves from the wait queue for the previous element.
                 */
                ev = list_prev(&zevent_list, ze->ze_zevent);
                if (ev == NULL) {
                        error = ENOENT;
                        goto out;
                }
        }

        VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
        if (size > *event_size) {
                *event_size = size;
                error = ENOMEM;
                goto out;
        }

        if (ze->ze_zevent)
                list_remove(&ze->ze_zevent->ev_ze_list, ze);

        ze->ze_zevent = ev;
        list_insert_head(&ev->ev_ze_list, ze);
        (void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
        *dropped = ze->ze_dropped;

#ifdef _KERNEL
        /* Include events dropped due to rate limiting */
        *dropped += ratelimit_dropped;
        ratelimit_dropped = 0;
#endif
        ze->ze_dropped = 0;
out:
        mutex_exit(&zevent_lock);

        return (error);
}
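
/*
 * Hypothetical consumer loop for the ENOMEM protocol above: on ENOMEM the
 * cursor does not advance and 'size' is updated to the required length, so
 * retrying with the updated size fetches the same event.
 *
 *      uint64_t size = 0, dropped = 0;
 *      nvlist_t *event;
 *      int error;
 *
 *      for (;;) {
 *              error = zfs_zevent_next(ze, &event, &size, &dropped);
 *              if (error == ENOMEM)
 *                      continue;       (retry with the updated size)
 *              if (error != 0)
 *                      break;          (ENOENT: caught up with the stream)
 *              ... consume, then nvlist_free(event) ...
 *      }
 */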

int
zfs_zevent_wait(zfs_zevent_t *ze)
{
        int error = 0;

        mutex_enter(&zevent_lock);

        if (zevent_flags & ZEVENT_SHUTDOWN) {
                error = ESHUTDOWN;
                goto out;
        }

        zevent_waiters++;
        cv_wait_sig(&zevent_cv, &zevent_lock);
        if (issig(JUSTLOOKING))
                error = EINTR;

        zevent_waiters--;
out:
        mutex_exit(&zevent_lock);

        return (error);
}

/*
 * The caller may seek to a specific EID by passing that EID. If the EID
 * is still available in the posted list of events the cursor is positioned
 * there. Otherwise ENOENT is returned and the cursor is not moved.
 *
 * There are two reserved EIDs which may be passed and will never fail.
 * ZEVENT_SEEK_START positions the cursor at the start of the list, and
 * ZEVENT_SEEK_END positions the cursor at the end of the list.
 */
int
zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid)
{
        zevent_t *ev;
        int error = 0;

        mutex_enter(&zevent_lock);

        if (eid == ZEVENT_SEEK_START) {
                if (ze->ze_zevent)
                        list_remove(&ze->ze_zevent->ev_ze_list, ze);

                ze->ze_zevent = NULL;
                goto out;
        }

        if (eid == ZEVENT_SEEK_END) {
                if (ze->ze_zevent)
                        list_remove(&ze->ze_zevent->ev_ze_list, ze);

                ev = list_head(&zevent_list);
                if (ev) {
                        ze->ze_zevent = ev;
                        list_insert_head(&ev->ev_ze_list, ze);
                } else {
                        ze->ze_zevent = NULL;
                }

                goto out;
        }

        for (ev = list_tail(&zevent_list); ev != NULL;
            ev = list_prev(&zevent_list, ev)) {
                if (ev->ev_eid == eid) {
                        if (ze->ze_zevent)
                                list_remove(&ze->ze_zevent->ev_ze_list, ze);

                        ze->ze_zevent = ev;
                        list_insert_head(&ev->ev_ze_list, ze);
                        break;
                }
        }

        if (ev == NULL)
                error = ENOENT;

out:
        mutex_exit(&zevent_lock);

        return (error);
}
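
/*
 * Illustrative: a consumer interested only in events posted after it
 * attached positions its cursor at the end of the list, then blocks for
 * new arrivals.
 *
 *      (void) zfs_zevent_seek(ze, ZEVENT_SEEK_END);
 *      while (zfs_zevent_wait(ze) == 0) {
 *              ... drain fresh events with zfs_zevent_next() ...
 *      }
 */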

void
zfs_zevent_init(zfs_zevent_t **zep)
{
        zfs_zevent_t *ze;

        ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
        list_link_init(&ze->ze_node);
}

void
zfs_zevent_destroy(zfs_zevent_t *ze)
{
        mutex_enter(&zevent_lock);
        if (ze->ze_zevent)
                list_remove(&ze->ze_zevent->ev_ze_list, ze);
        mutex_exit(&zevent_lock);

        kmem_free(ze, sizeof (zfs_zevent_t));
}
#endif /* _KERNEL */

/*
 * Wrappers for FM nvlist allocators
 */
/* ARGSUSED */
static void *
i_fm_alloc(nv_alloc_t *nva, size_t size)
{
        return (kmem_zalloc(size, KM_SLEEP));
}

/* ARGSUSED */
static void
i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
{
        kmem_free(buf, size);
}

const nv_alloc_ops_t fm_mem_alloc_ops = {
        NULL,
        NULL,
        i_fm_alloc,
        i_fm_free,
        NULL
};

/*
 * Create and initialize a new nv_alloc_t for a fixed buffer, buf. A pointer
 * to the newly allocated nv_alloc_t structure is returned upon success or NULL
 * is returned to indicate that the nv_alloc structure could not be created.
 */
nv_alloc_t *
fm_nva_xcreate(char *buf, size_t bufsz)
{
        nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

        if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
                kmem_free(nvhdl, sizeof (nv_alloc_t));
                return (NULL);
        }

        return (nvhdl);
}

/*
 * Destroy a previously allocated nv_alloc structure. The fixed buffer
 * associated with nva must be freed by the caller.
 */
void
fm_nva_xdestroy(nv_alloc_t *nva)
{
        nv_alloc_fini(nva);
        kmem_free(nva, sizeof (nv_alloc_t));
}
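
/*
 * Sketch of the fixed-buffer path: the nv_alloc_t is created ahead of time
 * from a context that may sleep, and later nvlist operations then draw from
 * the preallocated buffer instead of the kernel allocator (the buffer name
 * is illustrative).
 *
 *      static char ebuf[ERPT_DATA_SZ];
 *      nv_alloc_t *nva = fm_nva_xcreate(ebuf, sizeof (ebuf));
 *
 *      if (nva != NULL) {
 *              nvlist_t *nvl = fm_nvlist_create(nva);
 *              ... use nvl, then fm_nvlist_destroy(nvl, FM_NVA_RETAIN) ...
 *              fm_nva_xdestroy(nva);
 *      }
 */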

/*
 * Create a new nv list. A pointer to a new nv list structure is returned
 * upon success or NULL is returned to indicate that the structure could
 * not be created. The newly created nv list is created and managed by the
 * operations installed in nva. If nva is NULL, the default FMA nva
 * operations are installed and used.
 *
 * When called from the kernel and nva == NULL, this function must be called
 * from passive kernel context with no locks held that can prevent a
 * sleeping memory allocation from occurring. Otherwise, this function may
 * be called from other kernel contexts as long as a valid nva created via
 * fm_nva_xcreate() is supplied.
 */
nvlist_t *
fm_nvlist_create(nv_alloc_t *nva)
{
        int hdl_alloced = 0;
        nvlist_t *nvl;
        nv_alloc_t *nvhdl;

        if (nva == NULL) {
                nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

                if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
                        kmem_free(nvhdl, sizeof (nv_alloc_t));
                        return (NULL);
                }
                hdl_alloced = 1;
        } else {
                nvhdl = nva;
        }

        if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
                if (hdl_alloced) {
                        nv_alloc_fini(nvhdl);
                        kmem_free(nvhdl, sizeof (nv_alloc_t));
                }
                return (NULL);
        }

        return (nvl);
}

/*
 * Destroy a previously allocated nvlist structure. flag indicates whether
 * or not the associated nva structure should be freed (FM_NVA_FREE) or
 * retained (FM_NVA_RETAIN). Retaining the nv alloc structure allows
 * it to be re-used for future nvlist creation operations.
 */
void
fm_nvlist_destroy(nvlist_t *nvl, int flag)
{
        nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);

        nvlist_free(nvl);

        if (nva != NULL) {
                if (flag == FM_NVA_FREE)
                        fm_nva_xdestroy(nva);
        }
}

int
i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
{
        int nelem, ret = 0;
        data_type_t type;

        while (ret == 0 && name != NULL) {
                type = va_arg(ap, data_type_t);
                switch (type) {
                case DATA_TYPE_BYTE:
                        ret = nvlist_add_byte(payload, name,
                            va_arg(ap, uint_t));
                        break;
                case DATA_TYPE_BYTE_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_byte_array(payload, name,
                            va_arg(ap, uchar_t *), nelem);
                        break;
                case DATA_TYPE_BOOLEAN_VALUE:
                        ret = nvlist_add_boolean_value(payload, name,
                            va_arg(ap, boolean_t));
                        break;
                case DATA_TYPE_BOOLEAN_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_boolean_array(payload, name,
                            va_arg(ap, boolean_t *), nelem);
                        break;
                case DATA_TYPE_INT8:
                        ret = nvlist_add_int8(payload, name,
                            va_arg(ap, int));
                        break;
                case DATA_TYPE_INT8_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_int8_array(payload, name,
                            va_arg(ap, int8_t *), nelem);
                        break;
                case DATA_TYPE_UINT8:
                        ret = nvlist_add_uint8(payload, name,
                            va_arg(ap, uint_t));
                        break;
                case DATA_TYPE_UINT8_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_uint8_array(payload, name,
                            va_arg(ap, uint8_t *), nelem);
                        break;
                case DATA_TYPE_INT16:
                        ret = nvlist_add_int16(payload, name,
                            va_arg(ap, int));
                        break;
                case DATA_TYPE_INT16_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_int16_array(payload, name,
                            va_arg(ap, int16_t *), nelem);
                        break;
                case DATA_TYPE_UINT16:
                        ret = nvlist_add_uint16(payload, name,
                            va_arg(ap, uint_t));
                        break;
                case DATA_TYPE_UINT16_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_uint16_array(payload, name,
                            va_arg(ap, uint16_t *), nelem);
                        break;
                case DATA_TYPE_INT32:
                        ret = nvlist_add_int32(payload, name,
                            va_arg(ap, int32_t));
                        break;
                case DATA_TYPE_INT32_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_int32_array(payload, name,
                            va_arg(ap, int32_t *), nelem);
                        break;
                case DATA_TYPE_UINT32:
                        ret = nvlist_add_uint32(payload, name,
                            va_arg(ap, uint32_t));
                        break;
                case DATA_TYPE_UINT32_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_uint32_array(payload, name,
                            va_arg(ap, uint32_t *), nelem);
                        break;
                case DATA_TYPE_INT64:
                        ret = nvlist_add_int64(payload, name,
                            va_arg(ap, int64_t));
                        break;
                case DATA_TYPE_INT64_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_int64_array(payload, name,
                            va_arg(ap, int64_t *), nelem);
                        break;
                case DATA_TYPE_UINT64:
                        ret = nvlist_add_uint64(payload, name,
                            va_arg(ap, uint64_t));
                        break;
                case DATA_TYPE_UINT64_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_uint64_array(payload, name,
                            va_arg(ap, uint64_t *), nelem);
                        break;
                case DATA_TYPE_STRING:
                        ret = nvlist_add_string(payload, name,
                            va_arg(ap, char *));
                        break;
                case DATA_TYPE_STRING_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_string_array(payload, name,
                            va_arg(ap, char **), nelem);
                        break;
                case DATA_TYPE_NVLIST:
                        ret = nvlist_add_nvlist(payload, name,
                            va_arg(ap, nvlist_t *));
                        break;
                case DATA_TYPE_NVLIST_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_nvlist_array(payload, name,
                            va_arg(ap, nvlist_t **), nelem);
                        break;
                default:
                        ret = EINVAL;
                }

                name = va_arg(ap, char *);
        }
        return (ret);
}

void
fm_payload_set(nvlist_t *payload, ...)
{
        int ret;
        const char *name;
        va_list ap;

        va_start(ap, payload);
        name = va_arg(ap, char *);
        ret = i_fm_payload_set(payload, name, ap);
        va_end(ap);

        if (ret)
                atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);
}
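
/*
 * Call-convention sketch (names are hypothetical): the varargs are
 * (name, DATA_TYPE_*, value) tuples, with an element count preceding the
 * pointer for array types, terminated by a NULL name.
 *
 *      fm_payload_set(payload,
 *          "acme-retries", DATA_TYPE_UINT32, retries,
 *          "acme-path", DATA_TYPE_STRING, path,
 *          NULL);
 */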

/*
 * Set-up and validate the members of an ereport event according to:
 *
 *      Member name             Type            Value
 *      ====================================================
 *      class                   string          ereport
 *      version                 uint8_t         0
 *      ena                     uint64_t        <ena>
 *      detector                nvlist_t        <detector>
 *      ereport-payload         nvlist_t        <var args>
 *
 * We don't actually add a 'version' member to the payload. Really,
 * the version quoted to us by our caller is that of the category 1
 * "ereport" event class (and we require FM_EREPORT_VERS0) but
 * the payload version of the actual leaf class event under construction
 * may be something else. Callers should supply a version in the varargs,
 * or (better) we could take two version arguments: one for the
 * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
 * for the leaf class.
 */
void
fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
    uint64_t ena, const nvlist_t *detector, ...)
{
        char ereport_class[FM_MAX_CLASS];
        const char *name;
        va_list ap;
        int ret;

        if (version != FM_EREPORT_VERS0) {
                atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
                return;
        }

        (void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
            FM_EREPORT_CLASS, erpt_class);
        if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
                atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
                return;
        }

        if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
                atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
        }

        if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
            (nvlist_t *)detector) != 0) {
                atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
        }

        va_start(ap, detector);
        name = va_arg(ap, const char *);
        ret = i_fm_payload_set(ereport, name, ap);
        va_end(ap);

        if (ret)
                atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
}
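
/*
 * Illustrative call (leaf class and payload names hypothetical): the leaf
 * class is appended to FM_EREPORT_CLASS, and the trailing varargs follow
 * the same NULL-terminated convention as fm_payload_set().
 *
 *      fm_ereport_set(ereport, FM_EREPORT_VERS0, "fs.zfs.acme",
 *          fm_ena_generate(0, FM_ENA_FMT1), detector,
 *          "acme-count", DATA_TYPE_UINT64, (uint64_t)1, NULL);
 */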

/*
 * Set-up and validate the members of an hc fmri according to:
 *
 *      Member name             Type            Value
 *      ===================================================
 *      version                 uint8_t         0
 *      auth                    nvlist_t        <auth>
 *      hc-name                 string          <name>
 *      hc-id                   string          <id>
 *
 * Note that auth and hc-id are optional members.
 */

#define HC_MAXPAIRS     20
#define HC_MAXNAMELEN   50

static int
fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
{
        if (version != FM_HC_SCHEME_VERSION) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
                return (0);
        }

        if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
            nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
                return (0);
        }

        if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
            (nvlist_t *)auth) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
                return (0);
        }

        return (1);
}

void
fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, int npairs, ...)
{
        nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
        nvlist_t *pairs[HC_MAXPAIRS];
        va_list ap;
        int i;

        if (!fm_fmri_hc_set_common(fmri, version, auth))
                return;

        npairs = MIN(npairs, HC_MAXPAIRS);

        va_start(ap, npairs);
        for (i = 0; i < npairs; i++) {
                const char *name = va_arg(ap, const char *);
                uint32_t id = va_arg(ap, uint32_t);
                char idstr[11];

                (void) snprintf(idstr, sizeof (idstr), "%u", id);

                pairs[i] = fm_nvlist_create(nva);
                if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
                    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
                        atomic_inc_64(
                            &erpt_kstat_data.fmri_set_failed.value.ui64);
                }
        }
        va_end(ap);

        if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, npairs) != 0)
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

        for (i = 0; i < npairs; i++)
                fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);

        if (snvl != NULL) {
                if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
                        atomic_inc_64(
                            &erpt_kstat_data.fmri_set_failed.value.ui64);
                }
        }
}
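
/*
 * Illustrative: an hc FMRI naming "/motherboard=0/cpu=1" would be built
 * from two (name, id) pairs; each id is formatted as a decimal string.
 *
 *      fm_fmri_hc_set(fmri, FM_HC_SCHEME_VERSION, auth, NULL, 2,
 *          "motherboard", (uint32_t)0,
 *          "cpu", (uint32_t)1);
 */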

void
fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
{
        nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
        nvlist_t *pairs[HC_MAXPAIRS];
        nvlist_t **hcl;
        uint_t n;
        int i, j;
        va_list ap;
        char *hcname, *hcid;

        if (!fm_fmri_hc_set_common(fmri, version, auth))
                return;

        /*
         * Copy the bboard nvpairs to the pairs array.
         */
        if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
            != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
                return;
        }

        for (i = 0; i < n; i++) {
                if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
                    &hcname) != 0) {
                        atomic_inc_64(
                            &erpt_kstat_data.fmri_set_failed.value.ui64);
                        return;
                }
                if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
                        atomic_inc_64(
                            &erpt_kstat_data.fmri_set_failed.value.ui64);
                        return;
                }

                pairs[i] = fm_nvlist_create(nva);
                if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
                    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
                        for (j = 0; j <= i; j++) {
                                if (pairs[j] != NULL)
                                        fm_nvlist_destroy(pairs[j],
                                            FM_NVA_RETAIN);
                        }
                        atomic_inc_64(
                            &erpt_kstat_data.fmri_set_failed.value.ui64);
                        return;
                }
        }

        /*
         * Create the pairs from the passed-in pairs.
         */
        npairs = MIN(npairs, HC_MAXPAIRS);

        va_start(ap, npairs);
        for (i = n; i < npairs + n; i++) {
                const char *name = va_arg(ap, const char *);
                uint32_t id = va_arg(ap, uint32_t);
                char idstr[11];
                (void) snprintf(idstr, sizeof (idstr), "%u", id);
                pairs[i] = fm_nvlist_create(nva);
                if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
                    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
                        for (j = 0; j <= i; j++) {
                                if (pairs[j] != NULL)
                                        fm_nvlist_destroy(pairs[j],
                                            FM_NVA_RETAIN);
                        }
                        atomic_inc_64(
                            &erpt_kstat_data.fmri_set_failed.value.ui64);
                        return;
                }
        }
        va_end(ap);

        /*
         * Create the fmri hc list.
         */
        if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
            npairs + n) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
                return;
        }

        for (i = 0; i < npairs + n; i++) {
                fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
        }

        if (snvl != NULL) {
                if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
                        atomic_inc_64(
                            &erpt_kstat_data.fmri_set_failed.value.ui64);
                        return;
                }
        }
}

/*
 * Set-up and validate the members of a dev fmri according to:
 *
 *      Member name             Type            Value
 *      ====================================================
 *      version                 uint8_t         0
 *      auth                    nvlist_t        <auth>
 *      devpath                 string          <devpath>
 *      [devid]                 string          <devid>
 *      [target-port-l0id]      string          <target-port-lun0-id>
 *
 * Note that auth and devid are optional members.
 */
void
fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
    const char *devpath, const char *devid, const char *tpl0)
{
        int err = 0;

        if (version != DEV_SCHEME_VERSION0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
                return;
        }

        err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
        err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);

        if (auth != NULL) {
                err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
                    (nvlist_t *)auth);
        }

        err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);

        if (devid != NULL)
                err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);

        if (tpl0 != NULL)
                err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);

        if (err)
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
}

/*
 * Set-up and validate the members of a cpu fmri according to:
 *
 *      Member name             Type            Value
 *      ====================================================
 *      version                 uint8_t         0
 *      auth                    nvlist_t        <auth>
 *      cpuid                   uint32_t        <cpu_id>
 *      cpumask                 uint8_t         <cpu_mask>
 *      serial                  uint64_t        <serial_id>
 *
 * Note that auth, cpumask, serial are optional members.
 */
void
fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
    uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
{
        uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;

        if (version < CPU_SCHEME_VERSION1) {
                atomic_inc_64(failedp);
                return;
        }

        if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
                atomic_inc_64(failedp);
                return;
        }

        if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
            FM_FMRI_SCHEME_CPU) != 0) {
                atomic_inc_64(failedp);
                return;
        }

        if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
            (nvlist_t *)auth) != 0)
                atomic_inc_64(failedp);

        if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
                atomic_inc_64(failedp);

        if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
            *cpu_maskp) != 0)
                atomic_inc_64(failedp);

        if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
            FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
                atomic_inc_64(failedp);
}

/*
 * Set-up and validate the members of a mem fmri according to:
 *
 *      Member name             Type            Value
 *      ====================================================
 *      version                 uint8_t         0
 *      auth                    nvlist_t        <auth>          [optional]
 *      unum                    string          <unum>
 *      serial                  string          <serial>        [optional*]
 *      offset                  uint64_t        <offset>        [optional]
 *
 *      * serial is required if offset is present
 */
void
fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    const char *unum, const char *serial, uint64_t offset)
{
        if (version != MEM_SCHEME_VERSION0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
                return;
        }

        if (!serial && (offset != (uint64_t)-1)) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
                return;
        }

        if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
                return;
        }

        if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
                return;
        }

        if (auth != NULL) {
                if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
                    (nvlist_t *)auth) != 0) {
                        atomic_inc_64(
                            &erpt_kstat_data.fmri_set_failed.value.ui64);
                }
        }

        if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
        }

        if (serial != NULL) {
                if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
                    (char **)&serial, 1) != 0) {
                        atomic_inc_64(
                            &erpt_kstat_data.fmri_set_failed.value.ui64);
                }
                if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri,
                    FM_FMRI_MEM_OFFSET, offset) != 0) {
                        atomic_inc_64(
                            &erpt_kstat_data.fmri_set_failed.value.ui64);
                }
        }
}

void
fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
    uint64_t vdev_guid)
{
        if (version != ZFS_SCHEME_VERSION0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
                return;
        }

        if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
                return;
        }

        if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
                return;
        }

        if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
        }

        if (vdev_guid != 0) {
                if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
                        atomic_inc_64(
                            &erpt_kstat_data.fmri_set_failed.value.ui64);
                }
        }
}
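
/*
 * Illustrative detector for a ZFS ereport, assuming 'spa' and 'vd' pointers
 * from the caller: a vdev_guid of 0 identifies the pool as a whole.
 *
 *      nvlist_t *detector = fm_nvlist_create(NULL);
 *
 *      if (detector != NULL) {
 *              fm_fmri_zfs_set(detector, ZFS_SCHEME_VERSION0,
 *                  spa_guid(spa), vd ? vd->vdev_guid : 0);
 *      }
 */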

uint64_t
fm_ena_increment(uint64_t ena)
{
        uint64_t new_ena;

        switch (ENA_FORMAT(ena)) {
        case FM_ENA_FMT1:
                new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
                break;
        case FM_ENA_FMT2:
                new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);
                break;
        default:
                new_ena = 0;
        }

        return (new_ena);
}

uint64_t
fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
{
        uint64_t ena = 0;

        switch (format) {
        case FM_ENA_FMT1:
                if (timestamp) {
                        ena = (uint64_t)((format & ENA_FORMAT_MASK) |
                            ((cpuid << ENA_FMT1_CPUID_SHFT) &
                            ENA_FMT1_CPUID_MASK) |
                            ((timestamp << ENA_FMT1_TIME_SHFT) &
                            ENA_FMT1_TIME_MASK));
                } else {
                        ena = (uint64_t)((format & ENA_FORMAT_MASK) |
                            ((cpuid << ENA_FMT1_CPUID_SHFT) &
                            ENA_FMT1_CPUID_MASK) |
                            ((gethrtime() << ENA_FMT1_TIME_SHFT) &
                            ENA_FMT1_TIME_MASK));
                }
                break;
        case FM_ENA_FMT2:
                ena = (uint64_t)((format & ENA_FORMAT_MASK) |
                    ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));
                break;
        default:
                break;
        }

        return (ena);
}

uint64_t
fm_ena_generate(uint64_t timestamp, uchar_t format)
{
        uint64_t ena;

        kpreempt_disable();
        ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);
        kpreempt_enable();

        return (ena);
}

uint64_t
fm_ena_generation_get(uint64_t ena)
{
        uint64_t gen;

        switch (ENA_FORMAT(ena)) {
        case FM_ENA_FMT1:
                gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
                break;
        case FM_ENA_FMT2:
                gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;
                break;
        default:
                gen = 0;
                break;
        }

        return (gen);
}

uchar_t
fm_ena_format_get(uint64_t ena)
{
        return (ENA_FORMAT(ena));
}

uint64_t
fm_ena_id_get(uint64_t ena)
{
        uint64_t id;

        switch (ENA_FORMAT(ena)) {
        case FM_ENA_FMT1:
                id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
                break;
        case FM_ENA_FMT2:
                id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;
                break;
        default:
                id = 0;
        }

        return (id);
}

uint64_t
fm_ena_time_get(uint64_t ena)
{
        uint64_t time;

        switch (ENA_FORMAT(ena)) {
        case FM_ENA_FMT1:
                time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
                break;
        case FM_ENA_FMT2:
                time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;
                break;
        default:
                time = 0;
        }

        return (time);
}
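
/*
 * Round-trip sketch for the accessors above: a freshly generated format-1
 * ENA carries the format, CPU id and timestamp bits, and a generation of 0
 * until fm_ena_increment() is applied.
 *
 *      uint64_t ena = fm_ena_generate(0, FM_ENA_FMT1);
 *
 *      ASSERT3U(fm_ena_format_get(ena), ==, FM_ENA_FMT1);
 *      ASSERT0(fm_ena_generation_get(ena));
 *      (void) fm_ena_time_get(ena);    (the gethrtime() bits)
 *      (void) fm_ena_id_get(ena);      (the CPU id for format 1)
 */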

#ifdef _KERNEL
/*
 * Helper function to increment ereport dropped count. Used by the event
 * rate limiting code to give feedback to the user about how many events were
 * rate limited by including them in the 'dropped' count.
 */
void
fm_erpt_dropped_increment(void)
{
        atomic_inc_64(&ratelimit_dropped);
}
#endif

#ifdef _KERNEL
void
fm_init(void)
{
        zevent_len_cur = 0;
        zevent_flags = 0;

        if (zfs_zevent_len_max == 0)
                zfs_zevent_len_max = ERPT_MAX_ERRS * MAX(max_ncpus, 4);

        /* Initialize zevent allocation and generation kstats */
        fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
            sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
            KSTAT_FLAG_VIRTUAL);

        if (fm_ksp != NULL) {
                fm_ksp->ks_data = &erpt_kstat_data;
                kstat_install(fm_ksp);
        } else {
                cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
        }

        mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
        list_create(&zevent_list, sizeof (zevent_t),
            offsetof(zevent_t, ev_node));
        cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);
}

void
fm_fini(void)
{
        int count;

        zfs_zevent_drain_all(&count);

        mutex_enter(&zevent_lock);
        cv_broadcast(&zevent_cv);

        zevent_flags |= ZEVENT_SHUTDOWN;
        while (zevent_waiters > 0) {
                mutex_exit(&zevent_lock);
                schedule();
                mutex_enter(&zevent_lock);
        }
        mutex_exit(&zevent_lock);

        cv_destroy(&zevent_cv);
        list_destroy(&zevent_list);
        mutex_destroy(&zevent_lock);

        if (fm_ksp != NULL) {
                kstat_delete(fm_ksp);
                fm_ksp = NULL;
        }
}

module_param(zfs_zevent_len_max, int, 0644);
MODULE_PARM_DESC(zfs_zevent_len_max, "Max event queue length");

module_param(zfs_zevent_cols, int, 0644);
MODULE_PARM_DESC(zfs_zevent_cols, "Max event column width");

module_param(zfs_zevent_console, int, 0644);
MODULE_PARM_DESC(zfs_zevent_console, "Log events to the console");

#endif /* _KERNEL */