1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * Fault Management Architecture (FMA) Resource and Protocol Support
27 *
28 * The routines contained herein provide services to support kernel subsystems
29 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
30 *
31 * Name-Value Pair Lists
32 *
33 * The embodiment of an FMA protocol element (event, fmri or authority) is a
34 * name-value pair list (nvlist_t). FMA-specific nvlist constructor and
35 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
36 * to create an nvpair list using custom allocators. Callers may choose to
37 * allocate either from the kernel memory allocator, or from a preallocated
38 * buffer, useful in constrained contexts like high-level interrupt routines.
39 *
40 * Protocol Event and FMRI Construction
41 *
42 * Convenience routines are provided to construct nvlist events according to
43 * the FMA Event Protocol and Naming Schema specification for ereports and
44 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
45 *
46 * ENA Manipulation
47 *
48 * Routines to generate ENA formats 0, 1 and 2 are available as well as
49 * routines to increment formats 1 and 2. Individual fields within the
50 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
51 * fm_ena_format_get() and fm_ena_gen_get().
52 */
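/*
 * Hypothetical usage sketch (not part of the original file): how the
 * helpers below typically compose when posting an ereport.  The class
 * string, GUID variables and "example_cb" callback are illustrative
 * placeholders, not names defined in this file.
 *
 *	nvlist_t *detector = fm_nvlist_create(NULL);
 *	nvlist_t *ereport = fm_nvlist_create(NULL);
 *	uint64_t ena = fm_ena_generate(0, FM_ENA_FMT1);
 *
 *	fm_fmri_zfs_set(detector, ZFS_SCHEME_VERSION0, pool_guid, vdev_guid);
 *	fm_ereport_set(ereport, FM_EREPORT_VERS0, "fs.zfs.example", ena,
 *	    detector, NULL);
 *	fm_payload_set(ereport, "pool", DATA_TYPE_STRING, "tank", NULL);
 *
 *	(void) zfs_zevent_post(ereport, detector, example_cb);
 */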
53
54 #include <sys/types.h>
55 #include <sys/time.h>
56 #include <sys/list.h>
57 #include <sys/nvpair.h>
58 #include <sys/cmn_err.h>
59 #include <sys/sysmacros.h>
60 #include <sys/sunddi.h>
61 #include <sys/systeminfo.h>
62 #include <sys/fm/util.h>
63 #include <sys/fm/protocol.h>
64 #include <sys/kstat.h>
65 #include <sys/zfs_context.h>
66 #ifdef _KERNEL
67 #include <sys/atomic.h>
68 #include <sys/condvar.h>
69 #include <sys/zfs_ioctl.h>
70
71 static uint_t zfs_zevent_len_max = 512;
72
73 static uint_t zevent_len_cur = 0;
74 static int zevent_waiters = 0;
75 static int zevent_flags = 0;
76
77 /* Num events rate limited since the last time zfs_zevent_next() was called */
78 static uint64_t ratelimit_dropped = 0;
79
80 /*
81 * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
82 * posted. The posted EIDs are monotonically increasing but not persistent.
83 * They will be reset to the initial value (1) each time the kernel module is
84 * loaded.
85 */
86 static uint64_t zevent_eid = 0;
87
88 static kmutex_t zevent_lock;
89 static list_t zevent_list;
90 static kcondvar_t zevent_cv;
91 #endif /* _KERNEL */
92
93
94 /*
95 * Common fault management kstats to record event generation failures
96 */
97
98 struct erpt_kstat {
99 kstat_named_t erpt_dropped; /* num erpts dropped on post */
100 kstat_named_t erpt_set_failed; /* num erpt set failures */
101 kstat_named_t fmri_set_failed; /* num fmri set failures */
102 kstat_named_t payload_set_failed; /* num payload set failures */
103 kstat_named_t erpt_duplicates; /* num duplicate erpts */
104 };
105
106 static struct erpt_kstat erpt_kstat_data = {
107 { "erpt-dropped", KSTAT_DATA_UINT64 },
108 { "erpt-set-failed", KSTAT_DATA_UINT64 },
109 { "fmri-set-failed", KSTAT_DATA_UINT64 },
110 { "payload-set-failed", KSTAT_DATA_UINT64 },
111 { "erpt-duplicates", KSTAT_DATA_UINT64 }
112 };
113
114 kstat_t *fm_ksp;
115
116 #ifdef _KERNEL
117
118 static zevent_t *
119 zfs_zevent_alloc(void)
120 {
121 zevent_t *ev;
122
123 ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);
124
125 list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t),
126 offsetof(zfs_zevent_t, ze_node));
127 list_link_init(&ev->ev_node);
128
129 return (ev);
130 }
131
132 static void
133 zfs_zevent_free(zevent_t *ev)
134 {
135 /* Run provided cleanup callback */
136 ev->ev_cb(ev->ev_nvl, ev->ev_detector);
137
138 list_destroy(&ev->ev_ze_list);
139 kmem_free(ev, sizeof (zevent_t));
140 }
141
142 static void
143 zfs_zevent_drain(zevent_t *ev)
144 {
145 zfs_zevent_t *ze;
146
147 ASSERT(MUTEX_HELD(&zevent_lock));
148 list_remove(&zevent_list, ev);
149
150 /* Remove references to this event in all private file data */
151 while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
152 list_remove(&ev->ev_ze_list, ze);
153 ze->ze_zevent = NULL;
154 ze->ze_dropped++;
155 }
156
157 zfs_zevent_free(ev);
158 }
159
160 void
161 zfs_zevent_drain_all(uint_t *count)
162 {
163 zevent_t *ev;
164
165 mutex_enter(&zevent_lock);
166 while ((ev = list_head(&zevent_list)) != NULL)
167 zfs_zevent_drain(ev);
168
169 *count = zevent_len_cur;
170 zevent_len_cur = 0;
171 mutex_exit(&zevent_lock);
172 }
173
174 /*
175 * New zevents are inserted at the head. If the maximum queue
176  * length is exceeded, a zevent is drained from the tail.
177  * As part of this, any user-space process that currently holds
178  * a reference to this zevent_t in its private data will have
179  * that reference set to NULL.
180 */
181 static void
182 zfs_zevent_insert(zevent_t *ev)
183 {
184 ASSERT(MUTEX_HELD(&zevent_lock));
185 list_insert_head(&zevent_list, ev);
186
187 if (zevent_len_cur >= zfs_zevent_len_max)
188 zfs_zevent_drain(list_tail(&zevent_list));
189 else
190 zevent_len_cur++;
191 }
192
193 /*
194 * Post a zevent. The cb will be called when nvl and detector are no longer
195 * needed, i.e.:
196 * - An error happened and a zevent can't be posted. In this case, cb is called
197 * before zfs_zevent_post() returns.
198 * - The event is being drained and freed.
199 */
200 int
201 zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
202 {
203 inode_timespec_t tv;
204 int64_t tv_array[2];
205 uint64_t eid;
206 size_t nvl_size = 0;
207 zevent_t *ev;
208 int error;
209
210 ASSERT(cb != NULL);
211
212 gethrestime(&tv);
213 tv_array[0] = tv.tv_sec;
214 tv_array[1] = tv.tv_nsec;
215
216 error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2);
217 if (error) {
218 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
219 goto out;
220 }
221
222 eid = atomic_inc_64_nv(&zevent_eid);
223 error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
224 if (error) {
225 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
226 goto out;
227 }
228
229 error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
230 if (error) {
231 atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
232 goto out;
233 }
234
235 if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
236 atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
237 error = EOVERFLOW;
238 goto out;
239 }
240
241 ev = zfs_zevent_alloc();
242 if (ev == NULL) {
243 atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
244 error = ENOMEM;
245 goto out;
246 }
247
248 ev->ev_nvl = nvl;
249 ev->ev_detector = detector;
250 ev->ev_cb = cb;
251 ev->ev_eid = eid;
252
253 mutex_enter(&zevent_lock);
254 zfs_zevent_insert(ev);
255 cv_broadcast(&zevent_cv);
256 mutex_exit(&zevent_lock);
257
258 out:
259 if (error)
260 cb(nvl, detector);
261
262 return (error);
263 }
264
265 void
266 zfs_zevent_track_duplicate(void)
267 {
268 atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64);
269 }
270
271 static int
272 zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
273 {
274 *ze = zfsdev_get_state(minor, ZST_ZEVENT);
275 if (*ze == NULL)
276 return (SET_ERROR(EBADF));
277
278 return (0);
279 }
280
281 zfs_file_t *
282 zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
283 {
284 zfs_file_t *fp = zfs_file_get(fd);
285 if (fp == NULL)
286 return (NULL);
287
288 int error = zfsdev_getminor(fp, minorp);
289 if (error == 0)
290 error = zfs_zevent_minor_to_state(*minorp, ze);
291
292 if (error) {
293 zfs_zevent_fd_rele(fp);
294 fp = NULL;
295 }
296
297 return (fp);
298 }
299
300 void
301 zfs_zevent_fd_rele(zfs_file_t *fp)
302 {
303 zfs_file_put(fp);
304 }
305
306 /*
307 * Get the next zevent in the stream and place a copy in 'event'. This
308 * may fail with ENOMEM if the encoded nvlist size exceeds the passed
309 * 'event_size'. In this case the stream pointer is not advanced and
310  * 'event_size' is set to the minimum required buffer size.
311 */
312 int
313 zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
314 uint64_t *dropped)
315 {
316 zevent_t *ev;
317 size_t size;
318 int error = 0;
319
320 mutex_enter(&zevent_lock);
321 if (ze->ze_zevent == NULL) {
322 /* New stream: start at the beginning/tail */
323 ev = list_tail(&zevent_list);
324 if (ev == NULL) {
325 error = ENOENT;
326 goto out;
327 }
328 } else {
329 /*
330  * Existing stream: continue with the next element and remove
331  * ourselves from the wait queue for the previous element.
332 */
333 ev = list_prev(&zevent_list, ze->ze_zevent);
334 if (ev == NULL) {
335 error = ENOENT;
336 goto out;
337 }
338 }
339
340 VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
341 if (size > *event_size) {
342 *event_size = size;
343 error = ENOMEM;
344 goto out;
345 }
346
347 if (ze->ze_zevent)
348 list_remove(&ze->ze_zevent->ev_ze_list, ze);
349
350 ze->ze_zevent = ev;
351 list_insert_head(&ev->ev_ze_list, ze);
352 (void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
353 *dropped = ze->ze_dropped;
354
355 #ifdef _KERNEL
356 /* Include events dropped due to rate limiting */
357 *dropped += atomic_swap_64(&ratelimit_dropped, 0);
358 #endif
359 ze->ze_dropped = 0;
360 out:
361 mutex_exit(&zevent_lock);
362
363 return (error);
364 }
365
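/*
 * Hypothetical consumer sketch (not part of the original file): pull the
 * next event from a stream, retrying after zfs_zevent_next() has raised
 * 'size' to the required value on ENOMEM, and blocking in
 * zfs_zevent_wait() when the list is empty.  "ze" is an open stream and
 * "size" is any initial buffer-size guess.
 *
 *	nvlist_t *event;
 *	uint64_t size = 1024, dropped;
 *	int error = zfs_zevent_next(ze, &event, &size, &dropped);
 *	if (error == ENOMEM)
 *		error = zfs_zevent_next(ze, &event, &size, &dropped);
 *	else if (error == ENOENT)
 *		error = zfs_zevent_wait(ze);
 */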
366 /*
367 * Wait in an interruptible state for any new events.
368 */
369 int
370 zfs_zevent_wait(zfs_zevent_t *ze)
371 {
372 int error = EAGAIN;
373
374 mutex_enter(&zevent_lock);
375 zevent_waiters++;
376
377 while (error == EAGAIN) {
378 if (zevent_flags & ZEVENT_SHUTDOWN) {
379 error = SET_ERROR(ESHUTDOWN);
380 break;
381 }
382
383 if (cv_wait_sig(&zevent_cv, &zevent_lock) == 0) {
384 error = SET_ERROR(EINTR);
385 break;
386 } else if (!list_is_empty(&zevent_list)) {
387 error = 0;
388 continue;
389 } else {
390 error = EAGAIN;
391 }
392 }
393
394 zevent_waiters--;
395 mutex_exit(&zevent_lock);
396
397 return (error);
398 }
399
400 /*
401 * The caller may seek to a specific EID by passing that EID. If the EID
402 * is still available in the posted list of events the cursor is positioned
403 * there. Otherwise ENOENT is returned and the cursor is not moved.
404 *
405 * There are two reserved EIDs which may be passed and will never fail.
406 * ZEVENT_SEEK_START positions the cursor at the start of the list, and
407 * ZEVENT_SEEK_END positions the cursor at the end of the list.
408 */
409 int
410 zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid)
411 {
412 zevent_t *ev;
413 int error = 0;
414
415 mutex_enter(&zevent_lock);
416
417 if (eid == ZEVENT_SEEK_START) {
418 if (ze->ze_zevent)
419 list_remove(&ze->ze_zevent->ev_ze_list, ze);
420
421 ze->ze_zevent = NULL;
422 goto out;
423 }
424
425 if (eid == ZEVENT_SEEK_END) {
426 if (ze->ze_zevent)
427 list_remove(&ze->ze_zevent->ev_ze_list, ze);
428
429 ev = list_head(&zevent_list);
430 if (ev) {
431 ze->ze_zevent = ev;
432 list_insert_head(&ev->ev_ze_list, ze);
433 } else {
434 ze->ze_zevent = NULL;
435 }
436
437 goto out;
438 }
439
440 for (ev = list_tail(&zevent_list); ev != NULL;
441 ev = list_prev(&zevent_list, ev)) {
442 if (ev->ev_eid == eid) {
443 if (ze->ze_zevent)
444 list_remove(&ze->ze_zevent->ev_ze_list, ze);
445
446 ze->ze_zevent = ev;
447 list_insert_head(&ev->ev_ze_list, ze);
448 break;
449 }
450 }
451
452 if (ev == NULL)
453 error = ENOENT;
454
455 out:
456 mutex_exit(&zevent_lock);
457
458 return (error);
459 }
460
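/*
 * Hypothetical sketch (not part of the original file): resume a stream
 * from a previously observed EID, falling back to the start of the list
 * if that event has already been drained.
 *
 *	if (zfs_zevent_seek(ze, last_eid) == ENOENT)
 *		(void) zfs_zevent_seek(ze, ZEVENT_SEEK_START);
 */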
461 void
462 zfs_zevent_init(zfs_zevent_t **zep)
463 {
464 zfs_zevent_t *ze;
465
466 ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
467 list_link_init(&ze->ze_node);
468 }
469
470 void
471 zfs_zevent_destroy(zfs_zevent_t *ze)
472 {
473 mutex_enter(&zevent_lock);
474 if (ze->ze_zevent)
475 list_remove(&ze->ze_zevent->ev_ze_list, ze);
476 mutex_exit(&zevent_lock);
477
478 kmem_free(ze, sizeof (zfs_zevent_t));
479 }
480 #endif /* _KERNEL */
481
482 /*
483 * Wrappers for FM nvlist allocators
484 */
485 static void *
486 i_fm_alloc(nv_alloc_t *nva, size_t size)
487 {
488 (void) nva;
489 return (kmem_alloc(size, KM_SLEEP));
490 }
491
492 static void
493 i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
494 {
495 (void) nva;
496 kmem_free(buf, size);
497 }
498
499 static const nv_alloc_ops_t fm_mem_alloc_ops = {
500 .nv_ao_init = NULL,
501 .nv_ao_fini = NULL,
502 .nv_ao_alloc = i_fm_alloc,
503 .nv_ao_free = i_fm_free,
504 .nv_ao_reset = NULL
505 };
506
507 /*
508 * Create and initialize a new nv_alloc_t for a fixed buffer, buf. A pointer
509 * to the newly allocated nv_alloc_t structure is returned upon success or NULL
510 * is returned to indicate that the nv_alloc structure could not be created.
511 */
512 nv_alloc_t *
513 fm_nva_xcreate(char *buf, size_t bufsz)
514 {
515 nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
516
517 if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
518 kmem_free(nvhdl, sizeof (nv_alloc_t));
519 return (NULL);
520 }
521
522 return (nvhdl);
523 }
524
525 /*
526 * Destroy a previously allocated nv_alloc structure. The fixed buffer
527 * associated with nva must be freed by the caller.
528 */
529 void
530 fm_nva_xdestroy(nv_alloc_t *nva)
531 {
532 nv_alloc_fini(nva);
533 kmem_free(nva, sizeof (nv_alloc_t));
534 }
535
536 /*
537 * Create a new nv list. A pointer to a new nv list structure is returned
538 * upon success or NULL is returned to indicate that the structure could
539 * not be created. The newly created nv list is created and managed by the
540 * operations installed in nva. If nva is NULL, the default FMA nva
541 * operations are installed and used.
542 *
543 * When called from the kernel and nva == NULL, this function must be called
544 * from passive kernel context with no locks held that can prevent a
545 * sleeping memory allocation from occurring. Otherwise, this function may
546  * be called from other kernel contexts as long as a valid nva created via
547  * fm_nva_xcreate() is supplied.
548 */
549 nvlist_t *
550 fm_nvlist_create(nv_alloc_t *nva)
551 {
552 int hdl_alloced = 0;
553 nvlist_t *nvl;
554 nv_alloc_t *nvhdl;
555
556 if (nva == NULL) {
557 nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
558
559 if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
560 kmem_free(nvhdl, sizeof (nv_alloc_t));
561 return (NULL);
562 }
563 hdl_alloced = 1;
564 } else {
565 nvhdl = nva;
566 }
567
568 if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
569 if (hdl_alloced) {
570 nv_alloc_fini(nvhdl);
571 kmem_free(nvhdl, sizeof (nv_alloc_t));
572 }
573 return (NULL);
574 }
575
576 return (nvl);
577 }
578
579 /*
580 * Destroy a previously allocated nvlist structure. flag indicates whether
581 * or not the associated nva structure should be freed (FM_NVA_FREE) or
582 * retained (FM_NVA_RETAIN). Retaining the nv alloc structure allows
583 * it to be re-used for future nvlist creation operations.
584 */
585 void
586 fm_nvlist_destroy(nvlist_t *nvl, int flag)
587 {
588 nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);
589
590 nvlist_free(nvl);
591
592 if (nva != NULL) {
593 if (flag == FM_NVA_FREE)
594 fm_nva_xdestroy(nva);
595 }
596 }
597
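/*
 * Hypothetical sketch (not part of the original file): build an nvlist
 * out of a caller-supplied fixed buffer, as used in constrained contexts
 * where sleeping allocations are not possible.  "buf" and "bufsz" are a
 * preallocated scratch buffer owned by the caller.
 *
 *	nv_alloc_t *nva = fm_nva_xcreate(buf, bufsz);
 *	nvlist_t *nvl = fm_nvlist_create(nva);
 *	...
 *	fm_nvlist_destroy(nvl, FM_NVA_RETAIN);	// keep nva for reuse
 *	fm_nva_xdestroy(nva);			// caller still owns buf
 */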
598 int
599 i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
600 {
601 int nelem, ret = 0;
602 data_type_t type;
603
604 while (ret == 0 && name != NULL) {
605 type = va_arg(ap, data_type_t);
606 switch (type) {
607 case DATA_TYPE_BYTE:
608 ret = nvlist_add_byte(payload, name,
609 va_arg(ap, uint_t));
610 break;
611 case DATA_TYPE_BYTE_ARRAY:
612 nelem = va_arg(ap, int);
613 ret = nvlist_add_byte_array(payload, name,
614 va_arg(ap, uchar_t *), nelem);
615 break;
616 case DATA_TYPE_BOOLEAN_VALUE:
617 ret = nvlist_add_boolean_value(payload, name,
618 va_arg(ap, boolean_t));
619 break;
620 case DATA_TYPE_BOOLEAN_ARRAY:
621 nelem = va_arg(ap, int);
622 ret = nvlist_add_boolean_array(payload, name,
623 va_arg(ap, boolean_t *), nelem);
624 break;
625 case DATA_TYPE_INT8:
626 ret = nvlist_add_int8(payload, name,
627 va_arg(ap, int));
628 break;
629 case DATA_TYPE_INT8_ARRAY:
630 nelem = va_arg(ap, int);
631 ret = nvlist_add_int8_array(payload, name,
632 va_arg(ap, int8_t *), nelem);
633 break;
634 case DATA_TYPE_UINT8:
635 ret = nvlist_add_uint8(payload, name,
636 va_arg(ap, uint_t));
637 break;
638 case DATA_TYPE_UINT8_ARRAY:
639 nelem = va_arg(ap, int);
640 ret = nvlist_add_uint8_array(payload, name,
641 va_arg(ap, uint8_t *), nelem);
642 break;
643 case DATA_TYPE_INT16:
644 ret = nvlist_add_int16(payload, name,
645 va_arg(ap, int));
646 break;
647 case DATA_TYPE_INT16_ARRAY:
648 nelem = va_arg(ap, int);
649 ret = nvlist_add_int16_array(payload, name,
650 va_arg(ap, int16_t *), nelem);
651 break;
652 case DATA_TYPE_UINT16:
653 ret = nvlist_add_uint16(payload, name,
654 va_arg(ap, uint_t));
655 break;
656 case DATA_TYPE_UINT16_ARRAY:
657 nelem = va_arg(ap, int);
658 ret = nvlist_add_uint16_array(payload, name,
659 va_arg(ap, uint16_t *), nelem);
660 break;
661 case DATA_TYPE_INT32:
662 ret = nvlist_add_int32(payload, name,
663 va_arg(ap, int32_t));
664 break;
665 case DATA_TYPE_INT32_ARRAY:
666 nelem = va_arg(ap, int);
667 ret = nvlist_add_int32_array(payload, name,
668 va_arg(ap, int32_t *), nelem);
669 break;
670 case DATA_TYPE_UINT32:
671 ret = nvlist_add_uint32(payload, name,
672 va_arg(ap, uint32_t));
673 break;
674 case DATA_TYPE_UINT32_ARRAY:
675 nelem = va_arg(ap, int);
676 ret = nvlist_add_uint32_array(payload, name,
677 va_arg(ap, uint32_t *), nelem);
678 break;
679 case DATA_TYPE_INT64:
680 ret = nvlist_add_int64(payload, name,
681 va_arg(ap, int64_t));
682 break;
683 case DATA_TYPE_INT64_ARRAY:
684 nelem = va_arg(ap, int);
685 ret = nvlist_add_int64_array(payload, name,
686 va_arg(ap, int64_t *), nelem);
687 break;
688 case DATA_TYPE_UINT64:
689 ret = nvlist_add_uint64(payload, name,
690 va_arg(ap, uint64_t));
691 break;
692 case DATA_TYPE_UINT64_ARRAY:
693 nelem = va_arg(ap, int);
694 ret = nvlist_add_uint64_array(payload, name,
695 va_arg(ap, uint64_t *), nelem);
696 break;
697 case DATA_TYPE_STRING:
698 ret = nvlist_add_string(payload, name,
699 va_arg(ap, char *));
700 break;
701 case DATA_TYPE_STRING_ARRAY:
702 nelem = va_arg(ap, int);
703 ret = nvlist_add_string_array(payload, name,
704 va_arg(ap, const char **), nelem);
705 break;
706 case DATA_TYPE_NVLIST:
707 ret = nvlist_add_nvlist(payload, name,
708 va_arg(ap, nvlist_t *));
709 break;
710 case DATA_TYPE_NVLIST_ARRAY:
711 nelem = va_arg(ap, int);
712 ret = nvlist_add_nvlist_array(payload, name,
713 va_arg(ap, const nvlist_t **), nelem);
714 break;
715 default:
716 ret = EINVAL;
717 }
718
719 name = va_arg(ap, char *);
720 }
721 return (ret);
722 }
723
724 void
725 fm_payload_set(nvlist_t *payload, ...)
726 {
727 int ret;
728 const char *name;
729 va_list ap;
730
731 va_start(ap, payload);
732 name = va_arg(ap, char *);
733 ret = i_fm_payload_set(payload, name, ap);
734 va_end(ap);
735
736 if (ret)
737 atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);
738 }
739
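/*
 * Hypothetical sketch (not part of the original file): the variadic
 * payload protocol accepted above is (name, DATA_TYPE_*, value, ...)
 * terminated by a NULL name; array-typed members pass the element count
 * before the array pointer.  "guids" is an illustrative uint64_t array
 * of length 3.
 *
 *	fm_payload_set(ereport,
 *	    "pool", DATA_TYPE_STRING, "tank",
 *	    "pool_guid", DATA_TYPE_UINT64, pool_guid,
 *	    "vdev_guids", DATA_TYPE_UINT64_ARRAY, 3, guids,
 *	    NULL);
 */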
740 /*
741 * Set-up and validate the members of an ereport event according to:
742 *
743 * Member name Type Value
744 * ====================================================
745 * class string ereport
746 * version uint8_t 0
747 * ena uint64_t <ena>
748 * detector nvlist_t <detector>
749 * ereport-payload nvlist_t <var args>
750 *
751 * We don't actually add a 'version' member to the payload. Really,
752 * the version quoted to us by our caller is that of the category 1
753 * "ereport" event class (and we require FM_EREPORT_VERS0) but
754 * the payload version of the actual leaf class event under construction
755 * may be something else. Callers should supply a version in the varargs,
756 * or (better) we could take two version arguments - one for the
757 * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
758 * for the leaf class.
759 */
760 void
761 fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
762 uint64_t ena, const nvlist_t *detector, ...)
763 {
764 char ereport_class[FM_MAX_CLASS];
765 const char *name;
766 va_list ap;
767 int ret;
768
769 if (version != FM_EREPORT_VERS0) {
770 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
771 return;
772 }
773
774 (void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
775 FM_EREPORT_CLASS, erpt_class);
776 if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
777 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
778 return;
779 }
780
781 if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
782 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
783 }
784
785 if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
786 (nvlist_t *)detector) != 0) {
787 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
788 }
789
790 va_start(ap, detector);
791 name = va_arg(ap, const char *);
792 ret = i_fm_payload_set(ereport, name, ap);
793 va_end(ap);
794
795 if (ret)
796 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
797 }
798
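/*
 * Hypothetical sketch (not part of the original file): fm_ereport_set()
 * prepends FM_EREPORT_CLASS ("ereport") to the supplied class, so passing
 * "fs.zfs.checksum" yields the full class "ereport.fs.zfs.checksum".  The
 * trailing NULL terminates the payload varargs.
 *
 *	fm_ereport_set(ereport, FM_EREPORT_VERS0, "fs.zfs.checksum", ena,
 *	    detector, NULL);
 */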
799 /*
800  * Set-up and validate the members of an hc fmri according to:
801 *
802 * Member name Type Value
803 * ===================================================
804 * version uint8_t 0
805 * auth nvlist_t <auth>
806 * hc-name string <name>
807 * hc-id string <id>
808 *
809 * Note that auth and hc-id are optional members.
810 */
811
812 #define HC_MAXPAIRS 20
813 #define HC_MAXNAMELEN 50
814
815 static int
816 fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
817 {
818 if (version != FM_HC_SCHEME_VERSION) {
819 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
820 return (0);
821 }
822
823 if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
824 nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
825 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
826 return (0);
827 }
828
829 if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
830 (nvlist_t *)auth) != 0) {
831 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
832 return (0);
833 }
834
835 return (1);
836 }
837
838 void
839 fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
840 nvlist_t *snvl, int npairs, ...)
841 {
842 nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
843 nvlist_t *pairs[HC_MAXPAIRS];
844 va_list ap;
845 int i;
846
847 if (!fm_fmri_hc_set_common(fmri, version, auth))
848 return;
849
850 npairs = MIN(npairs, HC_MAXPAIRS);
851
852 va_start(ap, npairs);
853 for (i = 0; i < npairs; i++) {
854 const char *name = va_arg(ap, const char *);
855 uint32_t id = va_arg(ap, uint32_t);
856 char idstr[11];
857
858 (void) snprintf(idstr, sizeof (idstr), "%u", id);
859
860 pairs[i] = fm_nvlist_create(nva);
861 if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
862 nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
863 atomic_inc_64(
864 &erpt_kstat_data.fmri_set_failed.value.ui64);
865 }
866 }
867 va_end(ap);
868
869 if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST,
870 (const nvlist_t **)pairs, npairs) != 0) {
871 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
872 }
873
874 for (i = 0; i < npairs; i++)
875 fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
876
877 if (snvl != NULL) {
878 if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
879 atomic_inc_64(
880 &erpt_kstat_data.fmri_set_failed.value.ui64);
881 }
882 }
883 }
884
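/*
 * Hypothetical sketch (not part of the original file): each of the
 * "npairs" varargs above is a (name, uint32_t id) pair, capped at
 * HC_MAXPAIRS.
 *
 *	fm_fmri_hc_set(fmri, FM_HC_SCHEME_VERSION, auth, NULL, 2,
 *	    "motherboard", 0, "chip", 1);
 */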
885 void
886 fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
887 nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
888 {
889 nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
890 nvlist_t *pairs[HC_MAXPAIRS];
891 nvlist_t **hcl;
892 uint_t n;
893 int i, j;
894 va_list ap;
895 const char *hcname, *hcid;
896
897 if (!fm_fmri_hc_set_common(fmri, version, auth))
898 return;
899
900 /*
901 * copy the bboard nvpairs to the pairs array
902 */
903 if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
904 != 0) {
905 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
906 return;
907 }
908
909 for (i = 0; i < n; i++) {
910 if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
911 &hcname) != 0) {
912 atomic_inc_64(
913 &erpt_kstat_data.fmri_set_failed.value.ui64);
914 return;
915 }
916 if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
917 atomic_inc_64(
918 &erpt_kstat_data.fmri_set_failed.value.ui64);
919 return;
920 }
921
922 pairs[i] = fm_nvlist_create(nva);
923 if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
924 nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
925 for (j = 0; j <= i; j++) {
926 if (pairs[j] != NULL)
927 fm_nvlist_destroy(pairs[j],
928 FM_NVA_RETAIN);
929 }
930 atomic_inc_64(
931 &erpt_kstat_data.fmri_set_failed.value.ui64);
932 return;
933 }
934 }
935
936 /*
937  * create the pairs from the passed-in pairs
938 */
939 npairs = MIN(npairs, HC_MAXPAIRS);
940
941 va_start(ap, npairs);
942 for (i = n; i < npairs + n; i++) {
943 const char *name = va_arg(ap, const char *);
944 uint32_t id = va_arg(ap, uint32_t);
945 char idstr[11];
946 (void) snprintf(idstr, sizeof (idstr), "%u", id);
947 pairs[i] = fm_nvlist_create(nva);
948 if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
949 nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
950 for (j = 0; j <= i; j++) {
951 if (pairs[j] != NULL)
952 fm_nvlist_destroy(pairs[j],
953 FM_NVA_RETAIN);
954 }
955 atomic_inc_64(
956 &erpt_kstat_data.fmri_set_failed.value.ui64);
957 va_end(ap);
958 return;
959 }
960 }
961 va_end(ap);
962
963 /*
964 * Create the fmri hc list
965 */
966 if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST,
967 (const nvlist_t **)pairs, npairs + n) != 0) {
968 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
969 return;
970 }
971
972 for (i = 0; i < npairs + n; i++) {
973 fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
974 }
975
976 if (snvl != NULL) {
977 if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
978 atomic_inc_64(
979 &erpt_kstat_data.fmri_set_failed.value.ui64);
980 return;
981 }
982 }
983 }
984
985 /*
986  * Set-up and validate the members of a dev fmri according to:
987 *
988 * Member name Type Value
989 * ====================================================
990 * version uint8_t 0
991 * auth nvlist_t <auth>
992 * devpath string <devpath>
993 * [devid] string <devid>
994 * [target-port-l0id] string <target-port-lun0-id>
995 *
996 * Note that auth and devid are optional members.
997 */
998 void
999 fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
1000 const char *devpath, const char *devid, const char *tpl0)
1001 {
1002 int err = 0;
1003
1004 if (version != DEV_SCHEME_VERSION0) {
1005 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1006 return;
1007 }
1008
1009 err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
1010 err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);
1011
1012 if (auth != NULL) {
1013 err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
1014 (nvlist_t *)auth);
1015 }
1016
1017 err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);
1018
1019 if (devid != NULL)
1020 err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);
1021
1022 if (tpl0 != NULL)
1023 err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);
1024
1025 if (err)
1026 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1027
1028 }
1029
1030 /*
1031  * Set-up and validate the members of a cpu fmri according to:
1032 *
1033 * Member name Type Value
1034 * ====================================================
1035 * version uint8_t 0
1036 * auth nvlist_t <auth>
1037 * cpuid uint32_t <cpu_id>
1038 * cpumask uint8_t <cpu_mask>
1039 * serial uint64_t <serial_id>
1040 *
1041  * Note that auth, cpumask, and serial are optional members.
1042 *
1043 */
1044 void
1045 fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
1046 uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
1047 {
1048 uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
1049
1050 if (version < CPU_SCHEME_VERSION1) {
1051 atomic_inc_64(failedp);
1052 return;
1053 }
1054
1055 if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
1056 atomic_inc_64(failedp);
1057 return;
1058 }
1059
1060 if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
1061 FM_FMRI_SCHEME_CPU) != 0) {
1062 atomic_inc_64(failedp);
1063 return;
1064 }
1065
1066 if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
1067 (nvlist_t *)auth) != 0)
1068 atomic_inc_64(failedp);
1069
1070 if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
1071 atomic_inc_64(failedp);
1072
1073 if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
1074 *cpu_maskp) != 0)
1075 atomic_inc_64(failedp);
1076
1077 if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
1078 FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
1079 atomic_inc_64(failedp);
1080 }
1081
1082 /*
1083  * Set-up and validate the members of a mem fmri according to:
1084 *
1085 * Member name Type Value
1086 * ====================================================
1087 * version uint8_t 0
1088 * auth nvlist_t <auth> [optional]
1089 * unum string <unum>
1090 * serial string <serial> [optional*]
1091 * offset uint64_t <offset> [optional]
1092 *
1093 * * serial is required if offset is present
1094 */
1095 void
1096 fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
1097 const char *unum, const char *serial, uint64_t offset)
1098 {
1099 if (version != MEM_SCHEME_VERSION0) {
1100 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1101 return;
1102 }
1103
1104 if (!serial && (offset != (uint64_t)-1)) {
1105 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1106 return;
1107 }
1108
1109 if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
1110 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1111 return;
1112 }
1113
1114 if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
1115 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1116 return;
1117 }
1118
1119 if (auth != NULL) {
1120 if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
1121 (nvlist_t *)auth) != 0) {
1122 atomic_inc_64(
1123 &erpt_kstat_data.fmri_set_failed.value.ui64);
1124 }
1125 }
1126
1127 if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
1128 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1129 }
1130
1131 if (serial != NULL) {
1132 if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
1133 (const char **)&serial, 1) != 0) {
1134 atomic_inc_64(
1135 &erpt_kstat_data.fmri_set_failed.value.ui64);
1136 }
1137 if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri,
1138 FM_FMRI_MEM_OFFSET, offset) != 0) {
1139 atomic_inc_64(
1140 &erpt_kstat_data.fmri_set_failed.value.ui64);
1141 }
1142 }
1143 }
1144
1145 void
1146 fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
1147 uint64_t vdev_guid)
1148 {
1149 if (version != ZFS_SCHEME_VERSION0) {
1150 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1151 return;
1152 }
1153
1154 if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
1155 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1156 return;
1157 }
1158
1159 if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
1160 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1161 return;
1162 }
1163
1164 if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
1165 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1166 }
1167
1168 if (vdev_guid != 0) {
1169 if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
1170 atomic_inc_64(
1171 &erpt_kstat_data.fmri_set_failed.value.ui64);
1172 }
1173 }
1174 }
1175
1176 uint64_t
1177 fm_ena_increment(uint64_t ena)
1178 {
1179 uint64_t new_ena;
1180
1181 switch (ENA_FORMAT(ena)) {
1182 case FM_ENA_FMT1:
1183 new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
1184 break;
1185 case FM_ENA_FMT2:
1186 new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);
1187 break;
1188 default:
1189 new_ena = 0;
1190 }
1191
1192 return (new_ena);
1193 }
1194
1195 uint64_t
1196 fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
1197 {
1198 uint64_t ena = 0;
1199
1200 switch (format) {
1201 case FM_ENA_FMT1:
1202 if (timestamp) {
1203 ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1204 ((cpuid << ENA_FMT1_CPUID_SHFT) &
1205 ENA_FMT1_CPUID_MASK) |
1206 ((timestamp << ENA_FMT1_TIME_SHFT) &
1207 ENA_FMT1_TIME_MASK));
1208 } else {
1209 ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1210 ((cpuid << ENA_FMT1_CPUID_SHFT) &
1211 ENA_FMT1_CPUID_MASK) |
1212 ((gethrtime() << ENA_FMT1_TIME_SHFT) &
1213 ENA_FMT1_TIME_MASK));
1214 }
1215 break;
1216 case FM_ENA_FMT2:
1217 ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1218 ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));
1219 break;
1220 default:
1221 break;
1222 }
1223
1224 return (ena);
1225 }
1226
1227 uint64_t
1228 fm_ena_generate(uint64_t timestamp, uchar_t format)
1229 {
1230 uint64_t ena;
1231
1232 kpreempt_disable();
1233 ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);
1234 kpreempt_enable();
1235
1236 return (ena);
1237 }
1238
1239 uint64_t
1240 fm_ena_generation_get(uint64_t ena)
1241 {
1242 uint64_t gen;
1243
1244 switch (ENA_FORMAT(ena)) {
1245 case FM_ENA_FMT1:
1246 gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
1247 break;
1248 case FM_ENA_FMT2:
1249 gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;
1250 break;
1251 default:
1252 gen = 0;
1253 break;
1254 }
1255
1256 return (gen);
1257 }
1258
1259 uchar_t
1260 fm_ena_format_get(uint64_t ena)
1261 {
1262
1263 return (ENA_FORMAT(ena));
1264 }
1265
1266 uint64_t
1267 fm_ena_id_get(uint64_t ena)
1268 {
1269 uint64_t id;
1270
1271 switch (ENA_FORMAT(ena)) {
1272 case FM_ENA_FMT1:
1273 id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
1274 break;
1275 case FM_ENA_FMT2:
1276 id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;
1277 break;
1278 default:
1279 id = 0;
1280 }
1281
1282 return (id);
1283 }
1284
1285 uint64_t
1286 fm_ena_time_get(uint64_t ena)
1287 {
1288 uint64_t time;
1289
1290 switch (ENA_FORMAT(ena)) {
1291 case FM_ENA_FMT1:
1292 time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
1293 break;
1294 case FM_ENA_FMT2:
1295 time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;
1296 break;
1297 default:
1298 time = 0;
1299 }
1300
1301 return (time);
1302 }
1303
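/*
 * Hypothetical sketch (not part of the original file): generate a
 * format-1 ENA and pull its fields back out with the accessors above.
 *
 *	uint64_t ena = fm_ena_generate(0, FM_ENA_FMT1);
 *
 *	(void) fm_ena_format_get(ena);		// FM_ENA_FMT1
 *	(void) fm_ena_id_get(ena);		// CPU id for format 1
 *	(void) fm_ena_time_get(ena);		// gethrtime() bits
 *	ena = fm_ena_increment(ena);		// bump the generation field
 *	(void) fm_ena_generation_get(ena);	// now 1
 */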
1304 #ifdef _KERNEL
1305 /*
1306 * Helper function to increment ereport dropped count. Used by the event
1307 * rate limiting code to give feedback to the user about how many events were
1308 * rate limited by including them in the 'dropped' count.
1309 */
1310 void
1311 fm_erpt_dropped_increment(void)
1312 {
1313 atomic_inc_64(&ratelimit_dropped);
1314 }
1315
1316 void
1317 fm_init(void)
1318 {
1319 zevent_len_cur = 0;
1320 zevent_flags = 0;
1321
1322 /* Initialize zevent allocation and generation kstats */
1323 fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
1324 sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
1325 KSTAT_FLAG_VIRTUAL);
1326
1327 if (fm_ksp != NULL) {
1328 fm_ksp->ks_data = &erpt_kstat_data;
1329 kstat_install(fm_ksp);
1330 } else {
1331 cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
1332 }
1333
1334 mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
1335 list_create(&zevent_list, sizeof (zevent_t),
1336 offsetof(zevent_t, ev_node));
1337 cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);
1338
1339 zfs_ereport_init();
1340 }
1341
1342 void
1343 fm_fini(void)
1344 {
1345 uint_t count;
1346
1347 zfs_ereport_fini();
1348
1349 zfs_zevent_drain_all(&count);
1350
1351 mutex_enter(&zevent_lock);
1352 cv_broadcast(&zevent_cv);
1353
1354 zevent_flags |= ZEVENT_SHUTDOWN;
1355 while (zevent_waiters > 0) {
1356 mutex_exit(&zevent_lock);
1357 kpreempt(KPREEMPT_SYNC);
1358 mutex_enter(&zevent_lock);
1359 }
1360 mutex_exit(&zevent_lock);
1361
1362 cv_destroy(&zevent_cv);
1363 list_destroy(&zevent_list);
1364 mutex_destroy(&zevent_lock);
1365
1366 if (fm_ksp != NULL) {
1367 kstat_delete(fm_ksp);
1368 fm_ksp = NULL;
1369 }
1370 }
1371 #endif /* _KERNEL */
1372
1373 ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, UINT, ZMOD_RW,
1374 "Max event queue length");