]> git.proxmox.com Git - mirror_zfs.git/blame - module/zfs/fm.c
ABD changes for vectorized RAIDZ
[mirror_zfs.git] / module / zfs / fm.c
CommitLineData
fa42225a
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
428870ff 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
fa42225a
BB
23 */
24
25/*
26 * Fault Management Architecture (FMA) Resource and Protocol Support
27 *
28 * The routines contained herein provide services to support kernel subsystems
29 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
30 *
31 * Name-Value Pair Lists
32 *
33 * The embodiment of an FMA protocol element (event, fmri or authority) is a
34 * name-value pair list (nvlist_t). FMA-specific nvlist constructor and
35 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
36 * to create an nvpair list using custom allocators. Callers may choose to
37 * allocate either from the kernel memory allocator, or from a preallocated
38 * buffer, useful in constrained contexts like high-level interrupt routines.
39 *
40 * Protocol Event and FMRI Construction
41 *
42 * Convenience routines are provided to construct nvlist events according to
43 * the FMA Event Protocol and Naming Schema specification for ereports and
44 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
45 *
46 * ENA Manipulation
47 *
48 * Routines to generate ENA formats 0, 1 and 2 are available as well as
49 * routines to increment formats 1 and 2. Individual fields within the
50 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
51 * fm_ena_format_get() and fm_ena_gen_get().
52 */
53
54#include <sys/types.h>
55#include <sys/time.h>
26685276 56#include <sys/list.h>
fa42225a
BB
57#include <sys/nvpair.h>
58#include <sys/cmn_err.h>
fa42225a 59#include <sys/sysmacros.h>
fa42225a 60#include <sys/compress.h>
fa42225a
BB
61#include <sys/sunddi.h>
62#include <sys/systeminfo.h>
fa42225a
BB
63#include <sys/fm/util.h>
64#include <sys/fm/protocol.h>
26685276
BB
65#include <sys/kstat.h>
66#include <sys/zfs_context.h>
67#ifdef _KERNEL
68#include <sys/atomic.h>
69#include <sys/condvar.h>
70#include <sys/cpuvar.h>
71#include <sys/systm.h>
72#include <sys/dumphdr.h>
73#include <sys/cpuvar.h>
74#include <sys/console.h>
75#include <sys/kobj.h>
76#include <sys/time.h>
77#include <sys/zfs_ioctl.h>
fa42225a 78
c409e464
BB
/* Queue length at which zfs_zevent_insert() starts draining the tail. */
int zfs_zevent_len_max = 0;
/* Column width handed to fm_printf()/fm_nvprintr() by fm_nvprint(). */
int zfs_zevent_cols = 80;
/* When non-zero, zfs_zevent_post() also prints each event via fm_nvprint(). */
int zfs_zevent_console = 0;
fa42225a 82
26685276
BB
/* Count of queued zevents; bumped on insert, reset by zfs_zevent_drain_all(). */
static int zevent_len_cur = 0;
/* Number of threads currently blocked in zfs_zevent_wait(). */
static int zevent_waiters = 0;
/* Flag bits; zfs_zevent_wait() tests ZEVENT_SHUTDOWN here. */
static int zevent_flags = 0;
fa42225a 86
6078881a
TH
87/* Num events rate limited since the last time zfs_zevent_next() was called */
88static uint64_t ratelimit_dropped = 0;
89
a2f1945e
BB
90/*
91 * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
92 * posted. The posted EIDs are monotonically increasing but not persistent.
93 * They will be reset to the initial value (1) each time the kernel module is
94 * loaded.
95 */
96static uint64_t zevent_eid = 0;
97
26685276
BB
98static kmutex_t zevent_lock;
99static list_t zevent_list;
100static kcondvar_t zevent_cv;
101#endif /* _KERNEL */
fa42225a 102
428870ff 103
fa42225a 104/*
26685276 105 * Common fault management kstats to record event generation failures
fa42225a
BB
106 */
107
108struct erpt_kstat {
109 kstat_named_t erpt_dropped; /* num erpts dropped on post */
110 kstat_named_t erpt_set_failed; /* num erpt set failures */
111 kstat_named_t fmri_set_failed; /* num fmri set failures */
112 kstat_named_t payload_set_failed; /* num payload set failures */
113};
114
115static struct erpt_kstat erpt_kstat_data = {
116 { "erpt-dropped", KSTAT_DATA_UINT64 },
117 { "erpt-set-failed", KSTAT_DATA_UINT64 },
118 { "fmri-set-failed", KSTAT_DATA_UINT64 },
119 { "payload-set-failed", KSTAT_DATA_UINT64 }
120};
121
26685276 122kstat_t *fm_ksp;
fa42225a 123
26685276 124#ifdef _KERNEL
fa42225a
BB
125
/*
 * Formatting utility function for fm_nvprintr.  We attempt to wrap chunks of
 * output so they aren't split across console lines, and return the end column.
 *
 *	depth	nesting depth of the caller; a wrapped chunk that does not
 *		itself begin with a space is indented by one space when
 *		depth > 0
 *	c	column the console is currently in
 *	cols	console width; a value <= 0 disables wrapping entirely
 *	format	printf-style format followed by its arguments
 *
 * Returns the column after the chunk has been printed.  When wrapping is
 * disabled the returned column simply keeps growing.
 */
/*PRINTFLIKE4*/
static int
fm_printf(int depth, int c, int cols, const char *format, ...)
{
	va_list ap;
	int width;
	char c1;

	/* Formatting into a one-byte buffer; only the return value is used. */
	va_start(ap, format);
	width = vsnprintf(&c1, sizeof (c1), format, ap);
	va_end(ap);

	/*
	 * cols originates from a plain global int, so guard against a
	 * non-positive value: the modulo below would otherwise divide by zero.
	 */
	if (cols > 0 && c + width >= cols) {
		console_printf("\n");
		c = 0;
		if (format[0] != ' ' && depth > 0) {
			console_printf(" ");
			c++;
		}
	}

	va_start(ap, format);
	console_vprintf(format, ap);
	va_end(ap);

	if (cols <= 0)
		return (c + width);

	return ((c + width) % cols);
}
157
158/*
d5884c34 159 * Recursively print an nvlist in the specified column width and return the
fa42225a
BB
160 * column we end up in. This function is called recursively by fm_nvprint(),
161 * below. We generically format the entire nvpair using hexadecimal
162 * integers and strings, and elide any integer arrays. Arrays are basically
163 * used for cache dumps right now, so we suppress them so as not to overwhelm
164 * the amount of console output we produce at panic time. This can be further
165 * enhanced as FMA technology grows based upon the needs of consumers. All
166 * FMA telemetry is logged using the dump device transport, so the console
167 * output serves only as a fallback in case this procedure is unsuccessful.
168 */
169static int
170fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
171{
172 nvpair_t *nvp;
173
174 for (nvp = nvlist_next_nvpair(nvl, NULL);
175 nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) {
176
177 data_type_t type = nvpair_type(nvp);
178 const char *name = nvpair_name(nvp);
179
180 boolean_t b;
181 uint8_t i8;
182 uint16_t i16;
183 uint32_t i32;
184 uint64_t i64;
185 char *str;
186 nvlist_t *cnv;
187
188 if (strcmp(name, FM_CLASS) == 0)
189 continue; /* already printed by caller */
190
191 c = fm_printf(d, c, cols, " %s=", name);
192
193 switch (type) {
194 case DATA_TYPE_BOOLEAN:
195 c = fm_printf(d + 1, c, cols, " 1");
196 break;
197
198 case DATA_TYPE_BOOLEAN_VALUE:
199 (void) nvpair_value_boolean_value(nvp, &b);
200 c = fm_printf(d + 1, c, cols, b ? "1" : "0");
201 break;
202
203 case DATA_TYPE_BYTE:
204 (void) nvpair_value_byte(nvp, &i8);
26685276 205 c = fm_printf(d + 1, c, cols, "0x%x", i8);
fa42225a
BB
206 break;
207
208 case DATA_TYPE_INT8:
209 (void) nvpair_value_int8(nvp, (void *)&i8);
26685276 210 c = fm_printf(d + 1, c, cols, "0x%x", i8);
fa42225a
BB
211 break;
212
213 case DATA_TYPE_UINT8:
214 (void) nvpair_value_uint8(nvp, &i8);
26685276 215 c = fm_printf(d + 1, c, cols, "0x%x", i8);
fa42225a
BB
216 break;
217
218 case DATA_TYPE_INT16:
219 (void) nvpair_value_int16(nvp, (void *)&i16);
26685276 220 c = fm_printf(d + 1, c, cols, "0x%x", i16);
fa42225a
BB
221 break;
222
223 case DATA_TYPE_UINT16:
224 (void) nvpair_value_uint16(nvp, &i16);
26685276 225 c = fm_printf(d + 1, c, cols, "0x%x", i16);
fa42225a
BB
226 break;
227
228 case DATA_TYPE_INT32:
229 (void) nvpair_value_int32(nvp, (void *)&i32);
26685276 230 c = fm_printf(d + 1, c, cols, "0x%x", i32);
fa42225a
BB
231 break;
232
233 case DATA_TYPE_UINT32:
234 (void) nvpair_value_uint32(nvp, &i32);
26685276 235 c = fm_printf(d + 1, c, cols, "0x%x", i32);
fa42225a
BB
236 break;
237
238 case DATA_TYPE_INT64:
239 (void) nvpair_value_int64(nvp, (void *)&i64);
26685276 240 c = fm_printf(d + 1, c, cols, "0x%llx",
fa42225a
BB
241 (u_longlong_t)i64);
242 break;
243
244 case DATA_TYPE_UINT64:
245 (void) nvpair_value_uint64(nvp, &i64);
26685276 246 c = fm_printf(d + 1, c, cols, "0x%llx",
fa42225a
BB
247 (u_longlong_t)i64);
248 break;
249
250 case DATA_TYPE_HRTIME:
251 (void) nvpair_value_hrtime(nvp, (void *)&i64);
26685276 252 c = fm_printf(d + 1, c, cols, "0x%llx",
fa42225a
BB
253 (u_longlong_t)i64);
254 break;
255
256 case DATA_TYPE_STRING:
257 (void) nvpair_value_string(nvp, &str);
258 c = fm_printf(d + 1, c, cols, "\"%s\"",
259 str ? str : "<NULL>");
260 break;
261
262 case DATA_TYPE_NVLIST:
263 c = fm_printf(d + 1, c, cols, "[");
264 (void) nvpair_value_nvlist(nvp, &cnv);
265 c = fm_nvprintr(cnv, d + 1, c, cols);
266 c = fm_printf(d + 1, c, cols, " ]");
267 break;
268
269 case DATA_TYPE_NVLIST_ARRAY: {
270 nvlist_t **val;
271 uint_t i, nelem;
272
273 c = fm_printf(d + 1, c, cols, "[");
274 (void) nvpair_value_nvlist_array(nvp, &val, &nelem);
275 for (i = 0; i < nelem; i++) {
276 c = fm_nvprintr(val[i], d + 1, c, cols);
277 }
278 c = fm_printf(d + 1, c, cols, " ]");
279 }
280 break;
281
26685276
BB
282 case DATA_TYPE_INT8_ARRAY: {
283 int8_t *val;
284 uint_t i, nelem;
285
286 c = fm_printf(d + 1, c, cols, "[ ");
287 (void) nvpair_value_int8_array(nvp, &val, &nelem);
288 for (i = 0; i < nelem; i++)
d1d7e268
MK
289 c = fm_printf(d + 1, c, cols, "0x%llx ",
290 (u_longlong_t)val[i]);
26685276
BB
291
292 c = fm_printf(d + 1, c, cols, "]");
293 break;
294 }
295
296 case DATA_TYPE_UINT8_ARRAY: {
297 uint8_t *val;
298 uint_t i, nelem;
299
300 c = fm_printf(d + 1, c, cols, "[ ");
301 (void) nvpair_value_uint8_array(nvp, &val, &nelem);
302 for (i = 0; i < nelem; i++)
d1d7e268
MK
303 c = fm_printf(d + 1, c, cols, "0x%llx ",
304 (u_longlong_t)val[i]);
26685276
BB
305
306 c = fm_printf(d + 1, c, cols, "]");
307 break;
308 }
309
310 case DATA_TYPE_INT16_ARRAY: {
311 int16_t *val;
312 uint_t i, nelem;
313
314 c = fm_printf(d + 1, c, cols, "[ ");
315 (void) nvpair_value_int16_array(nvp, &val, &nelem);
316 for (i = 0; i < nelem; i++)
d1d7e268
MK
317 c = fm_printf(d + 1, c, cols, "0x%llx ",
318 (u_longlong_t)val[i]);
26685276
BB
319
320 c = fm_printf(d + 1, c, cols, "]");
321 break;
322 }
323
324 case DATA_TYPE_UINT16_ARRAY: {
325 uint16_t *val;
326 uint_t i, nelem;
327
328 c = fm_printf(d + 1, c, cols, "[ ");
329 (void) nvpair_value_uint16_array(nvp, &val, &nelem);
330 for (i = 0; i < nelem; i++)
d1d7e268
MK
331 c = fm_printf(d + 1, c, cols, "0x%llx ",
332 (u_longlong_t)val[i]);
26685276
BB
333
334 c = fm_printf(d + 1, c, cols, "]");
335 break;
336 }
337
338 case DATA_TYPE_INT32_ARRAY: {
339 int32_t *val;
340 uint_t i, nelem;
341
342 c = fm_printf(d + 1, c, cols, "[ ");
343 (void) nvpair_value_int32_array(nvp, &val, &nelem);
344 for (i = 0; i < nelem; i++)
d1d7e268
MK
345 c = fm_printf(d + 1, c, cols, "0x%llx ",
346 (u_longlong_t)val[i]);
26685276
BB
347
348 c = fm_printf(d + 1, c, cols, "]");
349 break;
350 }
351
352 case DATA_TYPE_UINT32_ARRAY: {
353 uint32_t *val;
354 uint_t i, nelem;
355
356 c = fm_printf(d + 1, c, cols, "[ ");
357 (void) nvpair_value_uint32_array(nvp, &val, &nelem);
358 for (i = 0; i < nelem; i++)
d1d7e268
MK
359 c = fm_printf(d + 1, c, cols, "0x%llx ",
360 (u_longlong_t)val[i]);
26685276
BB
361
362 c = fm_printf(d + 1, c, cols, "]");
363 break;
364 }
365
366 case DATA_TYPE_INT64_ARRAY: {
367 int64_t *val;
368 uint_t i, nelem;
369
370 c = fm_printf(d + 1, c, cols, "[ ");
371 (void) nvpair_value_int64_array(nvp, &val, &nelem);
372 for (i = 0; i < nelem; i++)
d1d7e268
MK
373 c = fm_printf(d + 1, c, cols, "0x%llx ",
374 (u_longlong_t)val[i]);
26685276
BB
375
376 c = fm_printf(d + 1, c, cols, "]");
377 break;
378 }
379
380 case DATA_TYPE_UINT64_ARRAY: {
381 uint64_t *val;
382 uint_t i, nelem;
383
384 c = fm_printf(d + 1, c, cols, "[ ");
385 (void) nvpair_value_uint64_array(nvp, &val, &nelem);
386 for (i = 0; i < nelem; i++)
d1d7e268
MK
387 c = fm_printf(d + 1, c, cols, "0x%llx ",
388 (u_longlong_t)val[i]);
26685276
BB
389
390 c = fm_printf(d + 1, c, cols, "]");
391 break;
392 }
393
394 case DATA_TYPE_STRING_ARRAY:
fa42225a
BB
395 case DATA_TYPE_BOOLEAN_ARRAY:
396 case DATA_TYPE_BYTE_ARRAY:
fa42225a
BB
397 c = fm_printf(d + 1, c, cols, "[...]");
398 break;
26685276 399
fa42225a
BB
400 case DATA_TYPE_UNKNOWN:
401 c = fm_printf(d + 1, c, cols, "<unknown>");
402 break;
403 }
404 }
405
406 return (c);
407}
408
409void
410fm_nvprint(nvlist_t *nvl)
411{
412 char *class;
413 int c = 0;
414
26685276 415 console_printf("\n");
fa42225a
BB
416
417 if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0)
c409e464 418 c = fm_printf(0, c, zfs_zevent_cols, "%s", class);
fa42225a 419
c409e464 420 if (fm_nvprintr(nvl, 0, c, zfs_zevent_cols) != 0)
fa42225a
BB
421 console_printf("\n");
422
423 console_printf("\n");
424}
425
26685276
BB
426static zevent_t *
427zfs_zevent_alloc(void)
428{
429 zevent_t *ev;
430
79c76d5b 431 ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);
26685276 432
d1d7e268 433 list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t),
26685276
BB
434 offsetof(zfs_zevent_t, ze_node));
435 list_link_init(&ev->ev_node);
436
d1d7e268 437 return (ev);
26685276
BB
438}
439
440static void
441zfs_zevent_free(zevent_t *ev)
442{
443 /* Run provided cleanup callback */
444 ev->ev_cb(ev->ev_nvl, ev->ev_detector);
445
446 list_destroy(&ev->ev_ze_list);
d1d7e268 447 kmem_free(ev, sizeof (zevent_t));
26685276
BB
448}
449
450static void
451zfs_zevent_drain(zevent_t *ev)
452{
453 zfs_zevent_t *ze;
454
455 ASSERT(MUTEX_HELD(&zevent_lock));
456 list_remove(&zevent_list, ev);
457
458 /* Remove references to this event in all private file data */
459 while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
460 list_remove(&ev->ev_ze_list, ze);
461 ze->ze_zevent = NULL;
462 ze->ze_dropped++;
463 }
464
465 zfs_zevent_free(ev);
466}
467
fa42225a 468void
26685276 469zfs_zevent_drain_all(int *count)
fa42225a 470{
26685276 471 zevent_t *ev;
fa42225a 472
26685276
BB
473 mutex_enter(&zevent_lock);
474 while ((ev = list_head(&zevent_list)) != NULL)
475 zfs_zevent_drain(ev);
476
477 *count = zevent_len_cur;
478 zevent_len_cur = 0;
479 mutex_exit(&zevent_lock);
fa42225a
BB
480}
481
572e2857 482/*
26685276
BB
483 * New zevents are inserted at the head. If the maximum queue
484 * length is exceeded a zevent will be drained from the tail.
485 * As part of this any user space processes which currently have
486 * a reference to this zevent_t in their private data will have
487 * this reference set to NULL.
572e2857 488 */
26685276
BB
489static void
490zfs_zevent_insert(zevent_t *ev)
572e2857 491{
99db9bfd 492 ASSERT(MUTEX_HELD(&zevent_lock));
26685276 493 list_insert_head(&zevent_list, ev);
99db9bfd 494
c409e464 495 if (zevent_len_cur >= zfs_zevent_len_max)
26685276 496 zfs_zevent_drain(list_tail(&zevent_list));
572e2857 497 else
26685276 498 zevent_len_cur++;
572e2857
BB
499}
500
fa42225a 501/*
0426c168
IH
502 * Post a zevent. The cb will be called when nvl and detector are no longer
503 * needed, i.e.:
504 * - An error happened and a zevent can't be posted. In this case, cb is called
505 * before zfs_zevent_post() returns.
506 * - The event is being drained and freed.
fa42225a 507 */
0426c168 508int
26685276 509zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
fa42225a 510{
26685276
BB
511 int64_t tv_array[2];
512 timestruc_t tv;
a2f1945e 513 uint64_t eid;
26685276
BB
514 size_t nvl_size = 0;
515 zevent_t *ev;
0426c168
IH
516 int error;
517
518 ASSERT(cb != NULL);
fa42225a 519
26685276
BB
520 gethrestime(&tv);
521 tv_array[0] = tv.tv_sec;
522 tv_array[1] = tv.tv_nsec;
0426c168
IH
523
524 error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2);
525 if (error) {
bc89ac84 526 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
0426c168 527 goto out;
26685276 528 }
fa42225a 529
a2f1945e 530 eid = atomic_inc_64_nv(&zevent_eid);
0426c168
IH
531 error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
532 if (error) {
bc89ac84 533 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
0426c168
IH
534 goto out;
535 }
536
537 error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
538 if (error) {
bc89ac84 539 atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
0426c168 540 goto out;
a2f1945e
BB
541 }
542
26685276 543 if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
bc89ac84 544 atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
0426c168
IH
545 error = EOVERFLOW;
546 goto out;
fa42225a
BB
547 }
548
c409e464 549 if (zfs_zevent_console)
26685276 550 fm_nvprint(nvl);
fa42225a 551
26685276
BB
552 ev = zfs_zevent_alloc();
553 if (ev == NULL) {
bc89ac84 554 atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
0426c168
IH
555 error = ENOMEM;
556 goto out;
26685276 557 }
fa42225a 558
d1d7e268 559 ev->ev_nvl = nvl;
26685276
BB
560 ev->ev_detector = detector;
561 ev->ev_cb = cb;
a2f1945e 562 ev->ev_eid = eid;
99db9bfd
BB
563
564 mutex_enter(&zevent_lock);
26685276
BB
565 zfs_zevent_insert(ev);
566 cv_broadcast(&zevent_cv);
99db9bfd 567 mutex_exit(&zevent_lock);
0426c168
IH
568
569out:
570 if (error)
571 cb(nvl, detector);
572
573 return (error);
26685276 574}
fa42225a 575
26685276
BB
576static int
577zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
578{
579 *ze = zfsdev_get_state(minor, ZST_ZEVENT);
580 if (*ze == NULL)
581 return (EBADF);
fa42225a 582
26685276
BB
583 return (0);
584}
fa42225a 585
26685276
BB
586int
587zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
588{
589 file_t *fp;
590 int error;
591
d1d7e268
MK
592 fp = getf(fd);
593 if (fp == NULL)
594 return (EBADF);
26685276 595
72540ea3
RY
596 error = zfsdev_getminor(fp->f_file, minorp);
597 if (error == 0)
598 error = zfs_zevent_minor_to_state(*minorp, ze);
26685276
BB
599
600 if (error)
601 zfs_zevent_fd_rele(fd);
602
603 return (error);
604}
605
/*
 * Drop the hold on 'fd' taken by zfs_zevent_fd_hold().
 */
void
zfs_zevent_fd_rele(int fd)
{
	releasef(fd);
}
611
612/*
baa40d45
BB
613 * Get the next zevent in the stream and place a copy in 'event'. This
614 * may fail with ENOMEM if the encoded nvlist size exceeds the passed
615 * 'event_size'. In this case the stream pointer is not advanced and
616 * and 'event_size' is set to the minimum required buffer size.
fa42225a 617 */
26685276 618int
baa40d45 619zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
d1d7e268 620 uint64_t *dropped)
fa42225a 621{
26685276 622 zevent_t *ev;
baa40d45
BB
623 size_t size;
624 int error = 0;
26685276
BB
625
626 mutex_enter(&zevent_lock);
627 if (ze->ze_zevent == NULL) {
628 /* New stream start at the beginning/tail */
629 ev = list_tail(&zevent_list);
630 if (ev == NULL) {
631 error = ENOENT;
632 goto out;
633 }
fa42225a 634 } else {
d1d7e268
MK
635 /*
636 * Existing stream continue with the next element and remove
637 * ourselves from the wait queue for the previous element
638 */
26685276
BB
639 ev = list_prev(&zevent_list, ze->ze_zevent);
640 if (ev == NULL) {
641 error = ENOENT;
642 goto out;
643 }
baa40d45 644 }
26685276 645
baa40d45
BB
646 VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
647 if (size > *event_size) {
648 *event_size = size;
649 error = ENOMEM;
650 goto out;
fa42225a
BB
651 }
652
baa40d45
BB
653 if (ze->ze_zevent)
654 list_remove(&ze->ze_zevent->ev_ze_list, ze);
655
26685276
BB
656 ze->ze_zevent = ev;
657 list_insert_head(&ev->ev_ze_list, ze);
aecdc706 658 (void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
26685276 659 *dropped = ze->ze_dropped;
6078881a
TH
660
661#ifdef _KERNEL
662 /* Include events dropped due to rate limiting */
663 *dropped += ratelimit_dropped;
664 ratelimit_dropped = 0;
665#endif
26685276
BB
666 ze->ze_dropped = 0;
667out:
668 mutex_exit(&zevent_lock);
fa42225a 669
d1d7e268 670 return (error);
26685276
BB
671}
672
673int
674zfs_zevent_wait(zfs_zevent_t *ze)
675{
676 int error = 0;
677
678 mutex_enter(&zevent_lock);
fa42225a 679
26685276
BB
680 if (zevent_flags & ZEVENT_SHUTDOWN) {
681 error = ESHUTDOWN;
682 goto out;
fa42225a
BB
683 }
684
26685276 685 zevent_waiters++;
b64ccd6c 686 cv_wait_sig(&zevent_cv, &zevent_lock);
26685276
BB
687 if (issig(JUSTLOOKING))
688 error = EINTR;
689
690 zevent_waiters--;
691out:
692 mutex_exit(&zevent_lock);
693
d1d7e268 694 return (error);
fa42225a
BB
695}
696
75e3ff58
BB
697/*
698 * The caller may seek to a specific EID by passing that EID. If the EID
699 * is still available in the posted list of events the cursor is positioned
700 * there. Otherwise ENOENT is returned and the cursor is not moved.
701 *
702 * There are two reserved EIDs which may be passed and will never fail.
703 * ZEVENT_SEEK_START positions the cursor at the start of the list, and
704 * ZEVENT_SEEK_END positions the cursor at the end of the list.
705 */
706int
707zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid)
708{
709 zevent_t *ev;
710 int error = 0;
711
712 mutex_enter(&zevent_lock);
713
714 if (eid == ZEVENT_SEEK_START) {
715 if (ze->ze_zevent)
716 list_remove(&ze->ze_zevent->ev_ze_list, ze);
717
718 ze->ze_zevent = NULL;
719 goto out;
720 }
721
722 if (eid == ZEVENT_SEEK_END) {
723 if (ze->ze_zevent)
724 list_remove(&ze->ze_zevent->ev_ze_list, ze);
725
726 ev = list_head(&zevent_list);
727 if (ev) {
728 ze->ze_zevent = ev;
729 list_insert_head(&ev->ev_ze_list, ze);
730 } else {
731 ze->ze_zevent = NULL;
732 }
733
734 goto out;
735 }
736
737 for (ev = list_tail(&zevent_list); ev != NULL;
738 ev = list_prev(&zevent_list, ev)) {
739 if (ev->ev_eid == eid) {
740 if (ze->ze_zevent)
741 list_remove(&ze->ze_zevent->ev_ze_list, ze);
742
743 ze->ze_zevent = ev;
744 list_insert_head(&ev->ev_ze_list, ze);
745 break;
746 }
747 }
748
749 if (ev == NULL)
750 error = ENOENT;
751
752out:
753 mutex_exit(&zevent_lock);
754
755 return (error);
756}
757
fa42225a 758void
26685276 759zfs_zevent_init(zfs_zevent_t **zep)
fa42225a 760{
26685276 761 zfs_zevent_t *ze;
fa42225a 762
26685276
BB
763 ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
764 list_link_init(&ze->ze_node);
765}
fa42225a 766
26685276
BB
767void
768zfs_zevent_destroy(zfs_zevent_t *ze)
769{
770 mutex_enter(&zevent_lock);
771 if (ze->ze_zevent)
772 list_remove(&ze->ze_zevent->ev_ze_list, ze);
773 mutex_exit(&zevent_lock);
fa42225a 774
26685276 775 kmem_free(ze, sizeof (zfs_zevent_t));
fa42225a 776}
26685276 777#endif /* _KERNEL */
fa42225a
BB
778
779/*
780 * Wrappers for FM nvlist allocators
781 */
782/* ARGSUSED */
783static void *
784i_fm_alloc(nv_alloc_t *nva, size_t size)
785{
79c76d5b 786 return (kmem_zalloc(size, KM_SLEEP));
fa42225a
BB
787}
788
789/* ARGSUSED */
790static void
791i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
792{
793 kmem_free(buf, size);
794}
795
796const nv_alloc_ops_t fm_mem_alloc_ops = {
797 NULL,
798 NULL,
799 i_fm_alloc,
800 i_fm_free,
801 NULL
802};
803
804/*
805 * Create and initialize a new nv_alloc_t for a fixed buffer, buf. A pointer
806 * to the newly allocated nv_alloc_t structure is returned upon success or NULL
807 * is returned to indicate that the nv_alloc structure could not be created.
808 */
809nv_alloc_t *
810fm_nva_xcreate(char *buf, size_t bufsz)
811{
812 nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
813
814 if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
815 kmem_free(nvhdl, sizeof (nv_alloc_t));
816 return (NULL);
817 }
818
819 return (nvhdl);
820}
821
822/*
823 * Destroy a previously allocated nv_alloc structure. The fixed buffer
824 * associated with nva must be freed by the caller.
825 */
826void
827fm_nva_xdestroy(nv_alloc_t *nva)
828{
829 nv_alloc_fini(nva);
830 kmem_free(nva, sizeof (nv_alloc_t));
831}
832
833/*
834 * Create a new nv list. A pointer to a new nv list structure is returned
835 * upon success or NULL is returned to indicate that the structure could
836 * not be created. The newly created nv list is created and managed by the
837 * operations installed in nva. If nva is NULL, the default FMA nva
838 * operations are installed and used.
839 *
840 * When called from the kernel and nva == NULL, this function must be called
841 * from passive kernel context with no locks held that can prevent a
842 * sleeping memory allocation from occurring. Otherwise, this function may
843 * be called from other kernel contexts as long a valid nva created via
844 * fm_nva_create() is supplied.
845 */
846nvlist_t *
847fm_nvlist_create(nv_alloc_t *nva)
848{
849 int hdl_alloced = 0;
850 nvlist_t *nvl;
851 nv_alloc_t *nvhdl;
852
853 if (nva == NULL) {
79c76d5b 854 nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
fa42225a
BB
855
856 if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
857 kmem_free(nvhdl, sizeof (nv_alloc_t));
858 return (NULL);
859 }
860 hdl_alloced = 1;
861 } else {
862 nvhdl = nva;
863 }
864
865 if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
866 if (hdl_alloced) {
fa42225a 867 nv_alloc_fini(nvhdl);
572e2857 868 kmem_free(nvhdl, sizeof (nv_alloc_t));
fa42225a
BB
869 }
870 return (NULL);
871 }
872
873 return (nvl);
874}
875
876/*
877 * Destroy a previously allocated nvlist structure. flag indicates whether
878 * or not the associated nva structure should be freed (FM_NVA_FREE) or
879 * retained (FM_NVA_RETAIN). Retaining the nv alloc structure allows
880 * it to be re-used for future nvlist creation operations.
881 */
882void
883fm_nvlist_destroy(nvlist_t *nvl, int flag)
884{
885 nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);
886
887 nvlist_free(nvl);
888
889 if (nva != NULL) {
890 if (flag == FM_NVA_FREE)
891 fm_nva_xdestroy(nva);
892 }
893}
894
895int
896i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
897{
898 int nelem, ret = 0;
899 data_type_t type;
900
901 while (ret == 0 && name != NULL) {
902 type = va_arg(ap, data_type_t);
903 switch (type) {
904 case DATA_TYPE_BYTE:
905 ret = nvlist_add_byte(payload, name,
906 va_arg(ap, uint_t));
907 break;
908 case DATA_TYPE_BYTE_ARRAY:
909 nelem = va_arg(ap, int);
910 ret = nvlist_add_byte_array(payload, name,
911 va_arg(ap, uchar_t *), nelem);
912 break;
913 case DATA_TYPE_BOOLEAN_VALUE:
914 ret = nvlist_add_boolean_value(payload, name,
915 va_arg(ap, boolean_t));
916 break;
917 case DATA_TYPE_BOOLEAN_ARRAY:
918 nelem = va_arg(ap, int);
919 ret = nvlist_add_boolean_array(payload, name,
920 va_arg(ap, boolean_t *), nelem);
921 break;
922 case DATA_TYPE_INT8:
923 ret = nvlist_add_int8(payload, name,
924 va_arg(ap, int));
925 break;
926 case DATA_TYPE_INT8_ARRAY:
927 nelem = va_arg(ap, int);
928 ret = nvlist_add_int8_array(payload, name,
929 va_arg(ap, int8_t *), nelem);
930 break;
931 case DATA_TYPE_UINT8:
932 ret = nvlist_add_uint8(payload, name,
933 va_arg(ap, uint_t));
934 break;
935 case DATA_TYPE_UINT8_ARRAY:
936 nelem = va_arg(ap, int);
937 ret = nvlist_add_uint8_array(payload, name,
938 va_arg(ap, uint8_t *), nelem);
939 break;
940 case DATA_TYPE_INT16:
941 ret = nvlist_add_int16(payload, name,
942 va_arg(ap, int));
943 break;
944 case DATA_TYPE_INT16_ARRAY:
945 nelem = va_arg(ap, int);
946 ret = nvlist_add_int16_array(payload, name,
947 va_arg(ap, int16_t *), nelem);
948 break;
949 case DATA_TYPE_UINT16:
950 ret = nvlist_add_uint16(payload, name,
951 va_arg(ap, uint_t));
952 break;
953 case DATA_TYPE_UINT16_ARRAY:
954 nelem = va_arg(ap, int);
955 ret = nvlist_add_uint16_array(payload, name,
956 va_arg(ap, uint16_t *), nelem);
957 break;
958 case DATA_TYPE_INT32:
959 ret = nvlist_add_int32(payload, name,
960 va_arg(ap, int32_t));
961 break;
962 case DATA_TYPE_INT32_ARRAY:
963 nelem = va_arg(ap, int);
964 ret = nvlist_add_int32_array(payload, name,
965 va_arg(ap, int32_t *), nelem);
966 break;
967 case DATA_TYPE_UINT32:
968 ret = nvlist_add_uint32(payload, name,
969 va_arg(ap, uint32_t));
970 break;
971 case DATA_TYPE_UINT32_ARRAY:
972 nelem = va_arg(ap, int);
973 ret = nvlist_add_uint32_array(payload, name,
974 va_arg(ap, uint32_t *), nelem);
975 break;
976 case DATA_TYPE_INT64:
977 ret = nvlist_add_int64(payload, name,
978 va_arg(ap, int64_t));
979 break;
980 case DATA_TYPE_INT64_ARRAY:
981 nelem = va_arg(ap, int);
982 ret = nvlist_add_int64_array(payload, name,
983 va_arg(ap, int64_t *), nelem);
984 break;
985 case DATA_TYPE_UINT64:
986 ret = nvlist_add_uint64(payload, name,
987 va_arg(ap, uint64_t));
988 break;
989 case DATA_TYPE_UINT64_ARRAY:
990 nelem = va_arg(ap, int);
991 ret = nvlist_add_uint64_array(payload, name,
992 va_arg(ap, uint64_t *), nelem);
993 break;
994 case DATA_TYPE_STRING:
995 ret = nvlist_add_string(payload, name,
996 va_arg(ap, char *));
997 break;
998 case DATA_TYPE_STRING_ARRAY:
999 nelem = va_arg(ap, int);
1000 ret = nvlist_add_string_array(payload, name,
1001 va_arg(ap, char **), nelem);
1002 break;
1003 case DATA_TYPE_NVLIST:
1004 ret = nvlist_add_nvlist(payload, name,
1005 va_arg(ap, nvlist_t *));
1006 break;
1007 case DATA_TYPE_NVLIST_ARRAY:
1008 nelem = va_arg(ap, int);
1009 ret = nvlist_add_nvlist_array(payload, name,
1010 va_arg(ap, nvlist_t **), nelem);
1011 break;
1012 default:
1013 ret = EINVAL;
1014 }
1015
1016 name = va_arg(ap, char *);
1017 }
1018 return (ret);
1019}
1020
1021void
1022fm_payload_set(nvlist_t *payload, ...)
1023{
1024 int ret;
1025 const char *name;
1026 va_list ap;
1027
1028 va_start(ap, payload);
1029 name = va_arg(ap, char *);
1030 ret = i_fm_payload_set(payload, name, ap);
1031 va_end(ap);
1032
1033 if (ret)
bc89ac84 1034 atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);
fa42225a
BB
1035}
1036
1037/*
1038 * Set-up and validate the members of an ereport event according to:
1039 *
1040 * Member name Type Value
1041 * ====================================================
1042 * class string ereport
1043 * version uint8_t 0
1044 * ena uint64_t <ena>
1045 * detector nvlist_t <detector>
1046 * ereport-payload nvlist_t <var args>
1047 *
428870ff
BB
1048 * We don't actually add a 'version' member to the payload. Really,
1049 * the version quoted to us by our caller is that of the category 1
1050 * "ereport" event class (and we require FM_EREPORT_VERS0) but
1051 * the payload version of the actual leaf class event under construction
1052 * may be something else. Callers should supply a version in the varargs,
1053 * or (better) we could take two version arguments - one for the
1054 * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
1055 * for the leaf class.
fa42225a
BB
1056 */
1057void
1058fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
1059 uint64_t ena, const nvlist_t *detector, ...)
1060{
1061 char ereport_class[FM_MAX_CLASS];
1062 const char *name;
1063 va_list ap;
1064 int ret;
1065
1066 if (version != FM_EREPORT_VERS0) {
bc89ac84 1067 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
fa42225a
BB
1068 return;
1069 }
1070
1071 (void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
1072 FM_EREPORT_CLASS, erpt_class);
1073 if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
bc89ac84 1074 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
fa42225a
BB
1075 return;
1076 }
1077
1078 if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
bc89ac84 1079 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
fa42225a
BB
1080 }
1081
1082 if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
1083 (nvlist_t *)detector) != 0) {
bc89ac84 1084 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
fa42225a
BB
1085 }
1086
1087 va_start(ap, detector);
1088 name = va_arg(ap, const char *);
1089 ret = i_fm_payload_set(ereport, name, ap);
1090 va_end(ap);
1091
1092 if (ret)
bc89ac84 1093 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
fa42225a
BB
1094}
1095
1096/*
1097 * Set-up and validate the members of an hc fmri according to:
1098 *
1099 * Member name Type Value
1100 * ===================================================
1101 * version uint8_t 0
1102 * auth nvlist_t <auth>
1103 * hc-name string <name>
1104 * hc-id string <id>
1105 *
1106 * Note that auth and hc-id are optional members.
1107 */
1108
1109#define HC_MAXPAIRS 20
1110#define HC_MAXNAMELEN 50
1111
1112static int
1113fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
1114{
1115 if (version != FM_HC_SCHEME_VERSION) {
bc89ac84 1116 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1117 return (0);
1118 }
1119
1120 if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
1121 nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
bc89ac84 1122 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1123 return (0);
1124 }
1125
1126 if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
1127 (nvlist_t *)auth) != 0) {
bc89ac84 1128 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1129 return (0);
1130 }
1131
1132 return (1);
1133}
1134
1135void
1136fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
1137 nvlist_t *snvl, int npairs, ...)
1138{
1139 nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
1140 nvlist_t *pairs[HC_MAXPAIRS];
1141 va_list ap;
1142 int i;
1143
1144 if (!fm_fmri_hc_set_common(fmri, version, auth))
1145 return;
1146
1147 npairs = MIN(npairs, HC_MAXPAIRS);
1148
1149 va_start(ap, npairs);
1150 for (i = 0; i < npairs; i++) {
1151 const char *name = va_arg(ap, const char *);
1152 uint32_t id = va_arg(ap, uint32_t);
1153 char idstr[11];
1154
1155 (void) snprintf(idstr, sizeof (idstr), "%u", id);
1156
1157 pairs[i] = fm_nvlist_create(nva);
1158 if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
1159 nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
bc89ac84
JJS
1160 atomic_inc_64(
1161 &erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1162 }
1163 }
1164 va_end(ap);
1165
1166 if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, npairs) != 0)
bc89ac84 1167 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1168
1169 for (i = 0; i < npairs; i++)
1170 fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
1171
1172 if (snvl != NULL) {
1173 if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
bc89ac84
JJS
1174 atomic_inc_64(
1175 &erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1176 }
1177 }
1178}
1179
26685276
BB
1180void
1181fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
1182 nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
1183{
1184 nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
1185 nvlist_t *pairs[HC_MAXPAIRS];
1186 nvlist_t **hcl;
1187 uint_t n;
1188 int i, j;
1189 va_list ap;
1190 char *hcname, *hcid;
1191
1192 if (!fm_fmri_hc_set_common(fmri, version, auth))
1193 return;
1194
1195 /*
1196 * copy the bboard nvpairs to the pairs array
1197 */
1198 if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
1199 != 0) {
bc89ac84 1200 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
26685276
BB
1201 return;
1202 }
1203
1204 for (i = 0; i < n; i++) {
1205 if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
1206 &hcname) != 0) {
bc89ac84
JJS
1207 atomic_inc_64(
1208 &erpt_kstat_data.fmri_set_failed.value.ui64);
26685276
BB
1209 return;
1210 }
1211 if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
bc89ac84
JJS
1212 atomic_inc_64(
1213 &erpt_kstat_data.fmri_set_failed.value.ui64);
26685276
BB
1214 return;
1215 }
1216
1217 pairs[i] = fm_nvlist_create(nva);
1218 if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
1219 nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
1220 for (j = 0; j <= i; j++) {
1221 if (pairs[j] != NULL)
1222 fm_nvlist_destroy(pairs[j],
1223 FM_NVA_RETAIN);
1224 }
bc89ac84
JJS
1225 atomic_inc_64(
1226 &erpt_kstat_data.fmri_set_failed.value.ui64);
26685276
BB
1227 return;
1228 }
1229 }
1230
1231 /*
1232 * create the pairs from passed in pairs
1233 */
1234 npairs = MIN(npairs, HC_MAXPAIRS);
1235
1236 va_start(ap, npairs);
1237 for (i = n; i < npairs + n; i++) {
1238 const char *name = va_arg(ap, const char *);
1239 uint32_t id = va_arg(ap, uint32_t);
1240 char idstr[11];
1241 (void) snprintf(idstr, sizeof (idstr), "%u", id);
1242 pairs[i] = fm_nvlist_create(nva);
1243 if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
1244 nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
1245 for (j = 0; j <= i; j++) {
1246 if (pairs[j] != NULL)
1247 fm_nvlist_destroy(pairs[j],
1248 FM_NVA_RETAIN);
1249 }
bc89ac84
JJS
1250 atomic_inc_64(
1251 &erpt_kstat_data.fmri_set_failed.value.ui64);
26685276
BB
1252 return;
1253 }
1254 }
1255 va_end(ap);
1256
1257 /*
1258 * Create the fmri hc list
1259 */
1260 if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
1261 npairs + n) != 0) {
bc89ac84 1262 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
26685276
BB
1263 return;
1264 }
1265
1266 for (i = 0; i < npairs + n; i++) {
1267 fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
1268 }
1269
1270 if (snvl != NULL) {
1271 if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
bc89ac84
JJS
1272 atomic_inc_64(
1273 &erpt_kstat_data.fmri_set_failed.value.ui64);
26685276
BB
1274 return;
1275 }
1276 }
1277}
1278
fa42225a
BB
1279/*
1280 * Set-up and validate the members of an dev fmri according to:
1281 *
1282 * Member name Type Value
1283 * ====================================================
1284 * version uint8_t 0
1285 * auth nvlist_t <auth>
1286 * devpath string <devpath>
428870ff
BB
1287 * [devid] string <devid>
1288 * [target-port-l0id] string <target-port-lun0-id>
fa42225a
BB
1289 *
1290 * Note that auth and devid are optional members.
1291 */
1292void
1293fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
428870ff 1294 const char *devpath, const char *devid, const char *tpl0)
fa42225a 1295{
428870ff
BB
1296 int err = 0;
1297
fa42225a 1298 if (version != DEV_SCHEME_VERSION0) {
bc89ac84 1299 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1300 return;
1301 }
1302
428870ff
BB
1303 err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
1304 err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);
fa42225a
BB
1305
1306 if (auth != NULL) {
428870ff
BB
1307 err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
1308 (nvlist_t *)auth);
fa42225a
BB
1309 }
1310
428870ff 1311 err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);
fa42225a
BB
1312
1313 if (devid != NULL)
428870ff
BB
1314 err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);
1315
1316 if (tpl0 != NULL)
1317 err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);
1318
1319 if (err)
bc89ac84 1320 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
428870ff 1321
fa42225a
BB
1322}
1323
1324/*
1325 * Set-up and validate the members of an cpu fmri according to:
1326 *
1327 * Member name Type Value
1328 * ====================================================
1329 * version uint8_t 0
1330 * auth nvlist_t <auth>
1331 * cpuid uint32_t <cpu_id>
1332 * cpumask uint8_t <cpu_mask>
1333 * serial uint64_t <serial_id>
1334 *
1335 * Note that auth, cpumask, serial are optional members.
1336 *
1337 */
1338void
1339fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
1340 uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
1341{
1342 uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
1343
1344 if (version < CPU_SCHEME_VERSION1) {
bc89ac84 1345 atomic_inc_64(failedp);
fa42225a
BB
1346 return;
1347 }
1348
1349 if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
bc89ac84 1350 atomic_inc_64(failedp);
fa42225a
BB
1351 return;
1352 }
1353
1354 if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
1355 FM_FMRI_SCHEME_CPU) != 0) {
bc89ac84 1356 atomic_inc_64(failedp);
fa42225a
BB
1357 return;
1358 }
1359
1360 if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
1361 (nvlist_t *)auth) != 0)
bc89ac84 1362 atomic_inc_64(failedp);
fa42225a
BB
1363
1364 if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
bc89ac84 1365 atomic_inc_64(failedp);
fa42225a
BB
1366
1367 if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
1368 *cpu_maskp) != 0)
bc89ac84 1369 atomic_inc_64(failedp);
fa42225a
BB
1370
1371 if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
1372 FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
bc89ac84 1373 atomic_inc_64(failedp);
fa42225a
BB
1374}
1375
1376/*
1377 * Set-up and validate the members of a mem according to:
1378 *
1379 * Member name Type Value
1380 * ====================================================
1381 * version uint8_t 0
1382 * auth nvlist_t <auth> [optional]
1383 * unum string <unum>
1384 * serial string <serial> [optional*]
1385 * offset uint64_t <offset> [optional]
1386 *
1387 * * serial is required if offset is present
1388 */
1389void
1390fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
1391 const char *unum, const char *serial, uint64_t offset)
1392{
1393 if (version != MEM_SCHEME_VERSION0) {
bc89ac84 1394 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1395 return;
1396 }
1397
1398 if (!serial && (offset != (uint64_t)-1)) {
bc89ac84 1399 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1400 return;
1401 }
1402
1403 if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
bc89ac84 1404 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1405 return;
1406 }
1407
1408 if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
bc89ac84 1409 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1410 return;
1411 }
1412
1413 if (auth != NULL) {
1414 if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
1415 (nvlist_t *)auth) != 0) {
bc89ac84
JJS
1416 atomic_inc_64(
1417 &erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1418 }
1419 }
1420
1421 if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
bc89ac84 1422 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1423 }
1424
1425 if (serial != NULL) {
1426 if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
1427 (char **)&serial, 1) != 0) {
bc89ac84
JJS
1428 atomic_inc_64(
1429 &erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a 1430 }
bc89ac84
JJS
1431 if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri,
1432 FM_FMRI_MEM_OFFSET, offset) != 0) {
1433 atomic_inc_64(
1434 &erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1435 }
1436 }
1437}
1438
1439void
1440fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
1441 uint64_t vdev_guid)
1442{
1443 if (version != ZFS_SCHEME_VERSION0) {
bc89ac84 1444 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1445 return;
1446 }
1447
1448 if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
bc89ac84 1449 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1450 return;
1451 }
1452
1453 if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
bc89ac84 1454 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1455 return;
1456 }
1457
1458 if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
bc89ac84 1459 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1460 }
1461
1462 if (vdev_guid != 0) {
1463 if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
bc89ac84
JJS
1464 atomic_inc_64(
1465 &erpt_kstat_data.fmri_set_failed.value.ui64);
fa42225a
BB
1466 }
1467 }
1468}
1469
1470uint64_t
1471fm_ena_increment(uint64_t ena)
1472{
1473 uint64_t new_ena;
1474
1475 switch (ENA_FORMAT(ena)) {
1476 case FM_ENA_FMT1:
1477 new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
1478 break;
1479 case FM_ENA_FMT2:
1480 new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);
1481 break;
1482 default:
1483 new_ena = 0;
1484 }
1485
1486 return (new_ena);
1487}
1488
1489uint64_t
1490fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
1491{
1492 uint64_t ena = 0;
1493
1494 switch (format) {
1495 case FM_ENA_FMT1:
1496 if (timestamp) {
1497 ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1498 ((cpuid << ENA_FMT1_CPUID_SHFT) &
1499 ENA_FMT1_CPUID_MASK) |
1500 ((timestamp << ENA_FMT1_TIME_SHFT) &
1501 ENA_FMT1_TIME_MASK));
1502 } else {
1503 ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1504 ((cpuid << ENA_FMT1_CPUID_SHFT) &
1505 ENA_FMT1_CPUID_MASK) |
26685276 1506 ((gethrtime() << ENA_FMT1_TIME_SHFT) &
fa42225a
BB
1507 ENA_FMT1_TIME_MASK));
1508 }
1509 break;
1510 case FM_ENA_FMT2:
1511 ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1512 ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));
1513 break;
1514 default:
1515 break;
1516 }
1517
1518 return (ena);
1519}
1520
1521uint64_t
1522fm_ena_generate(uint64_t timestamp, uchar_t format)
1523{
15a9e033
PS
1524 uint64_t ena;
1525
1526 kpreempt_disable();
1527 ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);
1528 kpreempt_enable();
1529
1530 return (ena);
fa42225a
BB
1531}
1532
1533uint64_t
1534fm_ena_generation_get(uint64_t ena)
1535{
1536 uint64_t gen;
1537
1538 switch (ENA_FORMAT(ena)) {
1539 case FM_ENA_FMT1:
1540 gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
1541 break;
1542 case FM_ENA_FMT2:
1543 gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;
1544 break;
1545 default:
1546 gen = 0;
1547 break;
1548 }
1549
1550 return (gen);
1551}
1552
1553uchar_t
1554fm_ena_format_get(uint64_t ena)
1555{
1556
1557 return (ENA_FORMAT(ena));
1558}
1559
1560uint64_t
1561fm_ena_id_get(uint64_t ena)
1562{
1563 uint64_t id;
1564
1565 switch (ENA_FORMAT(ena)) {
1566 case FM_ENA_FMT1:
1567 id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
1568 break;
1569 case FM_ENA_FMT2:
1570 id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;
1571 break;
1572 default:
1573 id = 0;
1574 }
1575
1576 return (id);
1577}
1578
1579uint64_t
1580fm_ena_time_get(uint64_t ena)
1581{
1582 uint64_t time;
1583
1584 switch (ENA_FORMAT(ena)) {
1585 case FM_ENA_FMT1:
1586 time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
1587 break;
1588 case FM_ENA_FMT2:
1589 time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;
1590 break;
1591 default:
1592 time = 0;
1593 }
1594
1595 return (time);
1596}
1597
6078881a
TH
#if defined(_KERNEL)
/*
 * Atomically add one to the ratelimit_dropped counter.  The event rate
 * limiting code calls this so that events it suppressed are reflected in
 * the 'dropped' count reported to the user.
 */
void
fm_erpt_dropped_increment(void)
{
	atomic_inc_64(&ratelimit_dropped);
}
#endif /* _KERNEL */
1610
26685276 1611#ifdef _KERNEL
fa42225a 1612void
26685276 1613fm_init(void)
fa42225a 1614{
26685276
BB
1615 zevent_len_cur = 0;
1616 zevent_flags = 0;
fa42225a 1617
c409e464
BB
1618 if (zfs_zevent_len_max == 0)
1619 zfs_zevent_len_max = ERPT_MAX_ERRS * MAX(max_ncpus, 4);
fa42225a 1620
26685276
BB
1621 /* Initialize zevent allocation and generation kstats */
1622 fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
1623 sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
1624 KSTAT_FLAG_VIRTUAL);
1625
1626 if (fm_ksp != NULL) {
1627 fm_ksp->ks_data = &erpt_kstat_data;
1628 kstat_install(fm_ksp);
1629 } else {
1630 cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
1631 }
1632
1633 mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
d1d7e268
MK
1634 list_create(&zevent_list, sizeof (zevent_t),
1635 offsetof(zevent_t, ev_node));
26685276 1636 cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);
fa42225a 1637}
428870ff
BB
1638
1639void
26685276 1640fm_fini(void)
428870ff 1641{
26685276 1642 int count;
428870ff 1643
26685276 1644 zfs_zevent_drain_all(&count);
428870ff 1645
26685276 1646 mutex_enter(&zevent_lock);
99db9bfd
BB
1647 cv_broadcast(&zevent_cv);
1648
26685276
BB
1649 zevent_flags |= ZEVENT_SHUTDOWN;
1650 while (zevent_waiters > 0) {
1651 mutex_exit(&zevent_lock);
1652 schedule();
1653 mutex_enter(&zevent_lock);
428870ff 1654 }
26685276 1655 mutex_exit(&zevent_lock);
428870ff 1656
26685276
BB
1657 cv_destroy(&zevent_cv);
1658 list_destroy(&zevent_list);
1659 mutex_destroy(&zevent_lock);
428870ff 1660
26685276
BB
1661 if (fm_ksp != NULL) {
1662 kstat_delete(fm_ksp);
1663 fm_ksp = NULL;
428870ff 1664 }
26685276 1665}
428870ff 1666
c409e464
BB
1667module_param(zfs_zevent_len_max, int, 0644);
1668MODULE_PARM_DESC(zfs_zevent_len_max, "Max event queue length");
428870ff 1669
c409e464
BB
1670module_param(zfs_zevent_cols, int, 0644);
1671MODULE_PARM_DESC(zfs_zevent_cols, "Max event column width");
428870ff 1672
c409e464
BB
1673module_param(zfs_zevent_console, int, 0644);
1674MODULE_PARM_DESC(zfs_zevent_console, "Log events to the console");
428870ff 1675
26685276 1676#endif /* _KERNEL */