/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
22 #include <sys/zfs_context.h>
23 #include <sys/spa_impl.h>
24 #include <sys/vdev_impl.h>
26 #include <zfs_comutil.h>
/*
 * Keeps stats on last N reads per spa_t, disabled by default.
 */
int zfs_read_history = 0;

/*
 * Include cache hits in history, disabled by default.
 */
int zfs_read_history_hits = 0;

/*
 * Keeps stats on the last 100 txgs by default.
 */
int zfs_txg_history = 100;

/*
 * Keeps stats on the last N MMP updates, disabled by default.
 */
int zfs_multihost_history = 0;
/*
 * ==========================================================================
 * SPA Read History Routines
 * ==========================================================================
 */
55 * Read statistics - Information exported regarding each arc_read call
57 typedef struct spa_read_history
{
58 hrtime_t start
; /* time read completed */
59 uint64_t objset
; /* read from this objset */
60 uint64_t object
; /* read of this object number */
61 uint64_t level
; /* block's indirection level */
62 uint64_t blkid
; /* read of this block id */
63 char origin
[24]; /* read originated from here */
64 uint32_t aflags
; /* ARC flags (cached, prefetch, etc.) */
65 pid_t pid
; /* PID of task doing read */
66 char comm
[16]; /* process name of task doing read */
67 procfs_list_node_t srh_node
;
/* Emit the column headers for the per-pool read history procfs file. */
static int
spa_read_history_show_header(struct seq_file *f)
{
    seq_printf(f, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s "
        "%-24s %-8s %-16s\n", "UID", "start", "objset", "object",
        "level", "blkid", "aflags", "origin", "pid", "process");

    return (0);
}
81 spa_read_history_show(struct seq_file
*f
, void *data
)
83 spa_read_history_t
*srh
= (spa_read_history_t
*)data
;
85 seq_printf(f
, "%-8llu %-16llu 0x%-6llx "
86 "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
87 (u_longlong_t
)srh
->srh_node
.pln_id
, srh
->start
,
88 (longlong_t
)srh
->objset
, (longlong_t
)srh
->object
,
89 (longlong_t
)srh
->level
, (longlong_t
)srh
->blkid
,
90 srh
->aflags
, srh
->origin
, srh
->pid
, srh
->comm
);
95 /* Remove oldest elements from list until there are no more than 'size' left */
97 spa_read_history_truncate(spa_history_list_t
*shl
, unsigned int size
)
99 spa_read_history_t
*srh
;
100 while (shl
->size
> size
) {
101 srh
= list_remove_head(&shl
->procfs_list
.pl_list
);
102 ASSERT3P(srh
, !=, NULL
);
103 kmem_free(srh
, sizeof (spa_read_history_t
));
108 ASSERT(list_is_empty(&shl
->procfs_list
.pl_list
));
112 spa_read_history_clear(procfs_list_t
*procfs_list
)
114 spa_history_list_t
*shl
= procfs_list
->pl_private
;
115 mutex_enter(&procfs_list
->pl_lock
);
116 spa_read_history_truncate(shl
, 0);
117 mutex_exit(&procfs_list
->pl_lock
);
122 spa_read_history_init(spa_t
*spa
)
124 spa_history_list_t
*shl
= &spa
->spa_stats
.read_history
;
129 module
= kmem_asprintf("zfs/%s", spa_name(spa
));
131 shl
->procfs_list
.pl_private
= shl
;
132 procfs_list_install(module
,
135 spa_read_history_show
,
136 spa_read_history_show_header
,
137 spa_read_history_clear
,
138 offsetof(spa_read_history_t
, srh_node
));
144 spa_read_history_destroy(spa_t
*spa
)
146 spa_history_list_t
*shl
= &spa
->spa_stats
.read_history
;
147 procfs_list_uninstall(&shl
->procfs_list
);
148 spa_read_history_truncate(shl
, 0);
149 procfs_list_destroy(&shl
->procfs_list
);
153 spa_read_history_add(spa_t
*spa
, const zbookmark_phys_t
*zb
, uint32_t aflags
)
155 spa_history_list_t
*shl
= &spa
->spa_stats
.read_history
;
156 spa_read_history_t
*srh
;
158 ASSERT3P(spa
, !=, NULL
);
159 ASSERT3P(zb
, !=, NULL
);
161 if (zfs_read_history
== 0 && shl
->size
== 0)
164 if (zfs_read_history_hits
== 0 && (aflags
& ARC_FLAG_CACHED
))
167 srh
= kmem_zalloc(sizeof (spa_read_history_t
), KM_SLEEP
);
168 strlcpy(srh
->comm
, getcomm(), sizeof (srh
->comm
));
169 srh
->start
= gethrtime();
170 srh
->objset
= zb
->zb_objset
;
171 srh
->object
= zb
->zb_object
;
172 srh
->level
= zb
->zb_level
;
173 srh
->blkid
= zb
->zb_blkid
;
174 srh
->aflags
= aflags
;
177 mutex_enter(&shl
->procfs_list
.pl_lock
);
179 procfs_list_add(&shl
->procfs_list
, srh
);
182 spa_read_history_truncate(shl
, zfs_read_history
);
184 mutex_exit(&shl
->procfs_list
.pl_lock
);
/*
 * ==========================================================================
 * SPA TXG History Routines
 * ==========================================================================
 */
194 * Txg statistics - Information exported regarding each txg sync
197 typedef struct spa_txg_history
{
198 uint64_t txg
; /* txg id */
199 txg_state_t state
; /* active txg state */
200 uint64_t nread
; /* number of bytes read */
201 uint64_t nwritten
; /* number of bytes written */
202 uint64_t reads
; /* number of read operations */
203 uint64_t writes
; /* number of write operations */
204 uint64_t ndirty
; /* number of dirty bytes */
205 hrtime_t times
[TXG_STATE_COMMITTED
]; /* completion times */
206 procfs_list_node_t sth_node
;
/* Emit the column headers for the per-pool txg history procfs file. */
static int
spa_txg_history_show_header(struct seq_file *f)
{
    seq_printf(f, "%-8s %-16s %-5s %-12s %-12s %-12s "
        "%-8s %-8s %-12s %-12s %-12s %-12s\n", "txg", "birth", "state",
        "ndirty", "nread", "nwritten", "reads", "writes",
        "otime", "qtime", "wtime", "stime");

    return (0);
}
220 spa_txg_history_show(struct seq_file
*f
, void *data
)
222 spa_txg_history_t
*sth
= (spa_txg_history_t
*)data
;
223 uint64_t open
= 0, quiesce
= 0, wait
= 0, sync
= 0;
226 switch (sth
->state
) {
227 case TXG_STATE_BIRTH
: state
= 'B'; break;
228 case TXG_STATE_OPEN
: state
= 'O'; break;
229 case TXG_STATE_QUIESCED
: state
= 'Q'; break;
230 case TXG_STATE_WAIT_FOR_SYNC
: state
= 'W'; break;
231 case TXG_STATE_SYNCED
: state
= 'S'; break;
232 case TXG_STATE_COMMITTED
: state
= 'C'; break;
233 default: state
= '?'; break;
236 if (sth
->times
[TXG_STATE_OPEN
])
237 open
= sth
->times
[TXG_STATE_OPEN
] -
238 sth
->times
[TXG_STATE_BIRTH
];
240 if (sth
->times
[TXG_STATE_QUIESCED
])
241 quiesce
= sth
->times
[TXG_STATE_QUIESCED
] -
242 sth
->times
[TXG_STATE_OPEN
];
244 if (sth
->times
[TXG_STATE_WAIT_FOR_SYNC
])
245 wait
= sth
->times
[TXG_STATE_WAIT_FOR_SYNC
] -
246 sth
->times
[TXG_STATE_QUIESCED
];
248 if (sth
->times
[TXG_STATE_SYNCED
])
249 sync
= sth
->times
[TXG_STATE_SYNCED
] -
250 sth
->times
[TXG_STATE_WAIT_FOR_SYNC
];
252 seq_printf(f
, "%-8llu %-16llu %-5c %-12llu "
253 "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
254 (longlong_t
)sth
->txg
, sth
->times
[TXG_STATE_BIRTH
], state
,
255 (u_longlong_t
)sth
->ndirty
,
256 (u_longlong_t
)sth
->nread
, (u_longlong_t
)sth
->nwritten
,
257 (u_longlong_t
)sth
->reads
, (u_longlong_t
)sth
->writes
,
258 (u_longlong_t
)open
, (u_longlong_t
)quiesce
, (u_longlong_t
)wait
,
264 /* Remove oldest elements from list until there are no more than 'size' left */
266 spa_txg_history_truncate(spa_history_list_t
*shl
, unsigned int size
)
268 spa_txg_history_t
*sth
;
269 while (shl
->size
> size
) {
270 sth
= list_remove_head(&shl
->procfs_list
.pl_list
);
271 ASSERT3P(sth
, !=, NULL
);
272 kmem_free(sth
, sizeof (spa_txg_history_t
));
277 ASSERT(list_is_empty(&shl
->procfs_list
.pl_list
));
282 spa_txg_history_clear(procfs_list_t
*procfs_list
)
284 spa_history_list_t
*shl
= procfs_list
->pl_private
;
285 mutex_enter(&procfs_list
->pl_lock
);
286 spa_txg_history_truncate(shl
, 0);
287 mutex_exit(&procfs_list
->pl_lock
);
292 spa_txg_history_init(spa_t
*spa
)
294 spa_history_list_t
*shl
= &spa
->spa_stats
.txg_history
;
299 module
= kmem_asprintf("zfs/%s", spa_name(spa
));
301 shl
->procfs_list
.pl_private
= shl
;
302 procfs_list_install(module
,
305 spa_txg_history_show
,
306 spa_txg_history_show_header
,
307 spa_txg_history_clear
,
308 offsetof(spa_txg_history_t
, sth_node
));
314 spa_txg_history_destroy(spa_t
*spa
)
316 spa_history_list_t
*shl
= &spa
->spa_stats
.txg_history
;
317 procfs_list_uninstall(&shl
->procfs_list
);
318 spa_txg_history_truncate(shl
, 0);
319 procfs_list_destroy(&shl
->procfs_list
);
323 * Add a new txg to historical record.
326 spa_txg_history_add(spa_t
*spa
, uint64_t txg
, hrtime_t birth_time
)
328 spa_history_list_t
*shl
= &spa
->spa_stats
.txg_history
;
329 spa_txg_history_t
*sth
;
331 if (zfs_txg_history
== 0 && shl
->size
== 0)
334 sth
= kmem_zalloc(sizeof (spa_txg_history_t
), KM_SLEEP
);
336 sth
->state
= TXG_STATE_OPEN
;
337 sth
->times
[TXG_STATE_BIRTH
] = birth_time
;
339 mutex_enter(&shl
->procfs_list
.pl_lock
);
340 procfs_list_add(&shl
->procfs_list
, sth
);
342 spa_txg_history_truncate(shl
, zfs_txg_history
);
343 mutex_exit(&shl
->procfs_list
.pl_lock
);
347 * Set txg state completion time and increment current state.
350 spa_txg_history_set(spa_t
*spa
, uint64_t txg
, txg_state_t completed_state
,
351 hrtime_t completed_time
)
353 spa_history_list_t
*shl
= &spa
->spa_stats
.txg_history
;
354 spa_txg_history_t
*sth
;
357 if (zfs_txg_history
== 0)
360 mutex_enter(&shl
->procfs_list
.pl_lock
);
361 for (sth
= list_tail(&shl
->procfs_list
.pl_list
); sth
!= NULL
;
362 sth
= list_prev(&shl
->procfs_list
.pl_list
, sth
)) {
363 if (sth
->txg
== txg
) {
364 sth
->times
[completed_state
] = completed_time
;
370 mutex_exit(&shl
->procfs_list
.pl_lock
);
379 spa_txg_history_set_io(spa_t
*spa
, uint64_t txg
, uint64_t nread
,
380 uint64_t nwritten
, uint64_t reads
, uint64_t writes
, uint64_t ndirty
)
382 spa_history_list_t
*shl
= &spa
->spa_stats
.txg_history
;
383 spa_txg_history_t
*sth
;
386 if (zfs_txg_history
== 0)
389 mutex_enter(&shl
->procfs_list
.pl_lock
);
390 for (sth
= list_tail(&shl
->procfs_list
.pl_list
); sth
!= NULL
;
391 sth
= list_prev(&shl
->procfs_list
.pl_list
, sth
)) {
392 if (sth
->txg
== txg
) {
394 sth
->nwritten
= nwritten
;
396 sth
->writes
= writes
;
397 sth
->ndirty
= ndirty
;
402 mutex_exit(&shl
->procfs_list
.pl_lock
);
408 spa_txg_history_init_io(spa_t
*spa
, uint64_t txg
, dsl_pool_t
*dp
)
412 if (zfs_txg_history
== 0)
415 ts
= kmem_alloc(sizeof (txg_stat_t
), KM_SLEEP
);
417 spa_config_enter(spa
, SCL_ALL
, FTAG
, RW_READER
);
418 vdev_get_stats(spa
->spa_root_vdev
, &ts
->vs1
);
419 spa_config_exit(spa
, SCL_ALL
, FTAG
);
422 ts
->ndirty
= dp
->dp_dirty_pertxg
[txg
& TXG_MASK
];
424 spa_txg_history_set(spa
, txg
, TXG_STATE_WAIT_FOR_SYNC
, gethrtime());
430 spa_txg_history_fini_io(spa_t
*spa
, txg_stat_t
*ts
)
435 if (zfs_txg_history
== 0) {
436 kmem_free(ts
, sizeof (txg_stat_t
));
440 spa_config_enter(spa
, SCL_ALL
, FTAG
, RW_READER
);
441 vdev_get_stats(spa
->spa_root_vdev
, &ts
->vs2
);
442 spa_config_exit(spa
, SCL_ALL
, FTAG
);
444 spa_txg_history_set(spa
, ts
->txg
, TXG_STATE_SYNCED
, gethrtime());
445 spa_txg_history_set_io(spa
, ts
->txg
,
446 ts
->vs2
.vs_bytes
[ZIO_TYPE_READ
] - ts
->vs1
.vs_bytes
[ZIO_TYPE_READ
],
447 ts
->vs2
.vs_bytes
[ZIO_TYPE_WRITE
] - ts
->vs1
.vs_bytes
[ZIO_TYPE_WRITE
],
448 ts
->vs2
.vs_ops
[ZIO_TYPE_READ
] - ts
->vs1
.vs_ops
[ZIO_TYPE_READ
],
449 ts
->vs2
.vs_ops
[ZIO_TYPE_WRITE
] - ts
->vs1
.vs_ops
[ZIO_TYPE_WRITE
],
452 kmem_free(ts
, sizeof (txg_stat_t
));
/*
 * ==========================================================================
 * SPA TX Assign Histogram Routines
 * ==========================================================================
 */

/*
 * Tx statistics - Information exported regarding dmu_tx_assign time.
 */

/*
 * When the kstat is written zero all buckets.  When the kstat is read
 * count the number of trailing buckets set to zero and update ks_ndata
 * such that they are not output.
 */
471 spa_tx_assign_update(kstat_t
*ksp
, int rw
)
473 spa_t
*spa
= ksp
->ks_private
;
474 spa_history_kstat_t
*shk
= &spa
->spa_stats
.tx_assign_histogram
;
477 if (rw
== KSTAT_WRITE
) {
478 for (i
= 0; i
< shk
->count
; i
++)
479 ((kstat_named_t
*)shk
->private)[i
].value
.ui64
= 0;
482 for (i
= shk
->count
; i
> 0; i
--)
483 if (((kstat_named_t
*)shk
->private)[i
-1].value
.ui64
!= 0)
487 ksp
->ks_data_size
= i
* sizeof (kstat_named_t
);
493 spa_tx_assign_init(spa_t
*spa
)
495 spa_history_kstat_t
*shk
= &spa
->spa_stats
.tx_assign_histogram
;
501 mutex_init(&shk
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
503 shk
->count
= 42; /* power of two buckets for 1ns to 2,199s */
504 shk
->size
= shk
->count
* sizeof (kstat_named_t
);
505 shk
->private = kmem_alloc(shk
->size
, KM_SLEEP
);
507 name
= kmem_asprintf("zfs/%s", spa_name(spa
));
509 for (i
= 0; i
< shk
->count
; i
++) {
510 ks
= &((kstat_named_t
*)shk
->private)[i
];
511 ks
->data_type
= KSTAT_DATA_UINT64
;
513 (void) snprintf(ks
->name
, KSTAT_STRLEN
, "%llu ns",
514 (u_longlong_t
)1 << i
);
517 ksp
= kstat_create(name
, 0, "dmu_tx_assign", "misc",
518 KSTAT_TYPE_NAMED
, 0, KSTAT_FLAG_VIRTUAL
);
522 ksp
->ks_lock
= &shk
->lock
;
523 ksp
->ks_data
= shk
->private;
524 ksp
->ks_ndata
= shk
->count
;
525 ksp
->ks_data_size
= shk
->size
;
526 ksp
->ks_private
= spa
;
527 ksp
->ks_update
= spa_tx_assign_update
;
534 spa_tx_assign_destroy(spa_t
*spa
)
536 spa_history_kstat_t
*shk
= &spa
->spa_stats
.tx_assign_histogram
;
543 kmem_free(shk
->private, shk
->size
);
544 mutex_destroy(&shk
->lock
);
548 spa_tx_assign_add_nsecs(spa_t
*spa
, uint64_t nsecs
)
550 spa_history_kstat_t
*shk
= &spa
->spa_stats
.tx_assign_histogram
;
553 while (((1ULL << idx
) < nsecs
) && (idx
< shk
->size
- 1))
556 atomic_inc_64(&((kstat_named_t
*)shk
->private)[idx
].value
.ui64
);
/*
 * ==========================================================================
 * SPA IO History Routines
 * ==========================================================================
 */
565 spa_io_history_update(kstat_t
*ksp
, int rw
)
567 if (rw
== KSTAT_WRITE
)
568 memset(ksp
->ks_data
, 0, ksp
->ks_data_size
);
574 spa_io_history_init(spa_t
*spa
)
576 spa_history_kstat_t
*shk
= &spa
->spa_stats
.io_history
;
580 mutex_init(&shk
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
582 name
= kmem_asprintf("zfs/%s", spa_name(spa
));
584 ksp
= kstat_create(name
, 0, "io", "disk", KSTAT_TYPE_IO
, 1, 0);
588 ksp
->ks_lock
= &shk
->lock
;
589 ksp
->ks_private
= spa
;
590 ksp
->ks_update
= spa_io_history_update
;
597 spa_io_history_destroy(spa_t
*spa
)
599 spa_history_kstat_t
*shk
= &spa
->spa_stats
.io_history
;
602 kstat_delete(shk
->kstat
);
604 mutex_destroy(&shk
->lock
);
/*
 * ==========================================================================
 * SPA MMP History Routines
 * ==========================================================================
 */

/*
 * MMP statistics - Information exported regarding attempted MMP writes
 *   For MMP writes issued, fields used as per comments below.
 *   For MMP writes skipped, an entry represents a span of time when
 *      writes were skipped for same reason (error from mmp_random_leaf).
 *      Differences are:
 *      timestamp   time first write skipped, if >1 skipped in a row
 *      mmp_delay   delay value at timestamp
 *      vdev_guid   number of writes skipped
 *      io_error    one of enum mmp_error
 *      duration    time span (ns) of skipped writes
 */
626 typedef struct spa_mmp_history
{
627 uint64_t mmp_node_id
; /* unique # for updates */
628 uint64_t txg
; /* txg of last sync */
629 uint64_t timestamp
; /* UTC time MMP write issued */
630 uint64_t mmp_delay
; /* mmp_thread.mmp_delay at timestamp */
631 uint64_t vdev_guid
; /* unique ID of leaf vdev */
633 int vdev_label
; /* vdev label */
634 int io_error
; /* error status of MMP write */
635 hrtime_t error_start
; /* hrtime of start of error period */
636 hrtime_t duration
; /* time from submission to completion */
637 procfs_list_node_t smh_node
;
/* Emit the column headers for the per-pool multihost procfs file. */
static int
spa_mmp_history_show_header(struct seq_file *f)
{
    seq_printf(f, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s "
        "%-10s %s\n", "id", "txg", "timestamp", "error", "duration",
        "mmp_delay", "vdev_guid", "vdev_label", "vdev_path");

    return (0);
}
650 spa_mmp_history_show(struct seq_file
*f
, void *data
)
652 spa_mmp_history_t
*smh
= (spa_mmp_history_t
*)data
;
653 char skip_fmt
[] = "%-10llu %-10llu %10llu %#6llx %10lld %12llu %-24llu "
655 char write_fmt
[] = "%-10llu %-10llu %10llu %6lld %10lld %12llu %-24llu "
658 seq_printf(f
, (smh
->error_start
? skip_fmt
: write_fmt
),
659 (u_longlong_t
)smh
->mmp_node_id
, (u_longlong_t
)smh
->txg
,
660 (u_longlong_t
)smh
->timestamp
, (longlong_t
)smh
->io_error
,
661 (longlong_t
)smh
->duration
, (u_longlong_t
)smh
->mmp_delay
,
662 (u_longlong_t
)smh
->vdev_guid
, (u_longlong_t
)smh
->vdev_label
,
663 (smh
->vdev_path
? smh
->vdev_path
: "-"));
668 /* Remove oldest elements from list until there are no more than 'size' left */
670 spa_mmp_history_truncate(spa_history_list_t
*shl
, unsigned int size
)
672 spa_mmp_history_t
*smh
;
673 while (shl
->size
> size
) {
674 smh
= list_remove_head(&shl
->procfs_list
.pl_list
);
676 strfree(smh
->vdev_path
);
677 kmem_free(smh
, sizeof (spa_mmp_history_t
));
682 ASSERT(list_is_empty(&shl
->procfs_list
.pl_list
));
687 spa_mmp_history_clear(procfs_list_t
*procfs_list
)
689 spa_history_list_t
*shl
= procfs_list
->pl_private
;
690 mutex_enter(&procfs_list
->pl_lock
);
691 spa_mmp_history_truncate(shl
, 0);
692 mutex_exit(&procfs_list
->pl_lock
);
697 spa_mmp_history_init(spa_t
*spa
)
699 spa_history_list_t
*shl
= &spa
->spa_stats
.mmp_history
;
704 module
= kmem_asprintf("zfs/%s", spa_name(spa
));
706 shl
->procfs_list
.pl_private
= shl
;
707 procfs_list_install(module
,
710 spa_mmp_history_show
,
711 spa_mmp_history_show_header
,
712 spa_mmp_history_clear
,
713 offsetof(spa_mmp_history_t
, smh_node
));
719 spa_mmp_history_destroy(spa_t
*spa
)
721 spa_history_list_t
*shl
= &spa
->spa_stats
.mmp_history
;
722 procfs_list_uninstall(&shl
->procfs_list
);
723 spa_mmp_history_truncate(shl
, 0);
724 procfs_list_destroy(&shl
->procfs_list
);
728 * Set duration in existing "skip" record to how long we have waited for a leaf
729 * vdev to become available.
731 * Important that we start search at the tail of the list where new
732 * records are inserted, so this is normally an O(1) operation.
735 spa_mmp_history_set_skip(spa_t
*spa
, uint64_t mmp_node_id
)
737 spa_history_list_t
*shl
= &spa
->spa_stats
.mmp_history
;
738 spa_mmp_history_t
*smh
;
741 if (zfs_multihost_history
== 0 && shl
->size
== 0)
744 mutex_enter(&shl
->procfs_list
.pl_lock
);
745 for (smh
= list_tail(&shl
->procfs_list
.pl_list
); smh
!= NULL
;
746 smh
= list_prev(&shl
->procfs_list
.pl_list
, smh
)) {
747 if (smh
->mmp_node_id
== mmp_node_id
) {
748 ASSERT3U(smh
->io_error
, !=, 0);
749 smh
->duration
= gethrtime() - smh
->error_start
;
755 mutex_exit(&shl
->procfs_list
.pl_lock
);
761 * Set MMP write duration and error status in existing record.
762 * See comment re: search order above spa_mmp_history_set_skip().
765 spa_mmp_history_set(spa_t
*spa
, uint64_t mmp_node_id
, int io_error
,
768 spa_history_list_t
*shl
= &spa
->spa_stats
.mmp_history
;
769 spa_mmp_history_t
*smh
;
772 if (zfs_multihost_history
== 0 && shl
->size
== 0)
775 mutex_enter(&shl
->procfs_list
.pl_lock
);
776 for (smh
= list_tail(&shl
->procfs_list
.pl_list
); smh
!= NULL
;
777 smh
= list_prev(&shl
->procfs_list
.pl_list
, smh
)) {
778 if (smh
->mmp_node_id
== mmp_node_id
) {
779 ASSERT(smh
->io_error
== 0);
780 smh
->io_error
= io_error
;
781 smh
->duration
= duration
;
786 mutex_exit(&shl
->procfs_list
.pl_lock
);
792 * Add a new MMP historical record.
793 * error == 0 : a write was issued.
794 * error != 0 : a write was not issued because no leaves were found.
797 spa_mmp_history_add(spa_t
*spa
, uint64_t txg
, uint64_t timestamp
,
798 uint64_t mmp_delay
, vdev_t
*vd
, int label
, uint64_t mmp_node_id
,
801 spa_history_list_t
*shl
= &spa
->spa_stats
.mmp_history
;
802 spa_mmp_history_t
*smh
;
804 if (zfs_multihost_history
== 0 && shl
->size
== 0)
807 smh
= kmem_zalloc(sizeof (spa_mmp_history_t
), KM_SLEEP
);
809 smh
->timestamp
= timestamp
;
810 smh
->mmp_delay
= mmp_delay
;
812 smh
->vdev_guid
= vd
->vdev_guid
;
814 smh
->vdev_path
= strdup(vd
->vdev_path
);
816 smh
->vdev_label
= label
;
817 smh
->mmp_node_id
= mmp_node_id
;
820 smh
->io_error
= error
;
821 smh
->error_start
= gethrtime();
825 mutex_enter(&shl
->procfs_list
.pl_lock
);
826 procfs_list_add(&shl
->procfs_list
, smh
);
828 spa_mmp_history_truncate(shl
, zfs_multihost_history
);
829 mutex_exit(&shl
->procfs_list
.pl_lock
);
833 spa_state_addr(kstat_t
*ksp
, loff_t n
)
835 return (ksp
->ks_private
); /* return the spa_t */
839 spa_state_data(char *buf
, size_t size
, void *data
)
841 spa_t
*spa
= (spa_t
*)data
;
842 (void) snprintf(buf
, size
, "%s\n", spa_state_to_name(spa
));
847 * Return the state of the pool in /proc/spl/kstat/zfs/<pool>/state.
849 * This is a lock-less read of the pool's state (unlike using 'zpool', which
850 * can potentially block for seconds). Because it doesn't block, it can useful
851 * as a pool heartbeat value.
854 spa_state_init(spa_t
*spa
)
856 spa_history_kstat_t
*shk
= &spa
->spa_stats
.state
;
860 mutex_init(&shk
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
862 name
= kmem_asprintf("zfs/%s", spa_name(spa
));
863 ksp
= kstat_create(name
, 0, "state", "misc",
864 KSTAT_TYPE_RAW
, 0, KSTAT_FLAG_VIRTUAL
);
868 ksp
->ks_lock
= &shk
->lock
;
870 ksp
->ks_private
= spa
;
871 ksp
->ks_flags
|= KSTAT_FLAG_NO_HEADERS
;
872 kstat_set_raw_ops(ksp
, NULL
, spa_state_data
, spa_state_addr
);
880 spa_health_destroy(spa_t
*spa
)
882 spa_history_kstat_t
*shk
= &spa
->spa_stats
.state
;
883 kstat_t
*ksp
= shk
->kstat
;
887 mutex_destroy(&shk
->lock
);
891 spa_stats_init(spa_t
*spa
)
893 spa_read_history_init(spa
);
894 spa_txg_history_init(spa
);
895 spa_tx_assign_init(spa
);
896 spa_io_history_init(spa
);
897 spa_mmp_history_init(spa
);
902 spa_stats_destroy(spa_t
*spa
)
904 spa_health_destroy(spa
);
905 spa_tx_assign_destroy(spa
);
906 spa_txg_history_destroy(spa
);
907 spa_read_history_destroy(spa
);
908 spa_io_history_destroy(spa
);
909 spa_mmp_history_destroy(spa
);
#if defined(_KERNEL)
/* Module tunables (Linux); documented in zfs-module-parameters(5). */
module_param(zfs_read_history, int, 0644);
MODULE_PARM_DESC(zfs_read_history,
    "Historical statistics for the last N reads");

module_param(zfs_read_history_hits, int, 0644);
MODULE_PARM_DESC(zfs_read_history_hits,
    "Include cache hits in read history");

module_param(zfs_txg_history, int, 0644);
MODULE_PARM_DESC(zfs_txg_history,
    "Historical statistics for the last N txgs");

module_param(zfs_multihost_history, int, 0644);
MODULE_PARM_DESC(zfs_multihost_history,
    "Historical statistics for last N multihost writes");
#endif