4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 #include <sys/zfs_context.h>
23 #include <sys/spa_impl.h>
24 #include <sys/vdev_impl.h>
26 #include <zfs_comutil.h>
/*
 * Keeps stats on last N reads per spa_t, disabled by default.
 */
int zfs_read_history = 0;

/*
 * Include cache hits in history, disabled by default.
 */
int zfs_read_history_hits = 0;

/*
 * Keeps stats on the last 100 txgs by default.
 */
int zfs_txg_history = 100;

/*
 * Keeps stats on the last N MMP updates, disabled by default.
 */
int zfs_multihost_history = 0;
49 * ==========================================================================
50 * SPA Read History Routines
51 * ==========================================================================
55 * Read statistics - Information exported regarding each arc_read call
57 typedef struct spa_read_history
{
58 hrtime_t start
; /* time read completed */
59 uint64_t objset
; /* read from this objset */
60 uint64_t object
; /* read of this object number */
61 uint64_t level
; /* block's indirection level */
62 uint64_t blkid
; /* read of this block id */
63 char origin
[24]; /* read originated from here */
64 uint32_t aflags
; /* ARC flags (cached, prefetch, etc.) */
65 pid_t pid
; /* PID of task doing read */
66 char comm
[16]; /* process name of task doing read */
67 procfs_list_node_t srh_node
;
/* Emit the column header line for the per-pool "reads" procfs file. */
static int
spa_read_history_show_header(struct seq_file *f)
{
	seq_printf(f, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s "
	    "%-24s %-8s %-16s\n", "UID", "start", "objset", "object",
	    "level", "blkid", "aflags", "origin", "pid", "process");

	return (0);
}
81 spa_read_history_show(struct seq_file
*f
, void *data
)
83 spa_read_history_t
*srh
= (spa_read_history_t
*)data
;
85 seq_printf(f
, "%-8llu %-16llu 0x%-6llx "
86 "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
87 (u_longlong_t
)srh
->srh_node
.pln_id
, srh
->start
,
88 (longlong_t
)srh
->objset
, (longlong_t
)srh
->object
,
89 (longlong_t
)srh
->level
, (longlong_t
)srh
->blkid
,
90 srh
->aflags
, srh
->origin
, srh
->pid
, srh
->comm
);
95 /* Remove oldest elements from list until there are no more than 'size' left */
97 spa_read_history_truncate(spa_history_list_t
*shl
, unsigned int size
)
99 spa_read_history_t
*srh
;
100 while (shl
->size
> size
) {
101 srh
= list_remove_head(&shl
->procfs_list
.pl_list
);
102 ASSERT3P(srh
, !=, NULL
);
103 kmem_free(srh
, sizeof (spa_read_history_t
));
108 ASSERT(list_is_empty(&shl
->procfs_list
.pl_list
));
112 spa_read_history_clear(procfs_list_t
*procfs_list
)
114 spa_history_list_t
*shl
= procfs_list
->pl_private
;
115 mutex_enter(&procfs_list
->pl_lock
);
116 spa_read_history_truncate(shl
, 0);
117 mutex_exit(&procfs_list
->pl_lock
);
122 spa_read_history_init(spa_t
*spa
)
124 spa_history_list_t
*shl
= &spa
->spa_stats
.read_history
;
129 module
= kmem_asprintf("zfs/%s", spa_name(spa
));
131 shl
->procfs_list
.pl_private
= shl
;
132 procfs_list_install(module
,
136 spa_read_history_show
,
137 spa_read_history_show_header
,
138 spa_read_history_clear
,
139 offsetof(spa_read_history_t
, srh_node
));
145 spa_read_history_destroy(spa_t
*spa
)
147 spa_history_list_t
*shl
= &spa
->spa_stats
.read_history
;
148 procfs_list_uninstall(&shl
->procfs_list
);
149 spa_read_history_truncate(shl
, 0);
150 procfs_list_destroy(&shl
->procfs_list
);
154 spa_read_history_add(spa_t
*spa
, const zbookmark_phys_t
*zb
, uint32_t aflags
)
156 spa_history_list_t
*shl
= &spa
->spa_stats
.read_history
;
157 spa_read_history_t
*srh
;
159 ASSERT3P(spa
, !=, NULL
);
160 ASSERT3P(zb
, !=, NULL
);
162 if (zfs_read_history
== 0 && shl
->size
== 0)
165 if (zfs_read_history_hits
== 0 && (aflags
& ARC_FLAG_CACHED
))
168 srh
= kmem_zalloc(sizeof (spa_read_history_t
), KM_SLEEP
);
169 strlcpy(srh
->comm
, getcomm(), sizeof (srh
->comm
));
170 srh
->start
= gethrtime();
171 srh
->objset
= zb
->zb_objset
;
172 srh
->object
= zb
->zb_object
;
173 srh
->level
= zb
->zb_level
;
174 srh
->blkid
= zb
->zb_blkid
;
175 srh
->aflags
= aflags
;
178 mutex_enter(&shl
->procfs_list
.pl_lock
);
180 procfs_list_add(&shl
->procfs_list
, srh
);
183 spa_read_history_truncate(shl
, zfs_read_history
);
185 mutex_exit(&shl
->procfs_list
.pl_lock
);
189 * ==========================================================================
190 * SPA TXG History Routines
191 * ==========================================================================
195 * Txg statistics - Information exported regarding each txg sync
198 typedef struct spa_txg_history
{
199 uint64_t txg
; /* txg id */
200 txg_state_t state
; /* active txg state */
201 uint64_t nread
; /* number of bytes read */
202 uint64_t nwritten
; /* number of bytes written */
203 uint64_t reads
; /* number of read operations */
204 uint64_t writes
; /* number of write operations */
205 uint64_t ndirty
; /* number of dirty bytes */
206 hrtime_t times
[TXG_STATE_COMMITTED
]; /* completion times */
207 procfs_list_node_t sth_node
;
/* Emit the column header line for the per-pool "txgs" procfs file. */
static int
spa_txg_history_show_header(struct seq_file *f)
{
	seq_printf(f, "%-8s %-16s %-5s %-12s %-12s %-12s "
	    "%-8s %-8s %-12s %-12s %-12s %-12s\n", "txg", "birth", "state",
	    "ndirty", "nread", "nwritten", "reads", "writes",
	    "otime", "qtime", "wtime", "stime");

	return (0);
}
221 spa_txg_history_show(struct seq_file
*f
, void *data
)
223 spa_txg_history_t
*sth
= (spa_txg_history_t
*)data
;
224 uint64_t open
= 0, quiesce
= 0, wait
= 0, sync
= 0;
227 switch (sth
->state
) {
228 case TXG_STATE_BIRTH
: state
= 'B'; break;
229 case TXG_STATE_OPEN
: state
= 'O'; break;
230 case TXG_STATE_QUIESCED
: state
= 'Q'; break;
231 case TXG_STATE_WAIT_FOR_SYNC
: state
= 'W'; break;
232 case TXG_STATE_SYNCED
: state
= 'S'; break;
233 case TXG_STATE_COMMITTED
: state
= 'C'; break;
234 default: state
= '?'; break;
237 if (sth
->times
[TXG_STATE_OPEN
])
238 open
= sth
->times
[TXG_STATE_OPEN
] -
239 sth
->times
[TXG_STATE_BIRTH
];
241 if (sth
->times
[TXG_STATE_QUIESCED
])
242 quiesce
= sth
->times
[TXG_STATE_QUIESCED
] -
243 sth
->times
[TXG_STATE_OPEN
];
245 if (sth
->times
[TXG_STATE_WAIT_FOR_SYNC
])
246 wait
= sth
->times
[TXG_STATE_WAIT_FOR_SYNC
] -
247 sth
->times
[TXG_STATE_QUIESCED
];
249 if (sth
->times
[TXG_STATE_SYNCED
])
250 sync
= sth
->times
[TXG_STATE_SYNCED
] -
251 sth
->times
[TXG_STATE_WAIT_FOR_SYNC
];
253 seq_printf(f
, "%-8llu %-16llu %-5c %-12llu "
254 "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
255 (longlong_t
)sth
->txg
, sth
->times
[TXG_STATE_BIRTH
], state
,
256 (u_longlong_t
)sth
->ndirty
,
257 (u_longlong_t
)sth
->nread
, (u_longlong_t
)sth
->nwritten
,
258 (u_longlong_t
)sth
->reads
, (u_longlong_t
)sth
->writes
,
259 (u_longlong_t
)open
, (u_longlong_t
)quiesce
, (u_longlong_t
)wait
,
265 /* Remove oldest elements from list until there are no more than 'size' left */
267 spa_txg_history_truncate(spa_history_list_t
*shl
, unsigned int size
)
269 spa_txg_history_t
*sth
;
270 while (shl
->size
> size
) {
271 sth
= list_remove_head(&shl
->procfs_list
.pl_list
);
272 ASSERT3P(sth
, !=, NULL
);
273 kmem_free(sth
, sizeof (spa_txg_history_t
));
278 ASSERT(list_is_empty(&shl
->procfs_list
.pl_list
));
283 spa_txg_history_clear(procfs_list_t
*procfs_list
)
285 spa_history_list_t
*shl
= procfs_list
->pl_private
;
286 mutex_enter(&procfs_list
->pl_lock
);
287 spa_txg_history_truncate(shl
, 0);
288 mutex_exit(&procfs_list
->pl_lock
);
293 spa_txg_history_init(spa_t
*spa
)
295 spa_history_list_t
*shl
= &spa
->spa_stats
.txg_history
;
300 module
= kmem_asprintf("zfs/%s", spa_name(spa
));
302 shl
->procfs_list
.pl_private
= shl
;
303 procfs_list_install(module
,
307 spa_txg_history_show
,
308 spa_txg_history_show_header
,
309 spa_txg_history_clear
,
310 offsetof(spa_txg_history_t
, sth_node
));
316 spa_txg_history_destroy(spa_t
*spa
)
318 spa_history_list_t
*shl
= &spa
->spa_stats
.txg_history
;
319 procfs_list_uninstall(&shl
->procfs_list
);
320 spa_txg_history_truncate(shl
, 0);
321 procfs_list_destroy(&shl
->procfs_list
);
325 * Add a new txg to historical record.
328 spa_txg_history_add(spa_t
*spa
, uint64_t txg
, hrtime_t birth_time
)
330 spa_history_list_t
*shl
= &spa
->spa_stats
.txg_history
;
331 spa_txg_history_t
*sth
;
333 if (zfs_txg_history
== 0 && shl
->size
== 0)
336 sth
= kmem_zalloc(sizeof (spa_txg_history_t
), KM_SLEEP
);
338 sth
->state
= TXG_STATE_OPEN
;
339 sth
->times
[TXG_STATE_BIRTH
] = birth_time
;
341 mutex_enter(&shl
->procfs_list
.pl_lock
);
342 procfs_list_add(&shl
->procfs_list
, sth
);
344 spa_txg_history_truncate(shl
, zfs_txg_history
);
345 mutex_exit(&shl
->procfs_list
.pl_lock
);
349 * Set txg state completion time and increment current state.
352 spa_txg_history_set(spa_t
*spa
, uint64_t txg
, txg_state_t completed_state
,
353 hrtime_t completed_time
)
355 spa_history_list_t
*shl
= &spa
->spa_stats
.txg_history
;
356 spa_txg_history_t
*sth
;
359 if (zfs_txg_history
== 0)
362 mutex_enter(&shl
->procfs_list
.pl_lock
);
363 for (sth
= list_tail(&shl
->procfs_list
.pl_list
); sth
!= NULL
;
364 sth
= list_prev(&shl
->procfs_list
.pl_list
, sth
)) {
365 if (sth
->txg
== txg
) {
366 sth
->times
[completed_state
] = completed_time
;
372 mutex_exit(&shl
->procfs_list
.pl_lock
);
381 spa_txg_history_set_io(spa_t
*spa
, uint64_t txg
, uint64_t nread
,
382 uint64_t nwritten
, uint64_t reads
, uint64_t writes
, uint64_t ndirty
)
384 spa_history_list_t
*shl
= &spa
->spa_stats
.txg_history
;
385 spa_txg_history_t
*sth
;
388 if (zfs_txg_history
== 0)
391 mutex_enter(&shl
->procfs_list
.pl_lock
);
392 for (sth
= list_tail(&shl
->procfs_list
.pl_list
); sth
!= NULL
;
393 sth
= list_prev(&shl
->procfs_list
.pl_list
, sth
)) {
394 if (sth
->txg
== txg
) {
396 sth
->nwritten
= nwritten
;
398 sth
->writes
= writes
;
399 sth
->ndirty
= ndirty
;
404 mutex_exit(&shl
->procfs_list
.pl_lock
);
410 spa_txg_history_init_io(spa_t
*spa
, uint64_t txg
, dsl_pool_t
*dp
)
414 if (zfs_txg_history
== 0)
417 ts
= kmem_alloc(sizeof (txg_stat_t
), KM_SLEEP
);
419 spa_config_enter(spa
, SCL_CONFIG
, FTAG
, RW_READER
);
420 vdev_get_stats(spa
->spa_root_vdev
, &ts
->vs1
);
421 spa_config_exit(spa
, SCL_CONFIG
, FTAG
);
424 ts
->ndirty
= dp
->dp_dirty_pertxg
[txg
& TXG_MASK
];
426 spa_txg_history_set(spa
, txg
, TXG_STATE_WAIT_FOR_SYNC
, gethrtime());
432 spa_txg_history_fini_io(spa_t
*spa
, txg_stat_t
*ts
)
437 if (zfs_txg_history
== 0) {
438 kmem_free(ts
, sizeof (txg_stat_t
));
442 spa_config_enter(spa
, SCL_CONFIG
, FTAG
, RW_READER
);
443 vdev_get_stats(spa
->spa_root_vdev
, &ts
->vs2
);
444 spa_config_exit(spa
, SCL_CONFIG
, FTAG
);
446 spa_txg_history_set(spa
, ts
->txg
, TXG_STATE_SYNCED
, gethrtime());
447 spa_txg_history_set_io(spa
, ts
->txg
,
448 ts
->vs2
.vs_bytes
[ZIO_TYPE_READ
] - ts
->vs1
.vs_bytes
[ZIO_TYPE_READ
],
449 ts
->vs2
.vs_bytes
[ZIO_TYPE_WRITE
] - ts
->vs1
.vs_bytes
[ZIO_TYPE_WRITE
],
450 ts
->vs2
.vs_ops
[ZIO_TYPE_READ
] - ts
->vs1
.vs_ops
[ZIO_TYPE_READ
],
451 ts
->vs2
.vs_ops
[ZIO_TYPE_WRITE
] - ts
->vs1
.vs_ops
[ZIO_TYPE_WRITE
],
454 kmem_free(ts
, sizeof (txg_stat_t
));
458 * ==========================================================================
459 * SPA TX Assign Histogram Routines
460 * ==========================================================================
464 * Tx statistics - Information exported regarding dmu_tx_assign time.
468 * When the kstat is written zero all buckets. When the kstat is read
469 * count the number of trailing buckets set to zero and update ks_ndata
470 * such that they are not output.
473 spa_tx_assign_update(kstat_t
*ksp
, int rw
)
475 spa_t
*spa
= ksp
->ks_private
;
476 spa_history_kstat_t
*shk
= &spa
->spa_stats
.tx_assign_histogram
;
479 if (rw
== KSTAT_WRITE
) {
480 for (i
= 0; i
< shk
->count
; i
++)
481 ((kstat_named_t
*)shk
->private)[i
].value
.ui64
= 0;
484 for (i
= shk
->count
; i
> 0; i
--)
485 if (((kstat_named_t
*)shk
->private)[i
-1].value
.ui64
!= 0)
489 ksp
->ks_data_size
= i
* sizeof (kstat_named_t
);
495 spa_tx_assign_init(spa_t
*spa
)
497 spa_history_kstat_t
*shk
= &spa
->spa_stats
.tx_assign_histogram
;
503 mutex_init(&shk
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
505 shk
->count
= 42; /* power of two buckets for 1ns to 2,199s */
506 shk
->size
= shk
->count
* sizeof (kstat_named_t
);
507 shk
->private = kmem_alloc(shk
->size
, KM_SLEEP
);
509 name
= kmem_asprintf("zfs/%s", spa_name(spa
));
511 for (i
= 0; i
< shk
->count
; i
++) {
512 ks
= &((kstat_named_t
*)shk
->private)[i
];
513 ks
->data_type
= KSTAT_DATA_UINT64
;
515 (void) snprintf(ks
->name
, KSTAT_STRLEN
, "%llu ns",
516 (u_longlong_t
)1 << i
);
519 ksp
= kstat_create(name
, 0, "dmu_tx_assign", "misc",
520 KSTAT_TYPE_NAMED
, 0, KSTAT_FLAG_VIRTUAL
);
524 ksp
->ks_lock
= &shk
->lock
;
525 ksp
->ks_data
= shk
->private;
526 ksp
->ks_ndata
= shk
->count
;
527 ksp
->ks_data_size
= shk
->size
;
528 ksp
->ks_private
= spa
;
529 ksp
->ks_update
= spa_tx_assign_update
;
536 spa_tx_assign_destroy(spa_t
*spa
)
538 spa_history_kstat_t
*shk
= &spa
->spa_stats
.tx_assign_histogram
;
545 kmem_free(shk
->private, shk
->size
);
546 mutex_destroy(&shk
->lock
);
550 spa_tx_assign_add_nsecs(spa_t
*spa
, uint64_t nsecs
)
552 spa_history_kstat_t
*shk
= &spa
->spa_stats
.tx_assign_histogram
;
555 while (((1ULL << idx
) < nsecs
) && (idx
< shk
->size
- 1))
558 atomic_inc_64(&((kstat_named_t
*)shk
->private)[idx
].value
.ui64
);
562 * ==========================================================================
563 * SPA IO History Routines
564 * ==========================================================================
567 spa_io_history_update(kstat_t
*ksp
, int rw
)
569 if (rw
== KSTAT_WRITE
)
570 memset(ksp
->ks_data
, 0, ksp
->ks_data_size
);
576 spa_io_history_init(spa_t
*spa
)
578 spa_history_kstat_t
*shk
= &spa
->spa_stats
.io_history
;
582 mutex_init(&shk
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
584 name
= kmem_asprintf("zfs/%s", spa_name(spa
));
586 ksp
= kstat_create(name
, 0, "io", "disk", KSTAT_TYPE_IO
, 1, 0);
590 ksp
->ks_lock
= &shk
->lock
;
591 ksp
->ks_private
= spa
;
592 ksp
->ks_update
= spa_io_history_update
;
599 spa_io_history_destroy(spa_t
*spa
)
601 spa_history_kstat_t
*shk
= &spa
->spa_stats
.io_history
;
604 kstat_delete(shk
->kstat
);
606 mutex_destroy(&shk
->lock
);
610 * ==========================================================================
611 * SPA MMP History Routines
612 * ==========================================================================
616 * MMP statistics - Information exported regarding attempted MMP writes
617 * For MMP writes issued, fields used as per comments below.
618 * For MMP writes skipped, an entry represents a span of time when
619 * writes were skipped for same reason (error from mmp_random_leaf).
621 * timestamp time first write skipped, if >1 skipped in a row
622 * mmp_delay delay value at timestamp
623 * vdev_guid number of writes skipped
624 * io_error one of enum mmp_error
625 * duration time span (ns) of skipped writes
628 typedef struct spa_mmp_history
{
629 uint64_t mmp_node_id
; /* unique # for updates */
630 uint64_t txg
; /* txg of last sync */
631 uint64_t timestamp
; /* UTC time MMP write issued */
632 uint64_t mmp_delay
; /* mmp_thread.mmp_delay at timestamp */
633 uint64_t vdev_guid
; /* unique ID of leaf vdev */
635 int vdev_label
; /* vdev label */
636 int io_error
; /* error status of MMP write */
637 hrtime_t error_start
; /* hrtime of start of error period */
638 hrtime_t duration
; /* time from submission to completion */
639 procfs_list_node_t smh_node
;
/* Emit the column header line for the per-pool "multihost" procfs file. */
static int
spa_mmp_history_show_header(struct seq_file *f)
{
	seq_printf(f, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s "
	    "%-10s %s\n", "id", "txg", "timestamp", "error", "duration",
	    "mmp_delay", "vdev_guid", "vdev_label", "vdev_path");

	return (0);
}
652 spa_mmp_history_show(struct seq_file
*f
, void *data
)
654 spa_mmp_history_t
*smh
= (spa_mmp_history_t
*)data
;
655 char skip_fmt
[] = "%-10llu %-10llu %10llu %#6llx %10lld %12llu %-24llu "
657 char write_fmt
[] = "%-10llu %-10llu %10llu %6lld %10lld %12llu %-24llu "
660 seq_printf(f
, (smh
->error_start
? skip_fmt
: write_fmt
),
661 (u_longlong_t
)smh
->mmp_node_id
, (u_longlong_t
)smh
->txg
,
662 (u_longlong_t
)smh
->timestamp
, (longlong_t
)smh
->io_error
,
663 (longlong_t
)smh
->duration
, (u_longlong_t
)smh
->mmp_delay
,
664 (u_longlong_t
)smh
->vdev_guid
, (u_longlong_t
)smh
->vdev_label
,
665 (smh
->vdev_path
? smh
->vdev_path
: "-"));
670 /* Remove oldest elements from list until there are no more than 'size' left */
672 spa_mmp_history_truncate(spa_history_list_t
*shl
, unsigned int size
)
674 spa_mmp_history_t
*smh
;
675 while (shl
->size
> size
) {
676 smh
= list_remove_head(&shl
->procfs_list
.pl_list
);
678 strfree(smh
->vdev_path
);
679 kmem_free(smh
, sizeof (spa_mmp_history_t
));
684 ASSERT(list_is_empty(&shl
->procfs_list
.pl_list
));
689 spa_mmp_history_clear(procfs_list_t
*procfs_list
)
691 spa_history_list_t
*shl
= procfs_list
->pl_private
;
692 mutex_enter(&procfs_list
->pl_lock
);
693 spa_mmp_history_truncate(shl
, 0);
694 mutex_exit(&procfs_list
->pl_lock
);
699 spa_mmp_history_init(spa_t
*spa
)
701 spa_history_list_t
*shl
= &spa
->spa_stats
.mmp_history
;
706 module
= kmem_asprintf("zfs/%s", spa_name(spa
));
708 shl
->procfs_list
.pl_private
= shl
;
709 procfs_list_install(module
,
713 spa_mmp_history_show
,
714 spa_mmp_history_show_header
,
715 spa_mmp_history_clear
,
716 offsetof(spa_mmp_history_t
, smh_node
));
722 spa_mmp_history_destroy(spa_t
*spa
)
724 spa_history_list_t
*shl
= &spa
->spa_stats
.mmp_history
;
725 procfs_list_uninstall(&shl
->procfs_list
);
726 spa_mmp_history_truncate(shl
, 0);
727 procfs_list_destroy(&shl
->procfs_list
);
731 * Set duration in existing "skip" record to how long we have waited for a leaf
732 * vdev to become available.
734 * Important that we start search at the tail of the list where new
735 * records are inserted, so this is normally an O(1) operation.
738 spa_mmp_history_set_skip(spa_t
*spa
, uint64_t mmp_node_id
)
740 spa_history_list_t
*shl
= &spa
->spa_stats
.mmp_history
;
741 spa_mmp_history_t
*smh
;
744 if (zfs_multihost_history
== 0 && shl
->size
== 0)
747 mutex_enter(&shl
->procfs_list
.pl_lock
);
748 for (smh
= list_tail(&shl
->procfs_list
.pl_list
); smh
!= NULL
;
749 smh
= list_prev(&shl
->procfs_list
.pl_list
, smh
)) {
750 if (smh
->mmp_node_id
== mmp_node_id
) {
751 ASSERT3U(smh
->io_error
, !=, 0);
752 smh
->duration
= gethrtime() - smh
->error_start
;
758 mutex_exit(&shl
->procfs_list
.pl_lock
);
764 * Set MMP write duration and error status in existing record.
765 * See comment re: search order above spa_mmp_history_set_skip().
768 spa_mmp_history_set(spa_t
*spa
, uint64_t mmp_node_id
, int io_error
,
771 spa_history_list_t
*shl
= &spa
->spa_stats
.mmp_history
;
772 spa_mmp_history_t
*smh
;
775 if (zfs_multihost_history
== 0 && shl
->size
== 0)
778 mutex_enter(&shl
->procfs_list
.pl_lock
);
779 for (smh
= list_tail(&shl
->procfs_list
.pl_list
); smh
!= NULL
;
780 smh
= list_prev(&shl
->procfs_list
.pl_list
, smh
)) {
781 if (smh
->mmp_node_id
== mmp_node_id
) {
782 ASSERT(smh
->io_error
== 0);
783 smh
->io_error
= io_error
;
784 smh
->duration
= duration
;
789 mutex_exit(&shl
->procfs_list
.pl_lock
);
795 * Add a new MMP historical record.
796 * error == 0 : a write was issued.
797 * error != 0 : a write was not issued because no leaves were found.
800 spa_mmp_history_add(spa_t
*spa
, uint64_t txg
, uint64_t timestamp
,
801 uint64_t mmp_delay
, vdev_t
*vd
, int label
, uint64_t mmp_node_id
,
804 spa_history_list_t
*shl
= &spa
->spa_stats
.mmp_history
;
805 spa_mmp_history_t
*smh
;
807 if (zfs_multihost_history
== 0 && shl
->size
== 0)
810 smh
= kmem_zalloc(sizeof (spa_mmp_history_t
), KM_SLEEP
);
812 smh
->timestamp
= timestamp
;
813 smh
->mmp_delay
= mmp_delay
;
815 smh
->vdev_guid
= vd
->vdev_guid
;
817 smh
->vdev_path
= strdup(vd
->vdev_path
);
819 smh
->vdev_label
= label
;
820 smh
->mmp_node_id
= mmp_node_id
;
823 smh
->io_error
= error
;
824 smh
->error_start
= gethrtime();
828 mutex_enter(&shl
->procfs_list
.pl_lock
);
829 procfs_list_add(&shl
->procfs_list
, smh
);
831 spa_mmp_history_truncate(shl
, zfs_multihost_history
);
832 mutex_exit(&shl
->procfs_list
.pl_lock
);
836 spa_state_addr(kstat_t
*ksp
, loff_t n
)
838 return (ksp
->ks_private
); /* return the spa_t */
842 spa_state_data(char *buf
, size_t size
, void *data
)
844 spa_t
*spa
= (spa_t
*)data
;
845 (void) snprintf(buf
, size
, "%s\n", spa_state_to_name(spa
));
850 * Return the state of the pool in /proc/spl/kstat/zfs/<pool>/state.
852 * This is a lock-less read of the pool's state (unlike using 'zpool', which
853 * can potentially block for seconds). Because it doesn't block, it can useful
854 * as a pool heartbeat value.
857 spa_state_init(spa_t
*spa
)
859 spa_history_kstat_t
*shk
= &spa
->spa_stats
.state
;
863 mutex_init(&shk
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
865 name
= kmem_asprintf("zfs/%s", spa_name(spa
));
866 ksp
= kstat_create(name
, 0, "state", "misc",
867 KSTAT_TYPE_RAW
, 0, KSTAT_FLAG_VIRTUAL
);
871 ksp
->ks_lock
= &shk
->lock
;
873 ksp
->ks_private
= spa
;
874 ksp
->ks_flags
|= KSTAT_FLAG_NO_HEADERS
;
875 kstat_set_raw_ops(ksp
, NULL
, spa_state_data
, spa_state_addr
);
883 spa_health_destroy(spa_t
*spa
)
885 spa_history_kstat_t
*shk
= &spa
->spa_stats
.state
;
886 kstat_t
*ksp
= shk
->kstat
;
890 mutex_destroy(&shk
->lock
);
893 static spa_iostats_t spa_iostats_template
= {
894 { "trim_extents_written", KSTAT_DATA_UINT64
},
895 { "trim_bytes_written", KSTAT_DATA_UINT64
},
896 { "trim_extents_skipped", KSTAT_DATA_UINT64
},
897 { "trim_bytes_skipped", KSTAT_DATA_UINT64
},
898 { "trim_extents_failed", KSTAT_DATA_UINT64
},
899 { "trim_bytes_failed", KSTAT_DATA_UINT64
},
900 { "autotrim_extents_written", KSTAT_DATA_UINT64
},
901 { "autotrim_bytes_written", KSTAT_DATA_UINT64
},
902 { "autotrim_extents_skipped", KSTAT_DATA_UINT64
},
903 { "autotrim_bytes_skipped", KSTAT_DATA_UINT64
},
904 { "autotrim_extents_failed", KSTAT_DATA_UINT64
},
905 { "autotrim_bytes_failed", KSTAT_DATA_UINT64
},
/* Atomically accumulate 'val' into the named iostats counter. */
#define	SPA_IOSTATS_ADD(stat, val) \
    atomic_add_64(&iostats->stat.value.ui64, (val));
912 spa_iostats_trim_add(spa_t
*spa
, trim_type_t type
,
913 uint64_t extents_written
, uint64_t bytes_written
,
914 uint64_t extents_skipped
, uint64_t bytes_skipped
,
915 uint64_t extents_failed
, uint64_t bytes_failed
)
917 spa_history_kstat_t
*shk
= &spa
->spa_stats
.iostats
;
918 kstat_t
*ksp
= shk
->kstat
;
919 spa_iostats_t
*iostats
;
924 iostats
= ksp
->ks_data
;
925 if (type
== TRIM_TYPE_MANUAL
) {
926 SPA_IOSTATS_ADD(trim_extents_written
, extents_written
);
927 SPA_IOSTATS_ADD(trim_bytes_written
, bytes_written
);
928 SPA_IOSTATS_ADD(trim_extents_skipped
, extents_skipped
);
929 SPA_IOSTATS_ADD(trim_bytes_skipped
, bytes_skipped
);
930 SPA_IOSTATS_ADD(trim_extents_failed
, extents_failed
);
931 SPA_IOSTATS_ADD(trim_bytes_failed
, bytes_failed
);
933 SPA_IOSTATS_ADD(autotrim_extents_written
, extents_written
);
934 SPA_IOSTATS_ADD(autotrim_bytes_written
, bytes_written
);
935 SPA_IOSTATS_ADD(autotrim_extents_skipped
, extents_skipped
);
936 SPA_IOSTATS_ADD(autotrim_bytes_skipped
, bytes_skipped
);
937 SPA_IOSTATS_ADD(autotrim_extents_failed
, extents_failed
);
938 SPA_IOSTATS_ADD(autotrim_bytes_failed
, bytes_failed
);
943 spa_iostats_update(kstat_t
*ksp
, int rw
)
945 if (rw
== KSTAT_WRITE
) {
946 memcpy(ksp
->ks_data
, &spa_iostats_template
,
947 sizeof (spa_iostats_t
));
954 spa_iostats_init(spa_t
*spa
)
956 spa_history_kstat_t
*shk
= &spa
->spa_stats
.iostats
;
958 mutex_init(&shk
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
960 char *name
= kmem_asprintf("zfs/%s", spa_name(spa
));
961 kstat_t
*ksp
= kstat_create(name
, 0, "iostats", "misc",
962 KSTAT_TYPE_NAMED
, sizeof (spa_iostats_t
) / sizeof (kstat_named_t
),
967 int size
= sizeof (spa_iostats_t
);
968 ksp
->ks_lock
= &shk
->lock
;
969 ksp
->ks_private
= spa
;
970 ksp
->ks_update
= spa_iostats_update
;
971 ksp
->ks_data
= kmem_alloc(size
, KM_SLEEP
);
972 memcpy(ksp
->ks_data
, &spa_iostats_template
, size
);
980 spa_iostats_destroy(spa_t
*spa
)
982 spa_history_kstat_t
*shk
= &spa
->spa_stats
.iostats
;
983 kstat_t
*ksp
= shk
->kstat
;
985 kmem_free(ksp
->ks_data
, sizeof (spa_iostats_t
));
989 mutex_destroy(&shk
->lock
);
993 spa_stats_init(spa_t
*spa
)
995 spa_read_history_init(spa
);
996 spa_txg_history_init(spa
);
997 spa_tx_assign_init(spa
);
998 spa_io_history_init(spa
);
999 spa_mmp_history_init(spa
);
1000 spa_state_init(spa
);
1001 spa_iostats_init(spa
);
1005 spa_stats_destroy(spa_t
*spa
)
1007 spa_iostats_destroy(spa
);
1008 spa_health_destroy(spa
);
1009 spa_tx_assign_destroy(spa
);
1010 spa_txg_history_destroy(spa
);
1011 spa_read_history_destroy(spa
);
1012 spa_io_history_destroy(spa
);
1013 spa_mmp_history_destroy(spa
);
#if defined(_KERNEL)
/* Linux module parameters exposing the history tunables (rw, 0644). */
module_param(zfs_read_history, int, 0644);
MODULE_PARM_DESC(zfs_read_history,
	"Historical statistics for the last N reads");

module_param(zfs_read_history_hits, int, 0644);
MODULE_PARM_DESC(zfs_read_history_hits,
	"Include cache hits in read history");

module_param(zfs_txg_history, int, 0644);
MODULE_PARM_DESC(zfs_txg_history,
	"Historical statistics for the last N txgs");

module_param(zfs_multihost_history, int, 0644);
MODULE_PARM_DESC(zfs_multihost_history,
	"Historical statistics for last N multihost writes");
#endif