/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
22 #include <sys/zfs_context.h>
23 #include <sys/spa_impl.h>
24 #include <sys/vdev_impl.h>
/*
 * Keeps stats on last N reads per spa_t, disabled by default.
 */
int zfs_read_history = 0;

/*
 * Include cache hits in history, disabled by default.
 */
int zfs_read_history_hits = 0;

/*
 * Keeps stats on the last 100 txgs by default.
 */
int zfs_txg_history = 100;

/*
 * Keeps stats on the last N MMP updates, disabled by default.
 */
int zfs_multihost_history = 0;
/*
 * ==========================================================================
 * SPA Read History Routines
 * ==========================================================================
 */
53 * Read statistics - Information exported regarding each arc_read call
55 typedef struct spa_read_history
{
56 uint64_t uid
; /* unique identifier */
57 hrtime_t start
; /* time read completed */
58 uint64_t objset
; /* read from this objset */
59 uint64_t object
; /* read of this object number */
60 uint64_t level
; /* block's indirection level */
61 uint64_t blkid
; /* read of this block id */
62 char origin
[24]; /* read originated from here */
63 uint32_t aflags
; /* ARC flags (cached, prefetch, etc.) */
64 pid_t pid
; /* PID of task doing read */
65 char comm
[16]; /* process name of task doing read */
70 spa_read_history_headers(char *buf
, size_t size
)
72 (void) snprintf(buf
, size
, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s "
73 "%-24s %-8s %-16s\n", "UID", "start", "objset", "object",
74 "level", "blkid", "aflags", "origin", "pid", "process");
80 spa_read_history_data(char *buf
, size_t size
, void *data
)
82 spa_read_history_t
*srh
= (spa_read_history_t
*)data
;
84 (void) snprintf(buf
, size
, "%-8llu %-16llu 0x%-6llx "
85 "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
86 (u_longlong_t
)srh
->uid
, srh
->start
,
87 (longlong_t
)srh
->objset
, (longlong_t
)srh
->object
,
88 (longlong_t
)srh
->level
, (longlong_t
)srh
->blkid
,
89 srh
->aflags
, srh
->origin
, srh
->pid
, srh
->comm
);
95 * Calculate the address for the next spa_stats_history_t entry. The
96 * ssh->lock will be held until ksp->ks_ndata entries are processed.
99 spa_read_history_addr(kstat_t
*ksp
, loff_t n
)
101 spa_t
*spa
= ksp
->ks_private
;
102 spa_stats_history_t
*ssh
= &spa
->spa_stats
.read_history
;
104 ASSERT(MUTEX_HELD(&ssh
->lock
));
107 ssh
->private = list_tail(&ssh
->list
);
108 else if (ssh
->private)
109 ssh
->private = list_prev(&ssh
->list
, ssh
->private);
111 return (ssh
->private);
115 * When the kstat is written discard all spa_read_history_t entries. The
116 * ssh->lock will be held until ksp->ks_ndata entries are processed.
119 spa_read_history_update(kstat_t
*ksp
, int rw
)
121 spa_t
*spa
= ksp
->ks_private
;
122 spa_stats_history_t
*ssh
= &spa
->spa_stats
.read_history
;
124 if (rw
== KSTAT_WRITE
) {
125 spa_read_history_t
*srh
;
127 while ((srh
= list_remove_head(&ssh
->list
))) {
129 kmem_free(srh
, sizeof (spa_read_history_t
));
132 ASSERT3U(ssh
->size
, ==, 0);
135 ksp
->ks_ndata
= ssh
->size
;
136 ksp
->ks_data_size
= ssh
->size
* sizeof (spa_read_history_t
);
142 spa_read_history_init(spa_t
*spa
)
144 spa_stats_history_t
*ssh
= &spa
->spa_stats
.read_history
;
148 mutex_init(&ssh
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
149 list_create(&ssh
->list
, sizeof (spa_read_history_t
),
150 offsetof(spa_read_history_t
, srh_link
));
156 name
= kmem_asprintf("zfs/%s", spa_name(spa
));
158 ksp
= kstat_create(name
, 0, "reads", "misc",
159 KSTAT_TYPE_RAW
, 0, KSTAT_FLAG_VIRTUAL
);
163 ksp
->ks_lock
= &ssh
->lock
;
165 ksp
->ks_private
= spa
;
166 ksp
->ks_update
= spa_read_history_update
;
167 kstat_set_raw_ops(ksp
, spa_read_history_headers
,
168 spa_read_history_data
, spa_read_history_addr
);
175 spa_read_history_destroy(spa_t
*spa
)
177 spa_stats_history_t
*ssh
= &spa
->spa_stats
.read_history
;
178 spa_read_history_t
*srh
;
185 mutex_enter(&ssh
->lock
);
186 while ((srh
= list_remove_head(&ssh
->list
))) {
188 kmem_free(srh
, sizeof (spa_read_history_t
));
191 ASSERT3U(ssh
->size
, ==, 0);
192 list_destroy(&ssh
->list
);
193 mutex_exit(&ssh
->lock
);
195 mutex_destroy(&ssh
->lock
);
199 spa_read_history_add(spa_t
*spa
, const zbookmark_phys_t
*zb
, uint32_t aflags
)
201 spa_stats_history_t
*ssh
= &spa
->spa_stats
.read_history
;
202 spa_read_history_t
*srh
, *rm
;
204 ASSERT3P(spa
, !=, NULL
);
205 ASSERT3P(zb
, !=, NULL
);
207 if (zfs_read_history
== 0 && ssh
->size
== 0)
210 if (zfs_read_history_hits
== 0 && (aflags
& ARC_FLAG_CACHED
))
213 srh
= kmem_zalloc(sizeof (spa_read_history_t
), KM_SLEEP
);
214 strlcpy(srh
->comm
, getcomm(), sizeof (srh
->comm
));
215 srh
->start
= gethrtime();
216 srh
->objset
= zb
->zb_objset
;
217 srh
->object
= zb
->zb_object
;
218 srh
->level
= zb
->zb_level
;
219 srh
->blkid
= zb
->zb_blkid
;
220 srh
->aflags
= aflags
;
223 mutex_enter(&ssh
->lock
);
225 srh
->uid
= ssh
->count
++;
226 list_insert_head(&ssh
->list
, srh
);
229 while (ssh
->size
> zfs_read_history
) {
231 rm
= list_remove_tail(&ssh
->list
);
232 kmem_free(rm
, sizeof (spa_read_history_t
));
235 mutex_exit(&ssh
->lock
);
/*
 * ==========================================================================
 * SPA TXG History Routines
 * ==========================================================================
 */
245 * Txg statistics - Information exported regarding each txg sync
248 typedef struct spa_txg_history
{
249 uint64_t txg
; /* txg id */
250 txg_state_t state
; /* active txg state */
251 uint64_t nread
; /* number of bytes read */
252 uint64_t nwritten
; /* number of bytes written */
253 uint64_t reads
; /* number of read operations */
254 uint64_t writes
; /* number of write operations */
255 uint64_t ndirty
; /* number of dirty bytes */
256 hrtime_t times
[TXG_STATE_COMMITTED
]; /* completion times */
257 list_node_t sth_link
;
261 spa_txg_history_headers(char *buf
, size_t size
)
263 (void) snprintf(buf
, size
, "%-8s %-16s %-5s %-12s %-12s %-12s "
264 "%-8s %-8s %-12s %-12s %-12s %-12s\n", "txg", "birth", "state",
265 "ndirty", "nread", "nwritten", "reads", "writes",
266 "otime", "qtime", "wtime", "stime");
272 spa_txg_history_data(char *buf
, size_t size
, void *data
)
274 spa_txg_history_t
*sth
= (spa_txg_history_t
*)data
;
275 uint64_t open
= 0, quiesce
= 0, wait
= 0, sync
= 0;
278 switch (sth
->state
) {
279 case TXG_STATE_BIRTH
: state
= 'B'; break;
280 case TXG_STATE_OPEN
: state
= 'O'; break;
281 case TXG_STATE_QUIESCED
: state
= 'Q'; break;
282 case TXG_STATE_WAIT_FOR_SYNC
: state
= 'W'; break;
283 case TXG_STATE_SYNCED
: state
= 'S'; break;
284 case TXG_STATE_COMMITTED
: state
= 'C'; break;
285 default: state
= '?'; break;
288 if (sth
->times
[TXG_STATE_OPEN
])
289 open
= sth
->times
[TXG_STATE_OPEN
] -
290 sth
->times
[TXG_STATE_BIRTH
];
292 if (sth
->times
[TXG_STATE_QUIESCED
])
293 quiesce
= sth
->times
[TXG_STATE_QUIESCED
] -
294 sth
->times
[TXG_STATE_OPEN
];
296 if (sth
->times
[TXG_STATE_WAIT_FOR_SYNC
])
297 wait
= sth
->times
[TXG_STATE_WAIT_FOR_SYNC
] -
298 sth
->times
[TXG_STATE_QUIESCED
];
300 if (sth
->times
[TXG_STATE_SYNCED
])
301 sync
= sth
->times
[TXG_STATE_SYNCED
] -
302 sth
->times
[TXG_STATE_WAIT_FOR_SYNC
];
304 (void) snprintf(buf
, size
, "%-8llu %-16llu %-5c %-12llu "
305 "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
306 (longlong_t
)sth
->txg
, sth
->times
[TXG_STATE_BIRTH
], state
,
307 (u_longlong_t
)sth
->ndirty
,
308 (u_longlong_t
)sth
->nread
, (u_longlong_t
)sth
->nwritten
,
309 (u_longlong_t
)sth
->reads
, (u_longlong_t
)sth
->writes
,
310 (u_longlong_t
)open
, (u_longlong_t
)quiesce
, (u_longlong_t
)wait
,
317 * Calculate the address for the next spa_stats_history_t entry. The
318 * ssh->lock will be held until ksp->ks_ndata entries are processed.
321 spa_txg_history_addr(kstat_t
*ksp
, loff_t n
)
323 spa_t
*spa
= ksp
->ks_private
;
324 spa_stats_history_t
*ssh
= &spa
->spa_stats
.txg_history
;
326 ASSERT(MUTEX_HELD(&ssh
->lock
));
329 ssh
->private = list_tail(&ssh
->list
);
330 else if (ssh
->private)
331 ssh
->private = list_prev(&ssh
->list
, ssh
->private);
333 return (ssh
->private);
337 * When the kstat is written discard all spa_txg_history_t entries. The
338 * ssh->lock will be held until ksp->ks_ndata entries are processed.
341 spa_txg_history_update(kstat_t
*ksp
, int rw
)
343 spa_t
*spa
= ksp
->ks_private
;
344 spa_stats_history_t
*ssh
= &spa
->spa_stats
.txg_history
;
346 ASSERT(MUTEX_HELD(&ssh
->lock
));
348 if (rw
== KSTAT_WRITE
) {
349 spa_txg_history_t
*sth
;
351 while ((sth
= list_remove_head(&ssh
->list
))) {
353 kmem_free(sth
, sizeof (spa_txg_history_t
));
356 ASSERT3U(ssh
->size
, ==, 0);
359 ksp
->ks_ndata
= ssh
->size
;
360 ksp
->ks_data_size
= ssh
->size
* sizeof (spa_txg_history_t
);
366 spa_txg_history_init(spa_t
*spa
)
368 spa_stats_history_t
*ssh
= &spa
->spa_stats
.txg_history
;
372 mutex_init(&ssh
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
373 list_create(&ssh
->list
, sizeof (spa_txg_history_t
),
374 offsetof(spa_txg_history_t
, sth_link
));
380 name
= kmem_asprintf("zfs/%s", spa_name(spa
));
382 ksp
= kstat_create(name
, 0, "txgs", "misc",
383 KSTAT_TYPE_RAW
, 0, KSTAT_FLAG_VIRTUAL
);
387 ksp
->ks_lock
= &ssh
->lock
;
389 ksp
->ks_private
= spa
;
390 ksp
->ks_update
= spa_txg_history_update
;
391 kstat_set_raw_ops(ksp
, spa_txg_history_headers
,
392 spa_txg_history_data
, spa_txg_history_addr
);
399 spa_txg_history_destroy(spa_t
*spa
)
401 spa_stats_history_t
*ssh
= &spa
->spa_stats
.txg_history
;
402 spa_txg_history_t
*sth
;
409 mutex_enter(&ssh
->lock
);
410 while ((sth
= list_remove_head(&ssh
->list
))) {
412 kmem_free(sth
, sizeof (spa_txg_history_t
));
415 ASSERT3U(ssh
->size
, ==, 0);
416 list_destroy(&ssh
->list
);
417 mutex_exit(&ssh
->lock
);
419 mutex_destroy(&ssh
->lock
);
423 * Add a new txg to historical record.
426 spa_txg_history_add(spa_t
*spa
, uint64_t txg
, hrtime_t birth_time
)
428 spa_stats_history_t
*ssh
= &spa
->spa_stats
.txg_history
;
429 spa_txg_history_t
*sth
, *rm
;
431 if (zfs_txg_history
== 0 && ssh
->size
== 0)
434 sth
= kmem_zalloc(sizeof (spa_txg_history_t
), KM_SLEEP
);
436 sth
->state
= TXG_STATE_OPEN
;
437 sth
->times
[TXG_STATE_BIRTH
] = birth_time
;
439 mutex_enter(&ssh
->lock
);
441 list_insert_head(&ssh
->list
, sth
);
444 while (ssh
->size
> zfs_txg_history
) {
446 rm
= list_remove_tail(&ssh
->list
);
447 kmem_free(rm
, sizeof (spa_txg_history_t
));
450 mutex_exit(&ssh
->lock
);
454 * Set txg state completion time and increment current state.
457 spa_txg_history_set(spa_t
*spa
, uint64_t txg
, txg_state_t completed_state
,
458 hrtime_t completed_time
)
460 spa_stats_history_t
*ssh
= &spa
->spa_stats
.txg_history
;
461 spa_txg_history_t
*sth
;
464 if (zfs_txg_history
== 0)
467 mutex_enter(&ssh
->lock
);
468 for (sth
= list_head(&ssh
->list
); sth
!= NULL
;
469 sth
= list_next(&ssh
->list
, sth
)) {
470 if (sth
->txg
== txg
) {
471 sth
->times
[completed_state
] = completed_time
;
477 mutex_exit(&ssh
->lock
);
486 spa_txg_history_set_io(spa_t
*spa
, uint64_t txg
, uint64_t nread
,
487 uint64_t nwritten
, uint64_t reads
, uint64_t writes
, uint64_t ndirty
)
489 spa_stats_history_t
*ssh
= &spa
->spa_stats
.txg_history
;
490 spa_txg_history_t
*sth
;
493 if (zfs_txg_history
== 0)
496 mutex_enter(&ssh
->lock
);
497 for (sth
= list_head(&ssh
->list
); sth
!= NULL
;
498 sth
= list_next(&ssh
->list
, sth
)) {
499 if (sth
->txg
== txg
) {
501 sth
->nwritten
= nwritten
;
503 sth
->writes
= writes
;
504 sth
->ndirty
= ndirty
;
509 mutex_exit(&ssh
->lock
);
515 spa_txg_history_init_io(spa_t
*spa
, uint64_t txg
, dsl_pool_t
*dp
)
519 if (zfs_txg_history
== 0)
522 ts
= kmem_alloc(sizeof (txg_stat_t
), KM_SLEEP
);
524 spa_config_enter(spa
, SCL_ALL
, FTAG
, RW_READER
);
525 vdev_get_stats(spa
->spa_root_vdev
, &ts
->vs1
);
526 spa_config_exit(spa
, SCL_ALL
, FTAG
);
529 ts
->ndirty
= dp
->dp_dirty_pertxg
[txg
& TXG_MASK
];
531 spa_txg_history_set(spa
, txg
, TXG_STATE_WAIT_FOR_SYNC
, gethrtime());
537 spa_txg_history_fini_io(spa_t
*spa
, txg_stat_t
*ts
)
542 if (zfs_txg_history
== 0) {
543 kmem_free(ts
, sizeof (txg_stat_t
));
547 spa_config_enter(spa
, SCL_ALL
, FTAG
, RW_READER
);
548 vdev_get_stats(spa
->spa_root_vdev
, &ts
->vs2
);
549 spa_config_exit(spa
, SCL_ALL
, FTAG
);
551 spa_txg_history_set(spa
, ts
->txg
, TXG_STATE_SYNCED
, gethrtime());
552 spa_txg_history_set_io(spa
, ts
->txg
,
553 ts
->vs2
.vs_bytes
[ZIO_TYPE_READ
] - ts
->vs1
.vs_bytes
[ZIO_TYPE_READ
],
554 ts
->vs2
.vs_bytes
[ZIO_TYPE_WRITE
] - ts
->vs1
.vs_bytes
[ZIO_TYPE_WRITE
],
555 ts
->vs2
.vs_ops
[ZIO_TYPE_READ
] - ts
->vs1
.vs_ops
[ZIO_TYPE_READ
],
556 ts
->vs2
.vs_ops
[ZIO_TYPE_WRITE
] - ts
->vs1
.vs_ops
[ZIO_TYPE_WRITE
],
559 kmem_free(ts
, sizeof (txg_stat_t
));
/*
 * ==========================================================================
 * SPA TX Assign Histogram Routines
 * ==========================================================================
 */

/*
 * Tx statistics - Information exported regarding dmu_tx_assign time.
 */
573 * When the kstat is written zero all buckets. When the kstat is read
574 * count the number of trailing buckets set to zero and update ks_ndata
575 * such that they are not output.
578 spa_tx_assign_update(kstat_t
*ksp
, int rw
)
580 spa_t
*spa
= ksp
->ks_private
;
581 spa_stats_history_t
*ssh
= &spa
->spa_stats
.tx_assign_histogram
;
584 if (rw
== KSTAT_WRITE
) {
585 for (i
= 0; i
< ssh
->count
; i
++)
586 ((kstat_named_t
*)ssh
->private)[i
].value
.ui64
= 0;
589 for (i
= ssh
->count
; i
> 0; i
--)
590 if (((kstat_named_t
*)ssh
->private)[i
-1].value
.ui64
!= 0)
594 ksp
->ks_data_size
= i
* sizeof (kstat_named_t
);
600 spa_tx_assign_init(spa_t
*spa
)
602 spa_stats_history_t
*ssh
= &spa
->spa_stats
.tx_assign_histogram
;
608 mutex_init(&ssh
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
610 ssh
->count
= 42; /* power of two buckets for 1ns to 2,199s */
611 ssh
->size
= ssh
->count
* sizeof (kstat_named_t
);
612 ssh
->private = kmem_alloc(ssh
->size
, KM_SLEEP
);
614 name
= kmem_asprintf("zfs/%s", spa_name(spa
));
616 for (i
= 0; i
< ssh
->count
; i
++) {
617 ks
= &((kstat_named_t
*)ssh
->private)[i
];
618 ks
->data_type
= KSTAT_DATA_UINT64
;
620 (void) snprintf(ks
->name
, KSTAT_STRLEN
, "%llu ns",
621 (u_longlong_t
)1 << i
);
624 ksp
= kstat_create(name
, 0, "dmu_tx_assign", "misc",
625 KSTAT_TYPE_NAMED
, 0, KSTAT_FLAG_VIRTUAL
);
629 ksp
->ks_lock
= &ssh
->lock
;
630 ksp
->ks_data
= ssh
->private;
631 ksp
->ks_ndata
= ssh
->count
;
632 ksp
->ks_data_size
= ssh
->size
;
633 ksp
->ks_private
= spa
;
634 ksp
->ks_update
= spa_tx_assign_update
;
641 spa_tx_assign_destroy(spa_t
*spa
)
643 spa_stats_history_t
*ssh
= &spa
->spa_stats
.tx_assign_histogram
;
650 kmem_free(ssh
->private, ssh
->size
);
651 mutex_destroy(&ssh
->lock
);
655 spa_tx_assign_add_nsecs(spa_t
*spa
, uint64_t nsecs
)
657 spa_stats_history_t
*ssh
= &spa
->spa_stats
.tx_assign_histogram
;
660 while (((1ULL << idx
) < nsecs
) && (idx
< ssh
->size
- 1))
663 atomic_inc_64(&((kstat_named_t
*)ssh
->private)[idx
].value
.ui64
);
/*
 * ==========================================================================
 * SPA IO History Routines
 * ==========================================================================
 */
672 spa_io_history_update(kstat_t
*ksp
, int rw
)
674 if (rw
== KSTAT_WRITE
)
675 memset(ksp
->ks_data
, 0, ksp
->ks_data_size
);
681 spa_io_history_init(spa_t
*spa
)
683 spa_stats_history_t
*ssh
= &spa
->spa_stats
.io_history
;
687 mutex_init(&ssh
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
689 name
= kmem_asprintf("zfs/%s", spa_name(spa
));
691 ksp
= kstat_create(name
, 0, "io", "disk", KSTAT_TYPE_IO
, 1, 0);
695 ksp
->ks_lock
= &ssh
->lock
;
696 ksp
->ks_private
= spa
;
697 ksp
->ks_update
= spa_io_history_update
;
704 spa_io_history_destroy(spa_t
*spa
)
706 spa_stats_history_t
*ssh
= &spa
->spa_stats
.io_history
;
709 kstat_delete(ssh
->kstat
);
711 mutex_destroy(&ssh
->lock
);
/*
 * ==========================================================================
 * SPA MMP History Routines
 * ==========================================================================
 */

/*
 * MMP statistics - Information exported regarding attempted MMP writes
 *   For MMP writes issued, fields used as per comments below.
 *   For MMP writes skipped, an entry represents a span of time when
 *   writes were skipped for same reason (error from mmp_random_leaf).
 *   Differences are:
 *   timestamp     time first write skipped, if >1 skipped in a row
 *   mmp_delay     delay value at timestamp
 *   vdev_guid     number of writes skipped
 *   io_error      one of enum mmp_error
 *   duration      time span (ns) of skipped writes
 */
733 typedef struct spa_mmp_history
{
734 uint64_t mmp_kstat_id
; /* unique # for updates */
735 uint64_t txg
; /* txg of last sync */
736 uint64_t timestamp
; /* UTC time MMP write issued */
737 uint64_t mmp_delay
; /* mmp_thread.mmp_delay at timestamp */
738 uint64_t vdev_guid
; /* unique ID of leaf vdev */
740 int vdev_label
; /* vdev label */
741 int io_error
; /* error status of MMP write */
742 hrtime_t error_start
; /* hrtime of start of error period */
743 hrtime_t duration
; /* time from submission to completion */
744 list_node_t smh_link
;
748 spa_mmp_history_headers(char *buf
, size_t size
)
750 (void) snprintf(buf
, size
, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s "
751 "%-10s %s\n", "id", "txg", "timestamp", "error", "duration",
752 "mmp_delay", "vdev_guid", "vdev_label", "vdev_path");
757 spa_mmp_history_data(char *buf
, size_t size
, void *data
)
759 spa_mmp_history_t
*smh
= (spa_mmp_history_t
*)data
;
760 char skip_fmt
[] = "%-10llu %-10llu %10llu %#6llx %10lld %12llu %-24llu "
762 char write_fmt
[] = "%-10llu %-10llu %10llu %6lld %10lld %12llu %-24llu "
765 (void) snprintf(buf
, size
, (smh
->error_start
? skip_fmt
: write_fmt
),
766 (u_longlong_t
)smh
->mmp_kstat_id
, (u_longlong_t
)smh
->txg
,
767 (u_longlong_t
)smh
->timestamp
, (longlong_t
)smh
->io_error
,
768 (longlong_t
)smh
->duration
, (u_longlong_t
)smh
->mmp_delay
,
769 (u_longlong_t
)smh
->vdev_guid
, (u_longlong_t
)smh
->vdev_label
,
770 (smh
->vdev_path
? smh
->vdev_path
: "-"));
776 * Calculate the address for the next spa_stats_history_t entry. The
777 * ssh->lock will be held until ksp->ks_ndata entries are processed.
780 spa_mmp_history_addr(kstat_t
*ksp
, loff_t n
)
782 spa_t
*spa
= ksp
->ks_private
;
783 spa_stats_history_t
*ssh
= &spa
->spa_stats
.mmp_history
;
785 ASSERT(MUTEX_HELD(&ssh
->lock
));
788 ssh
->private = list_tail(&ssh
->list
);
789 else if (ssh
->private)
790 ssh
->private = list_prev(&ssh
->list
, ssh
->private);
792 return (ssh
->private);
796 * When the kstat is written discard all spa_mmp_history_t entries. The
797 * ssh->lock will be held until ksp->ks_ndata entries are processed.
800 spa_mmp_history_update(kstat_t
*ksp
, int rw
)
802 spa_t
*spa
= ksp
->ks_private
;
803 spa_stats_history_t
*ssh
= &spa
->spa_stats
.mmp_history
;
805 ASSERT(MUTEX_HELD(&ssh
->lock
));
807 if (rw
== KSTAT_WRITE
) {
808 spa_mmp_history_t
*smh
;
810 while ((smh
= list_remove_head(&ssh
->list
))) {
813 strfree(smh
->vdev_path
);
814 kmem_free(smh
, sizeof (spa_mmp_history_t
));
817 ASSERT3U(ssh
->size
, ==, 0);
820 ksp
->ks_ndata
= ssh
->size
;
821 ksp
->ks_data_size
= ssh
->size
* sizeof (spa_mmp_history_t
);
827 spa_mmp_history_init(spa_t
*spa
)
829 spa_stats_history_t
*ssh
= &spa
->spa_stats
.mmp_history
;
833 mutex_init(&ssh
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
834 list_create(&ssh
->list
, sizeof (spa_mmp_history_t
),
835 offsetof(spa_mmp_history_t
, smh_link
));
841 name
= kmem_asprintf("zfs/%s", spa_name(spa
));
843 ksp
= kstat_create(name
, 0, "multihost", "misc",
844 KSTAT_TYPE_RAW
, 0, KSTAT_FLAG_VIRTUAL
);
848 ksp
->ks_lock
= &ssh
->lock
;
850 ksp
->ks_private
= spa
;
851 ksp
->ks_update
= spa_mmp_history_update
;
852 kstat_set_raw_ops(ksp
, spa_mmp_history_headers
,
853 spa_mmp_history_data
, spa_mmp_history_addr
);
860 spa_mmp_history_destroy(spa_t
*spa
)
862 spa_stats_history_t
*ssh
= &spa
->spa_stats
.mmp_history
;
863 spa_mmp_history_t
*smh
;
870 mutex_enter(&ssh
->lock
);
871 while ((smh
= list_remove_head(&ssh
->list
))) {
874 strfree(smh
->vdev_path
);
875 kmem_free(smh
, sizeof (spa_mmp_history_t
));
878 ASSERT3U(ssh
->size
, ==, 0);
879 list_destroy(&ssh
->list
);
880 mutex_exit(&ssh
->lock
);
882 mutex_destroy(&ssh
->lock
);
886 * Set duration in existing "skip" record to how long we have waited for a leaf
887 * vdev to become available.
889 * Important that we start search at the head of the list where new
890 * records are inserted, so this is normally an O(1) operation.
893 spa_mmp_history_set_skip(spa_t
*spa
, uint64_t mmp_kstat_id
)
895 spa_stats_history_t
*ssh
= &spa
->spa_stats
.mmp_history
;
896 spa_mmp_history_t
*smh
;
899 if (zfs_multihost_history
== 0 && ssh
->size
== 0)
902 mutex_enter(&ssh
->lock
);
903 for (smh
= list_head(&ssh
->list
); smh
!= NULL
;
904 smh
= list_next(&ssh
->list
, smh
)) {
905 if (smh
->mmp_kstat_id
== mmp_kstat_id
) {
906 ASSERT3U(smh
->io_error
, !=, 0);
907 smh
->duration
= gethrtime() - smh
->error_start
;
913 mutex_exit(&ssh
->lock
);
919 * Set MMP write duration and error status in existing record.
920 * See comment re: search order above spa_mmp_history_set_skip().
923 spa_mmp_history_set(spa_t
*spa
, uint64_t mmp_kstat_id
, int io_error
,
926 spa_stats_history_t
*ssh
= &spa
->spa_stats
.mmp_history
;
927 spa_mmp_history_t
*smh
;
930 if (zfs_multihost_history
== 0 && ssh
->size
== 0)
933 mutex_enter(&ssh
->lock
);
934 for (smh
= list_head(&ssh
->list
); smh
!= NULL
;
935 smh
= list_next(&ssh
->list
, smh
)) {
936 if (smh
->mmp_kstat_id
== mmp_kstat_id
) {
937 ASSERT(smh
->io_error
== 0);
938 smh
->io_error
= io_error
;
939 smh
->duration
= duration
;
944 mutex_exit(&ssh
->lock
);
950 * Add a new MMP historical record.
951 * error == 0 : a write was issued.
952 * error != 0 : a write was not issued because no leaves were found.
955 spa_mmp_history_add(spa_t
*spa
, uint64_t txg
, uint64_t timestamp
,
956 uint64_t mmp_delay
, vdev_t
*vd
, int label
, uint64_t mmp_kstat_id
,
959 spa_stats_history_t
*ssh
= &spa
->spa_stats
.mmp_history
;
960 spa_mmp_history_t
*smh
, *rm
;
962 if (zfs_multihost_history
== 0 && ssh
->size
== 0)
965 smh
= kmem_zalloc(sizeof (spa_mmp_history_t
), KM_SLEEP
);
967 smh
->timestamp
= timestamp
;
968 smh
->mmp_delay
= mmp_delay
;
970 smh
->vdev_guid
= vd
->vdev_guid
;
972 smh
->vdev_path
= strdup(vd
->vdev_path
);
974 smh
->vdev_label
= label
;
975 smh
->mmp_kstat_id
= mmp_kstat_id
;
978 smh
->io_error
= error
;
979 smh
->error_start
= gethrtime();
983 mutex_enter(&ssh
->lock
);
985 list_insert_head(&ssh
->list
, smh
);
988 while (ssh
->size
> zfs_multihost_history
) {
990 rm
= list_remove_tail(&ssh
->list
);
992 strfree(rm
->vdev_path
);
993 kmem_free(rm
, sizeof (spa_mmp_history_t
));
996 mutex_exit(&ssh
->lock
);
997 return ((void *)smh
);
1001 spa_stats_init(spa_t
*spa
)
1003 spa_read_history_init(spa
);
1004 spa_txg_history_init(spa
);
1005 spa_tx_assign_init(spa
);
1006 spa_io_history_init(spa
);
1007 spa_mmp_history_init(spa
);
1011 spa_stats_destroy(spa_t
*spa
)
1013 spa_tx_assign_destroy(spa
);
1014 spa_txg_history_destroy(spa
);
1015 spa_read_history_destroy(spa
);
1016 spa_io_history_destroy(spa
);
1017 spa_mmp_history_destroy(spa
);
#if defined(_KERNEL) && defined(HAVE_SPL)
module_param(zfs_read_history, int, 0644);
MODULE_PARM_DESC(zfs_read_history,
	"Historical statistics for the last N reads");

module_param(zfs_read_history_hits, int, 0644);
MODULE_PARM_DESC(zfs_read_history_hits,
	"Include cache hits in read history");

module_param(zfs_txg_history, int, 0644);
MODULE_PARM_DESC(zfs_txg_history,
	"Historical statistics for the last N txgs");

module_param(zfs_multihost_history, int, 0644);
MODULE_PARM_DESC(zfs_multihost_history,
	"Historical statistics for last N multihost writes");
#endif