4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 #include <sys/zfs_context.h>
23 #include <sys/spa_impl.h>
/*
 * Keeps stats on last N reads per spa_t, disabled by default.
 */
int zfs_read_history = 0;

/*
 * Include cache hits in history, disabled by default.
 */
int zfs_read_history_hits = 0;

/*
 * Keeps stats on the last N txgs, disabled by default.
 */
int zfs_txg_history = 0;
/*
 * ==========================================================================
 * SPA Read History Routines
 * ==========================================================================
 */
47 * Read statistics - Information exported regarding each arc_read call
49 typedef struct spa_read_history
{
50 uint64_t uid
; /* unique identifier */
51 hrtime_t start
; /* time read completed */
52 uint64_t objset
; /* read from this objset */
53 uint64_t object
; /* read of this object number */
54 uint64_t level
; /* block's indirection level */
55 uint64_t blkid
; /* read of this block id */
56 char origin
[24]; /* read originated from here */
57 uint32_t aflags
; /* ARC flags (cached, prefetch, etc.) */
58 pid_t pid
; /* PID of task doing read */
59 char comm
[16]; /* process name of task doing read */
/*
 * Format the column-header line of the read-history kstat into buf.
 *
 * buf  - destination buffer
 * size - capacity of buf in bytes
 *
 * Always returns 0.
 *
 * snprintf() is given the full buffer size and NUL-terminates on its own.
 * Its return value is deliberately not reused: on truncation it reports
 * the length that would have been written, which is not a safe index
 * into buf.
 *
 * NOTE(review): the return type and return statement were dropped from
 * the damaged listing; int/0 is restored to match the way this function
 * is handed to kstat_set_raw_ops() - confirm.
 */
static int
spa_read_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s "
	    "%-24s %-8s %-16s\n", "UID", "start", "objset", "object",
	    "level", "blkid", "aflags", "origin", "pid", "process");

	return (0);
}
75 spa_read_history_data(char *buf
, size_t size
, void *data
)
77 spa_read_history_t
*srh
= (spa_read_history_t
*)data
;
79 size
= snprintf(buf
, size
- 1, "%-8llu %-16llu 0x%-6llx "
80 "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
81 (u_longlong_t
)srh
->uid
, srh
->start
,
82 (longlong_t
)srh
->objset
, (longlong_t
)srh
->object
,
83 (longlong_t
)srh
->level
, (longlong_t
)srh
->blkid
,
84 srh
->aflags
, srh
->origin
, srh
->pid
, srh
->comm
);
91 * Calculate the address for the next spa_stats_history_t entry. The
92 * ssh->lock will be held until ksp->ks_ndata entries are processed.
95 spa_read_history_addr(kstat_t
*ksp
, loff_t n
)
97 spa_t
*spa
= ksp
->ks_private
;
98 spa_stats_history_t
*ssh
= &spa
->spa_stats
.read_history
;
100 ASSERT(MUTEX_HELD(&ssh
->lock
));
103 ssh
->private = list_tail(&ssh
->list
);
104 else if (ssh
->private)
105 ssh
->private = list_prev(&ssh
->list
, ssh
->private);
107 return (ssh
->private);
111 * When the kstat is written discard all spa_read_history_t entires. The
112 * ssh->lock will be held until ksp->ks_ndata entries are processed.
115 spa_read_history_update(kstat_t
*ksp
, int rw
)
117 spa_t
*spa
= ksp
->ks_private
;
118 spa_stats_history_t
*ssh
= &spa
->spa_stats
.read_history
;
120 if (rw
== KSTAT_WRITE
) {
121 spa_read_history_t
*srh
;
123 while ((srh
= list_remove_head(&ssh
->list
))) {
125 kmem_free(srh
, sizeof (spa_read_history_t
));
128 ASSERT3U(ssh
->size
, ==, 0);
131 ksp
->ks_ndata
= ssh
->size
;
132 ksp
->ks_data_size
= ssh
->size
* sizeof (spa_read_history_t
);
138 spa_read_history_init(spa_t
*spa
)
140 spa_stats_history_t
*ssh
= &spa
->spa_stats
.read_history
;
141 char name
[KSTAT_STRLEN
];
144 mutex_init(&ssh
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
145 list_create(&ssh
->list
, sizeof (spa_read_history_t
),
146 offsetof(spa_read_history_t
, srh_link
));
152 (void) snprintf(name
, KSTAT_STRLEN
, "zfs/%s", spa_name(spa
));
153 name
[KSTAT_STRLEN
-1] = '\0';
155 ksp
= kstat_create(name
, 0, "reads", "misc",
156 KSTAT_TYPE_RAW
, 0, KSTAT_FLAG_VIRTUAL
);
160 ksp
->ks_lock
= &ssh
->lock
;
162 ksp
->ks_private
= spa
;
163 ksp
->ks_update
= spa_read_history_update
;
164 kstat_set_raw_ops(ksp
, spa_read_history_headers
,
165 spa_read_history_data
, spa_read_history_addr
);
171 spa_read_history_destroy(spa_t
*spa
)
173 spa_stats_history_t
*ssh
= &spa
->spa_stats
.read_history
;
174 spa_read_history_t
*srh
;
181 mutex_enter(&ssh
->lock
);
182 while ((srh
= list_remove_head(&ssh
->list
))) {
184 kmem_free(srh
, sizeof (spa_read_history_t
));
187 ASSERT3U(ssh
->size
, ==, 0);
188 list_destroy(&ssh
->list
);
189 mutex_exit(&ssh
->lock
);
191 mutex_destroy(&ssh
->lock
);
195 spa_read_history_add(spa_t
*spa
, const zbookmark_t
*zb
, uint32_t aflags
)
197 spa_stats_history_t
*ssh
= &spa
->spa_stats
.read_history
;
198 spa_read_history_t
*srh
, *rm
;
200 ASSERT3P(spa
, !=, NULL
);
201 ASSERT3P(zb
, !=, NULL
);
203 if (zfs_read_history
== 0 && ssh
->size
== 0)
206 if (zfs_read_history_hits
== 0 && (aflags
& ARC_CACHED
))
209 srh
= kmem_zalloc(sizeof (spa_read_history_t
), KM_PUSHPAGE
);
210 strlcpy(srh
->comm
, getcomm(), sizeof (srh
->comm
));
211 srh
->start
= gethrtime();
212 srh
->objset
= zb
->zb_objset
;
213 srh
->object
= zb
->zb_object
;
214 srh
->level
= zb
->zb_level
;
215 srh
->blkid
= zb
->zb_blkid
;
216 srh
->aflags
= aflags
;
219 mutex_enter(&ssh
->lock
);
221 srh
->uid
= ssh
->count
++;
222 list_insert_head(&ssh
->list
, srh
);
225 while (ssh
->size
> zfs_read_history
) {
227 rm
= list_remove_tail(&ssh
->list
);
228 kmem_free(rm
, sizeof (spa_read_history_t
));
231 mutex_exit(&ssh
->lock
);
/*
 * ==========================================================================
 * SPA TXG History Routines
 * ==========================================================================
 */
241 * Txg statistics - Information exported regarding each txg sync
244 typedef struct spa_txg_history
{
245 uint64_t txg
; /* txg id */
246 txg_state_t state
; /* active txg state */
247 uint64_t nread
; /* number of bytes read */
248 uint64_t nwritten
; /* number of bytes written */
249 uint64_t reads
; /* number of read operations */
250 uint64_t writes
; /* number of write operations */
251 uint64_t nreserved
; /* number of bytes reserved */
252 hrtime_t times
[TXG_STATE_COMMITTED
]; /* completion times */
253 list_node_t sth_link
;
/*
 * Format the column-header line of the txg-history kstat into buf.
 *
 * buf  - destination buffer
 * size - capacity of buf in bytes
 *
 * Always returns 0.
 *
 * snprintf() is given the full buffer size and NUL-terminates on its own.
 * Its return value is deliberately not reused: on truncation it reports
 * the length that would have been written, which is not a safe index
 * into buf.
 *
 * NOTE(review): the return type and return statement were dropped from
 * the damaged listing; int/0 is restored to match the way this function
 * is handed to kstat_set_raw_ops() - confirm.
 */
static int
spa_txg_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size, "%-8s %-16s %-5s %-12s %-12s %-12s "
	    "%-8s %-8s %-12s %-12s %-12s %-12s\n", "txg", "birth", "state",
	    "nreserved", "nread", "nwritten", "reads", "writes",
	    "otime", "qtime", "wtime", "stime");

	return (0);
}
269 spa_txg_history_data(char *buf
, size_t size
, void *data
)
271 spa_txg_history_t
*sth
= (spa_txg_history_t
*)data
;
272 uint64_t open
= 0, quiesce
= 0, wait
= 0, sync
= 0;
275 switch (sth
->state
) {
276 case TXG_STATE_BIRTH
: state
= 'B'; break;
277 case TXG_STATE_OPEN
: state
= 'O'; break;
278 case TXG_STATE_QUIESCED
: state
= 'Q'; break;
279 case TXG_STATE_WAIT_FOR_SYNC
: state
= 'W'; break;
280 case TXG_STATE_SYNCED
: state
= 'S'; break;
281 case TXG_STATE_COMMITTED
: state
= 'C'; break;
282 default: state
= '?'; break;
285 if (sth
->times
[TXG_STATE_OPEN
])
286 open
= sth
->times
[TXG_STATE_OPEN
] -
287 sth
->times
[TXG_STATE_BIRTH
];
289 if (sth
->times
[TXG_STATE_QUIESCED
])
290 quiesce
= sth
->times
[TXG_STATE_QUIESCED
] -
291 sth
->times
[TXG_STATE_OPEN
];
293 if (sth
->times
[TXG_STATE_WAIT_FOR_SYNC
])
294 wait
= sth
->times
[TXG_STATE_WAIT_FOR_SYNC
] -
295 sth
->times
[TXG_STATE_QUIESCED
];
297 if (sth
->times
[TXG_STATE_SYNCED
])
298 sync
= sth
->times
[TXG_STATE_SYNCED
] -
299 sth
->times
[TXG_STATE_WAIT_FOR_SYNC
];
301 size
= snprintf(buf
, size
- 1, "%-8llu %-16llu %-5c %-12llu "
302 "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
303 (longlong_t
)sth
->txg
, sth
->times
[TXG_STATE_BIRTH
], state
,
304 (u_longlong_t
)sth
->nreserved
,
305 (u_longlong_t
)sth
->nread
, (u_longlong_t
)sth
->nwritten
,
306 (u_longlong_t
)sth
->reads
, (u_longlong_t
)sth
->writes
,
307 (u_longlong_t
)open
, (u_longlong_t
)quiesce
, (u_longlong_t
)wait
,
315 * Calculate the address for the next spa_stats_history_t entry. The
316 * ssh->lock will be held until ksp->ks_ndata entries are processed.
319 spa_txg_history_addr(kstat_t
*ksp
, loff_t n
)
321 spa_t
*spa
= ksp
->ks_private
;
322 spa_stats_history_t
*ssh
= &spa
->spa_stats
.txg_history
;
324 ASSERT(MUTEX_HELD(&ssh
->lock
));
327 ssh
->private = list_tail(&ssh
->list
);
328 else if (ssh
->private)
329 ssh
->private = list_prev(&ssh
->list
, ssh
->private);
331 return (ssh
->private);
335 * When the kstat is written discard all spa_txg_history_t entires. The
336 * ssh->lock will be held until ksp->ks_ndata entries are processed.
339 spa_txg_history_update(kstat_t
*ksp
, int rw
)
341 spa_t
*spa
= ksp
->ks_private
;
342 spa_stats_history_t
*ssh
= &spa
->spa_stats
.txg_history
;
344 ASSERT(MUTEX_HELD(&ssh
->lock
));
346 if (rw
== KSTAT_WRITE
) {
347 spa_txg_history_t
*sth
;
349 while ((sth
= list_remove_head(&ssh
->list
))) {
351 kmem_free(sth
, sizeof (spa_txg_history_t
));
354 ASSERT3U(ssh
->size
, ==, 0);
357 ksp
->ks_ndata
= ssh
->size
;
358 ksp
->ks_data_size
= ssh
->size
* sizeof (spa_txg_history_t
);
364 spa_txg_history_init(spa_t
*spa
)
366 spa_stats_history_t
*ssh
= &spa
->spa_stats
.txg_history
;
367 char name
[KSTAT_STRLEN
];
370 mutex_init(&ssh
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
371 list_create(&ssh
->list
, sizeof (spa_txg_history_t
),
372 offsetof(spa_txg_history_t
, sth_link
));
378 (void) snprintf(name
, KSTAT_STRLEN
, "zfs/%s", spa_name(spa
));
379 name
[KSTAT_STRLEN
-1] = '\0';
381 ksp
= kstat_create(name
, 0, "txgs", "misc",
382 KSTAT_TYPE_RAW
, 0, KSTAT_FLAG_VIRTUAL
);
386 ksp
->ks_lock
= &ssh
->lock
;
388 ksp
->ks_private
= spa
;
389 ksp
->ks_update
= spa_txg_history_update
;
390 kstat_set_raw_ops(ksp
, spa_txg_history_headers
,
391 spa_txg_history_data
, spa_txg_history_addr
);
397 spa_txg_history_destroy(spa_t
*spa
)
399 spa_stats_history_t
*ssh
= &spa
->spa_stats
.txg_history
;
400 spa_txg_history_t
*sth
;
407 mutex_enter(&ssh
->lock
);
408 while ((sth
= list_remove_head(&ssh
->list
))) {
410 kmem_free(sth
, sizeof (spa_txg_history_t
));
413 ASSERT3U(ssh
->size
, ==, 0);
414 list_destroy(&ssh
->list
);
415 mutex_exit(&ssh
->lock
);
417 mutex_destroy(&ssh
->lock
);
421 * Add a new txg to historical record.
424 spa_txg_history_add(spa_t
*spa
, uint64_t txg
, hrtime_t birth_time
)
426 spa_stats_history_t
*ssh
= &spa
->spa_stats
.txg_history
;
427 spa_txg_history_t
*sth
, *rm
;
429 if (zfs_txg_history
== 0 && ssh
->size
== 0)
432 sth
= kmem_zalloc(sizeof (spa_txg_history_t
), KM_PUSHPAGE
);
434 sth
->state
= TXG_STATE_OPEN
;
435 sth
->times
[TXG_STATE_BIRTH
] = birth_time
;
437 mutex_enter(&ssh
->lock
);
439 list_insert_head(&ssh
->list
, sth
);
442 while (ssh
->size
> zfs_txg_history
) {
444 rm
= list_remove_tail(&ssh
->list
);
445 kmem_free(rm
, sizeof (spa_txg_history_t
));
448 mutex_exit(&ssh
->lock
);
452 * Set txg state completion time and increment current state.
455 spa_txg_history_set(spa_t
*spa
, uint64_t txg
, txg_state_t completed_state
,
456 hrtime_t completed_time
)
458 spa_stats_history_t
*ssh
= &spa
->spa_stats
.txg_history
;
459 spa_txg_history_t
*sth
;
462 if (zfs_txg_history
== 0)
465 mutex_enter(&ssh
->lock
);
466 for (sth
= list_head(&ssh
->list
); sth
!= NULL
;
467 sth
= list_next(&ssh
->list
, sth
)) {
468 if (sth
->txg
== txg
) {
469 sth
->times
[completed_state
] = completed_time
;
475 mutex_exit(&ssh
->lock
);
484 spa_txg_history_set_io(spa_t
*spa
, uint64_t txg
, uint64_t nread
,
485 uint64_t nwritten
, uint64_t reads
, uint64_t writes
, uint64_t nreserved
)
487 spa_stats_history_t
*ssh
= &spa
->spa_stats
.txg_history
;
488 spa_txg_history_t
*sth
;
491 if (zfs_txg_history
== 0)
494 mutex_enter(&ssh
->lock
);
495 for (sth
= list_head(&ssh
->list
); sth
!= NULL
;
496 sth
= list_next(&ssh
->list
, sth
)) {
497 if (sth
->txg
== txg
) {
499 sth
->nwritten
= nwritten
;
501 sth
->writes
= writes
;
502 sth
->nreserved
= nreserved
;
507 mutex_exit(&ssh
->lock
);
/*
 * ==========================================================================
 * SPA TX Assign Histogram Routines
 * ==========================================================================
 */
519 * Tx statistics - Information exported regarding dmu_tx_assign time.
523 * When the kstat is written zero all buckets. When the kstat is read
524 * count the number of trailing buckets set to zero and update ks_ndata
525 * such that they are not output.
528 spa_tx_assign_update(kstat_t
*ksp
, int rw
)
530 spa_t
*spa
= ksp
->ks_private
;
531 spa_stats_history_t
*ssh
= &spa
->spa_stats
.tx_assign_histogram
;
534 if (rw
== KSTAT_WRITE
) {
535 for (i
= 0; i
< ssh
->count
; i
++)
536 ((kstat_named_t
*)ssh
->private)[i
].value
.ui64
= 0;
539 for (i
= ssh
->count
; i
> 0; i
--)
540 if (((kstat_named_t
*)ssh
->private)[i
-1].value
.ui64
!= 0)
544 ksp
->ks_data_size
= i
* sizeof (kstat_named_t
);
550 spa_tx_assign_init(spa_t
*spa
)
552 spa_stats_history_t
*ssh
= &spa
->spa_stats
.tx_assign_histogram
;
553 char name
[KSTAT_STRLEN
];
558 mutex_init(&ssh
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
560 ssh
->count
= 42; /* power of two buckets for 1ns to 2,199s */
561 ssh
->size
= ssh
->count
* sizeof (kstat_named_t
);
562 ssh
->private = kmem_alloc(ssh
->size
, KM_SLEEP
);
564 (void) snprintf(name
, KSTAT_STRLEN
, "zfs/%s", spa_name(spa
));
565 name
[KSTAT_STRLEN
-1] = '\0';
567 for (i
= 0; i
< ssh
->count
; i
++) {
568 ks
= &((kstat_named_t
*)ssh
->private)[i
];
569 ks
->data_type
= KSTAT_DATA_UINT64
;
571 (void) snprintf(ks
->name
, KSTAT_STRLEN
, "%llu ns",
572 (u_longlong_t
)1 << i
);
575 ksp
= kstat_create(name
, 0, "dmu_tx_assign", "misc",
576 KSTAT_TYPE_NAMED
, 0, KSTAT_FLAG_VIRTUAL
);
580 ksp
->ks_lock
= &ssh
->lock
;
581 ksp
->ks_data
= ssh
->private;
582 ksp
->ks_ndata
= ssh
->count
;
583 ksp
->ks_data_size
= ssh
->size
;
584 ksp
->ks_private
= spa
;
585 ksp
->ks_update
= spa_tx_assign_update
;
591 spa_tx_assign_destroy(spa_t
*spa
)
593 spa_stats_history_t
*ssh
= &spa
->spa_stats
.tx_assign_histogram
;
600 kmem_free(ssh
->private, ssh
->size
);
601 mutex_destroy(&ssh
->lock
);
605 spa_tx_assign_add_nsecs(spa_t
*spa
, uint64_t nsecs
)
607 spa_stats_history_t
*ssh
= &spa
->spa_stats
.tx_assign_histogram
;
610 while (((1 << idx
) < nsecs
) && (idx
< ssh
->size
- 1))
613 atomic_inc_64(&((kstat_named_t
*)ssh
->private)[idx
].value
.ui64
);
/*
 * ==========================================================================
 * SPA IO History Routines
 * ==========================================================================
 */
622 spa_io_history_update(kstat_t
*ksp
, int rw
)
624 if (rw
== KSTAT_WRITE
)
625 memset(ksp
->ks_data
, 0, ksp
->ks_data_size
);
631 spa_io_history_init(spa_t
*spa
)
633 spa_stats_history_t
*ssh
= &spa
->spa_stats
.io_history
;
634 char name
[KSTAT_STRLEN
];
637 mutex_init(&ssh
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
639 (void) snprintf(name
, KSTAT_STRLEN
, "zfs/%s", spa_name(spa
));
640 name
[KSTAT_STRLEN
-1] = '\0';
642 ksp
= kstat_create(name
, 0, "io", "disk", KSTAT_TYPE_IO
, 1, 0);
646 ksp
->ks_lock
= &ssh
->lock
;
647 ksp
->ks_private
= spa
;
648 ksp
->ks_update
= spa_io_history_update
;
654 spa_io_history_destroy(spa_t
*spa
)
656 spa_stats_history_t
*ssh
= &spa
->spa_stats
.io_history
;
659 kstat_delete(ssh
->kstat
);
661 mutex_destroy(&ssh
->lock
);
665 spa_stats_init(spa_t
*spa
)
667 spa_read_history_init(spa
);
668 spa_txg_history_init(spa
);
669 spa_tx_assign_init(spa
);
670 spa_io_history_init(spa
);
674 spa_stats_destroy(spa_t
*spa
)
676 spa_tx_assign_destroy(spa
);
677 spa_txg_history_destroy(spa
);
678 spa_read_history_destroy(spa
);
679 spa_io_history_destroy(spa
);
682 #if defined(_KERNEL) && defined(HAVE_SPL)
683 module_param(zfs_read_history
, int, 0644);
684 MODULE_PARM_DESC(zfs_read_history
, "Historic statistics for the last N reads");
686 module_param(zfs_read_history_hits
, int, 0644);
687 MODULE_PARM_DESC(zfs_read_history_hits
, "Include cache hits in read history");
689 module_param(zfs_txg_history
, int, 0644);
690 MODULE_PARM_DESC(zfs_txg_history
, "Historic statistics for the last N txgs");