]> git.proxmox.com Git - mirror_zfs-debian.git/blob - module/zfs/spa_stats.c
7ca359806174434e7ad99088fbc6714c1ce22745
[mirror_zfs-debian.git] / module / zfs / spa_stats.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 #include <sys/zfs_context.h>
23 #include <sys/spa_impl.h>
24 #include <sys/vdev_impl.h>
25
/*
 * Tunable: how many of the most recent reads to remember per spa_t.
 * Zero (the default) leaves read history collection switched off.
 */
int zfs_read_history = 0;

/*
 * Tunable: when non-zero, reads flagged ARC_FLAG_CACHED are recorded in
 * the read history too.  Zero (the default) skips them.
 */
int zfs_read_history_hits = 0;

/*
 * Tunable: how many of the most recent txgs to remember per spa_t.
 * Zero (the default) leaves txg history collection switched off.
 */
int zfs_txg_history = 0;

/*
 * Tunable: how many of the most recent MMP updates to remember per spa_t.
 * Zero (the default) leaves MMP history collection switched off.
 */
int zfs_multihost_history = 0;
45
46 /*
47 * ==========================================================================
48 * SPA Read History Routines
49 * ==========================================================================
50 */
51
52 /*
53 * Read statistics - Information exported regarding each arc_read call
54 */
55 typedef struct spa_read_history {
56 uint64_t uid; /* unique identifier */
57 hrtime_t start; /* time read completed */
58 uint64_t objset; /* read from this objset */
59 uint64_t object; /* read of this object number */
60 uint64_t level; /* block's indirection level */
61 uint64_t blkid; /* read of this block id */
62 char origin[24]; /* read originated from here */
63 uint32_t aflags; /* ARC flags (cached, prefetch, etc.) */
64 pid_t pid; /* PID of task doing read */
65 char comm[16]; /* process name of task doing read */
66 list_node_t srh_link;
67 } spa_read_history_t;
68
/*
 * Raw kstat header callback for the "reads" kstat: write the column titles
 * into buf, using at most size bytes.  Always returns 0.
 */
static int
spa_read_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size,
	    "%-8s %-16s %-8s %-8s %-8s %-8s %-8s %-24s %-8s %-16s\n",
	    "UID", "start", "objset", "object", "level", "blkid",
	    "aflags", "origin", "pid", "process");

	return (0);
}
78
79 static int
80 spa_read_history_data(char *buf, size_t size, void *data)
81 {
82 spa_read_history_t *srh = (spa_read_history_t *)data;
83
84 (void) snprintf(buf, size, "%-8llu %-16llu 0x%-6llx "
85 "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
86 (u_longlong_t)srh->uid, srh->start,
87 (longlong_t)srh->objset, (longlong_t)srh->object,
88 (longlong_t)srh->level, (longlong_t)srh->blkid,
89 srh->aflags, srh->origin, srh->pid, srh->comm);
90
91 return (0);
92 }
93
94 /*
95 * Calculate the address for the next spa_stats_history_t entry. The
96 * ssh->lock will be held until ksp->ks_ndata entries are processed.
97 */
98 static void *
99 spa_read_history_addr(kstat_t *ksp, loff_t n)
100 {
101 spa_t *spa = ksp->ks_private;
102 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
103
104 ASSERT(MUTEX_HELD(&ssh->lock));
105
106 if (n == 0)
107 ssh->private = list_tail(&ssh->list);
108 else if (ssh->private)
109 ssh->private = list_prev(&ssh->list, ssh->private);
110
111 return (ssh->private);
112 }
113
114 /*
115 * When the kstat is written discard all spa_read_history_t entries. The
116 * ssh->lock will be held until ksp->ks_ndata entries are processed.
117 */
118 static int
119 spa_read_history_update(kstat_t *ksp, int rw)
120 {
121 spa_t *spa = ksp->ks_private;
122 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
123
124 if (rw == KSTAT_WRITE) {
125 spa_read_history_t *srh;
126
127 while ((srh = list_remove_head(&ssh->list))) {
128 ssh->size--;
129 kmem_free(srh, sizeof (spa_read_history_t));
130 }
131
132 ASSERT3U(ssh->size, ==, 0);
133 }
134
135 ksp->ks_ndata = ssh->size;
136 ksp->ks_data_size = ssh->size * sizeof (spa_read_history_t);
137
138 return (0);
139 }
140
141 static void
142 spa_read_history_init(spa_t *spa)
143 {
144 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
145 char name[KSTAT_STRLEN];
146 kstat_t *ksp;
147
148 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
149 list_create(&ssh->list, sizeof (spa_read_history_t),
150 offsetof(spa_read_history_t, srh_link));
151
152 ssh->count = 0;
153 ssh->size = 0;
154 ssh->private = NULL;
155
156 (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
157
158 ksp = kstat_create(name, 0, "reads", "misc",
159 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
160 ssh->kstat = ksp;
161
162 if (ksp) {
163 ksp->ks_lock = &ssh->lock;
164 ksp->ks_data = NULL;
165 ksp->ks_private = spa;
166 ksp->ks_update = spa_read_history_update;
167 kstat_set_raw_ops(ksp, spa_read_history_headers,
168 spa_read_history_data, spa_read_history_addr);
169 kstat_install(ksp);
170 }
171 }
172
173 static void
174 spa_read_history_destroy(spa_t *spa)
175 {
176 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
177 spa_read_history_t *srh;
178 kstat_t *ksp;
179
180 ksp = ssh->kstat;
181 if (ksp)
182 kstat_delete(ksp);
183
184 mutex_enter(&ssh->lock);
185 while ((srh = list_remove_head(&ssh->list))) {
186 ssh->size--;
187 kmem_free(srh, sizeof (spa_read_history_t));
188 }
189
190 ASSERT3U(ssh->size, ==, 0);
191 list_destroy(&ssh->list);
192 mutex_exit(&ssh->lock);
193
194 mutex_destroy(&ssh->lock);
195 }
196
197 void
198 spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
199 {
200 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
201 spa_read_history_t *srh, *rm;
202
203 ASSERT3P(spa, !=, NULL);
204 ASSERT3P(zb, !=, NULL);
205
206 if (zfs_read_history == 0 && ssh->size == 0)
207 return;
208
209 if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED))
210 return;
211
212 srh = kmem_zalloc(sizeof (spa_read_history_t), KM_SLEEP);
213 strlcpy(srh->comm, getcomm(), sizeof (srh->comm));
214 srh->start = gethrtime();
215 srh->objset = zb->zb_objset;
216 srh->object = zb->zb_object;
217 srh->level = zb->zb_level;
218 srh->blkid = zb->zb_blkid;
219 srh->aflags = aflags;
220 srh->pid = getpid();
221
222 mutex_enter(&ssh->lock);
223
224 srh->uid = ssh->count++;
225 list_insert_head(&ssh->list, srh);
226 ssh->size++;
227
228 while (ssh->size > zfs_read_history) {
229 ssh->size--;
230 rm = list_remove_tail(&ssh->list);
231 kmem_free(rm, sizeof (spa_read_history_t));
232 }
233
234 mutex_exit(&ssh->lock);
235 }
236
237 /*
238 * ==========================================================================
239 * SPA TXG History Routines
240 * ==========================================================================
241 */
242
243 /*
244 * Txg statistics - Information exported regarding each txg sync
245 */
246
247 typedef struct spa_txg_history {
248 uint64_t txg; /* txg id */
249 txg_state_t state; /* active txg state */
250 uint64_t nread; /* number of bytes read */
251 uint64_t nwritten; /* number of bytes written */
252 uint64_t reads; /* number of read operations */
253 uint64_t writes; /* number of write operations */
254 uint64_t ndirty; /* number of dirty bytes */
255 hrtime_t times[TXG_STATE_COMMITTED]; /* completion times */
256 list_node_t sth_link;
257 } spa_txg_history_t;
258
/*
 * Raw kstat header callback for the "txgs" kstat: write the column titles
 * into buf, using at most size bytes.  Always returns 0.
 */
static int
spa_txg_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size,
	    "%-8s %-16s %-5s %-12s %-12s %-12s %-8s %-8s "
	    "%-12s %-12s %-12s %-12s\n",
	    "txg", "birth", "state", "ndirty", "nread", "nwritten",
	    "reads", "writes", "otime", "qtime", "wtime", "stime");

	return (0);
}
269
270 static int
271 spa_txg_history_data(char *buf, size_t size, void *data)
272 {
273 spa_txg_history_t *sth = (spa_txg_history_t *)data;
274 uint64_t open = 0, quiesce = 0, wait = 0, sync = 0;
275 char state;
276
277 switch (sth->state) {
278 case TXG_STATE_BIRTH: state = 'B'; break;
279 case TXG_STATE_OPEN: state = 'O'; break;
280 case TXG_STATE_QUIESCED: state = 'Q'; break;
281 case TXG_STATE_WAIT_FOR_SYNC: state = 'W'; break;
282 case TXG_STATE_SYNCED: state = 'S'; break;
283 case TXG_STATE_COMMITTED: state = 'C'; break;
284 default: state = '?'; break;
285 }
286
287 if (sth->times[TXG_STATE_OPEN])
288 open = sth->times[TXG_STATE_OPEN] -
289 sth->times[TXG_STATE_BIRTH];
290
291 if (sth->times[TXG_STATE_QUIESCED])
292 quiesce = sth->times[TXG_STATE_QUIESCED] -
293 sth->times[TXG_STATE_OPEN];
294
295 if (sth->times[TXG_STATE_WAIT_FOR_SYNC])
296 wait = sth->times[TXG_STATE_WAIT_FOR_SYNC] -
297 sth->times[TXG_STATE_QUIESCED];
298
299 if (sth->times[TXG_STATE_SYNCED])
300 sync = sth->times[TXG_STATE_SYNCED] -
301 sth->times[TXG_STATE_WAIT_FOR_SYNC];
302
303 (void) snprintf(buf, size, "%-8llu %-16llu %-5c %-12llu "
304 "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
305 (longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state,
306 (u_longlong_t)sth->ndirty,
307 (u_longlong_t)sth->nread, (u_longlong_t)sth->nwritten,
308 (u_longlong_t)sth->reads, (u_longlong_t)sth->writes,
309 (u_longlong_t)open, (u_longlong_t)quiesce, (u_longlong_t)wait,
310 (u_longlong_t)sync);
311
312 return (0);
313 }
314
315 /*
316 * Calculate the address for the next spa_stats_history_t entry. The
317 * ssh->lock will be held until ksp->ks_ndata entries are processed.
318 */
319 static void *
320 spa_txg_history_addr(kstat_t *ksp, loff_t n)
321 {
322 spa_t *spa = ksp->ks_private;
323 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
324
325 ASSERT(MUTEX_HELD(&ssh->lock));
326
327 if (n == 0)
328 ssh->private = list_tail(&ssh->list);
329 else if (ssh->private)
330 ssh->private = list_prev(&ssh->list, ssh->private);
331
332 return (ssh->private);
333 }
334
335 /*
336 * When the kstat is written discard all spa_txg_history_t entries. The
337 * ssh->lock will be held until ksp->ks_ndata entries are processed.
338 */
339 static int
340 spa_txg_history_update(kstat_t *ksp, int rw)
341 {
342 spa_t *spa = ksp->ks_private;
343 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
344
345 ASSERT(MUTEX_HELD(&ssh->lock));
346
347 if (rw == KSTAT_WRITE) {
348 spa_txg_history_t *sth;
349
350 while ((sth = list_remove_head(&ssh->list))) {
351 ssh->size--;
352 kmem_free(sth, sizeof (spa_txg_history_t));
353 }
354
355 ASSERT3U(ssh->size, ==, 0);
356 }
357
358 ksp->ks_ndata = ssh->size;
359 ksp->ks_data_size = ssh->size * sizeof (spa_txg_history_t);
360
361 return (0);
362 }
363
364 static void
365 spa_txg_history_init(spa_t *spa)
366 {
367 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
368 char name[KSTAT_STRLEN];
369 kstat_t *ksp;
370
371 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
372 list_create(&ssh->list, sizeof (spa_txg_history_t),
373 offsetof(spa_txg_history_t, sth_link));
374
375 ssh->count = 0;
376 ssh->size = 0;
377 ssh->private = NULL;
378
379 (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
380
381 ksp = kstat_create(name, 0, "txgs", "misc",
382 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
383 ssh->kstat = ksp;
384
385 if (ksp) {
386 ksp->ks_lock = &ssh->lock;
387 ksp->ks_data = NULL;
388 ksp->ks_private = spa;
389 ksp->ks_update = spa_txg_history_update;
390 kstat_set_raw_ops(ksp, spa_txg_history_headers,
391 spa_txg_history_data, spa_txg_history_addr);
392 kstat_install(ksp);
393 }
394 }
395
396 static void
397 spa_txg_history_destroy(spa_t *spa)
398 {
399 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
400 spa_txg_history_t *sth;
401 kstat_t *ksp;
402
403 ksp = ssh->kstat;
404 if (ksp)
405 kstat_delete(ksp);
406
407 mutex_enter(&ssh->lock);
408 while ((sth = list_remove_head(&ssh->list))) {
409 ssh->size--;
410 kmem_free(sth, sizeof (spa_txg_history_t));
411 }
412
413 ASSERT3U(ssh->size, ==, 0);
414 list_destroy(&ssh->list);
415 mutex_exit(&ssh->lock);
416
417 mutex_destroy(&ssh->lock);
418 }
419
420 /*
421 * Add a new txg to historical record.
422 */
423 void
424 spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
425 {
426 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
427 spa_txg_history_t *sth, *rm;
428
429 if (zfs_txg_history == 0 && ssh->size == 0)
430 return;
431
432 sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP);
433 sth->txg = txg;
434 sth->state = TXG_STATE_OPEN;
435 sth->times[TXG_STATE_BIRTH] = birth_time;
436
437 mutex_enter(&ssh->lock);
438
439 list_insert_head(&ssh->list, sth);
440 ssh->size++;
441
442 while (ssh->size > zfs_txg_history) {
443 ssh->size--;
444 rm = list_remove_tail(&ssh->list);
445 kmem_free(rm, sizeof (spa_txg_history_t));
446 }
447
448 mutex_exit(&ssh->lock);
449 }
450
451 /*
452 * Set txg state completion time and increment current state.
453 */
454 int
455 spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
456 hrtime_t completed_time)
457 {
458 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
459 spa_txg_history_t *sth;
460 int error = ENOENT;
461
462 if (zfs_txg_history == 0)
463 return (0);
464
465 mutex_enter(&ssh->lock);
466 for (sth = list_head(&ssh->list); sth != NULL;
467 sth = list_next(&ssh->list, sth)) {
468 if (sth->txg == txg) {
469 sth->times[completed_state] = completed_time;
470 sth->state++;
471 error = 0;
472 break;
473 }
474 }
475 mutex_exit(&ssh->lock);
476
477 return (error);
478 }
479
480 /*
481 * Set txg IO stats.
482 */
483 static int
484 spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
485 uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty)
486 {
487 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
488 spa_txg_history_t *sth;
489 int error = ENOENT;
490
491 if (zfs_txg_history == 0)
492 return (0);
493
494 mutex_enter(&ssh->lock);
495 for (sth = list_head(&ssh->list); sth != NULL;
496 sth = list_next(&ssh->list, sth)) {
497 if (sth->txg == txg) {
498 sth->nread = nread;
499 sth->nwritten = nwritten;
500 sth->reads = reads;
501 sth->writes = writes;
502 sth->ndirty = ndirty;
503 error = 0;
504 break;
505 }
506 }
507 mutex_exit(&ssh->lock);
508
509 return (error);
510 }
511
512 txg_stat_t *
513 spa_txg_history_init_io(spa_t *spa, uint64_t txg, dsl_pool_t *dp)
514 {
515 txg_stat_t *ts;
516
517 if (zfs_txg_history == 0)
518 return (NULL);
519
520 ts = kmem_alloc(sizeof (txg_stat_t), KM_SLEEP);
521
522 spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
523 vdev_get_stats(spa->spa_root_vdev, &ts->vs1);
524 spa_config_exit(spa, SCL_ALL, FTAG);
525
526 ts->txg = txg;
527 ts->ndirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
528
529 spa_txg_history_set(spa, txg, TXG_STATE_WAIT_FOR_SYNC, gethrtime());
530
531 return (ts);
532 }
533
534 void
535 spa_txg_history_fini_io(spa_t *spa, txg_stat_t *ts)
536 {
537 if (ts == NULL)
538 return;
539
540 if (zfs_txg_history == 0) {
541 kmem_free(ts, sizeof (txg_stat_t));
542 return;
543 }
544
545 spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
546 vdev_get_stats(spa->spa_root_vdev, &ts->vs2);
547 spa_config_exit(spa, SCL_ALL, FTAG);
548
549 spa_txg_history_set(spa, ts->txg, TXG_STATE_SYNCED, gethrtime());
550 spa_txg_history_set_io(spa, ts->txg,
551 ts->vs2.vs_bytes[ZIO_TYPE_READ] - ts->vs1.vs_bytes[ZIO_TYPE_READ],
552 ts->vs2.vs_bytes[ZIO_TYPE_WRITE] - ts->vs1.vs_bytes[ZIO_TYPE_WRITE],
553 ts->vs2.vs_ops[ZIO_TYPE_READ] - ts->vs1.vs_ops[ZIO_TYPE_READ],
554 ts->vs2.vs_ops[ZIO_TYPE_WRITE] - ts->vs1.vs_ops[ZIO_TYPE_WRITE],
555 ts->ndirty);
556
557 kmem_free(ts, sizeof (txg_stat_t));
558 }
559
560 /*
561 * ==========================================================================
562 * SPA TX Assign Histogram Routines
563 * ==========================================================================
564 */
565
566 /*
567 * Tx statistics - Information exported regarding dmu_tx_assign time.
568 */
569
570 /*
571 * When the kstat is written zero all buckets. When the kstat is read
572 * count the number of trailing buckets set to zero and update ks_ndata
573 * such that they are not output.
574 */
575 static int
576 spa_tx_assign_update(kstat_t *ksp, int rw)
577 {
578 spa_t *spa = ksp->ks_private;
579 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
580 int i;
581
582 if (rw == KSTAT_WRITE) {
583 for (i = 0; i < ssh->count; i++)
584 ((kstat_named_t *)ssh->private)[i].value.ui64 = 0;
585 }
586
587 for (i = ssh->count; i > 0; i--)
588 if (((kstat_named_t *)ssh->private)[i-1].value.ui64 != 0)
589 break;
590
591 ksp->ks_ndata = i;
592 ksp->ks_data_size = i * sizeof (kstat_named_t);
593
594 return (0);
595 }
596
597 static void
598 spa_tx_assign_init(spa_t *spa)
599 {
600 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
601 char name[KSTAT_STRLEN];
602 kstat_named_t *ks;
603 kstat_t *ksp;
604 int i;
605
606 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
607
608 ssh->count = 42; /* power of two buckets for 1ns to 2,199s */
609 ssh->size = ssh->count * sizeof (kstat_named_t);
610 ssh->private = kmem_alloc(ssh->size, KM_SLEEP);
611
612 (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
613
614 for (i = 0; i < ssh->count; i++) {
615 ks = &((kstat_named_t *)ssh->private)[i];
616 ks->data_type = KSTAT_DATA_UINT64;
617 ks->value.ui64 = 0;
618 (void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns",
619 (u_longlong_t)1 << i);
620 }
621
622 ksp = kstat_create(name, 0, "dmu_tx_assign", "misc",
623 KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL);
624 ssh->kstat = ksp;
625
626 if (ksp) {
627 ksp->ks_lock = &ssh->lock;
628 ksp->ks_data = ssh->private;
629 ksp->ks_ndata = ssh->count;
630 ksp->ks_data_size = ssh->size;
631 ksp->ks_private = spa;
632 ksp->ks_update = spa_tx_assign_update;
633 kstat_install(ksp);
634 }
635 }
636
637 static void
638 spa_tx_assign_destroy(spa_t *spa)
639 {
640 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
641 kstat_t *ksp;
642
643 ksp = ssh->kstat;
644 if (ksp)
645 kstat_delete(ksp);
646
647 kmem_free(ssh->private, ssh->size);
648 mutex_destroy(&ssh->lock);
649 }
650
651 void
652 spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs)
653 {
654 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
655 uint64_t idx = 0;
656
657 while (((1ULL << idx) < nsecs) && (idx < ssh->size - 1))
658 idx++;
659
660 atomic_inc_64(&((kstat_named_t *)ssh->private)[idx].value.ui64);
661 }
662
663 /*
664 * ==========================================================================
665 * SPA IO History Routines
666 * ==========================================================================
667 */
668 static int
669 spa_io_history_update(kstat_t *ksp, int rw)
670 {
671 if (rw == KSTAT_WRITE)
672 memset(ksp->ks_data, 0, ksp->ks_data_size);
673
674 return (0);
675 }
676
677 static void
678 spa_io_history_init(spa_t *spa)
679 {
680 spa_stats_history_t *ssh = &spa->spa_stats.io_history;
681 char name[KSTAT_STRLEN];
682 kstat_t *ksp;
683
684 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
685
686 (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
687
688 ksp = kstat_create(name, 0, "io", "disk", KSTAT_TYPE_IO, 1, 0);
689 ssh->kstat = ksp;
690
691 if (ksp) {
692 ksp->ks_lock = &ssh->lock;
693 ksp->ks_private = spa;
694 ksp->ks_update = spa_io_history_update;
695 kstat_install(ksp);
696 }
697 }
698
699 static void
700 spa_io_history_destroy(spa_t *spa)
701 {
702 spa_stats_history_t *ssh = &spa->spa_stats.io_history;
703
704 if (ssh->kstat)
705 kstat_delete(ssh->kstat);
706
707 mutex_destroy(&ssh->lock);
708 }
709
710 /*
711 * ==========================================================================
712 * SPA MMP History Routines
713 * ==========================================================================
714 */
715
716 /*
717 * MMP statistics - Information exported regarding each MMP update
718 */
719
720 typedef struct spa_mmp_history {
721 uint64_t txg; /* txg of last sync */
722 uint64_t timestamp; /* UTC time of of last sync */
723 uint64_t mmp_delay; /* nanosec since last MMP write */
724 uint64_t vdev_guid; /* unique ID of leaf vdev */
725 char *vdev_path;
726 uint64_t vdev_label; /* vdev label */
727 list_node_t smh_link;
728 } spa_mmp_history_t;
729
/*
 * Raw kstat header callback for the "multihost" kstat: write the column
 * titles into buf, using at most size bytes.  Always returns 0.
 */
static int
spa_mmp_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size,
	    "%-10s %-10s %-12s %-24s %-10s %s\n",
	    "txg", "timestamp", "mmp_delay", "vdev_guid",
	    "vdev_label", "vdev_path");

	return (0);
}
738
739 static int
740 spa_mmp_history_data(char *buf, size_t size, void *data)
741 {
742 spa_mmp_history_t *smh = (spa_mmp_history_t *)data;
743
744 (void) snprintf(buf, size, "%-10llu %-10llu %-12llu %-24llu %-10llu "
745 "%s\n",
746 (u_longlong_t)smh->txg, (u_longlong_t)smh->timestamp,
747 (u_longlong_t)smh->mmp_delay, (u_longlong_t)smh->vdev_guid,
748 (u_longlong_t)smh->vdev_label,
749 (smh->vdev_path ? smh->vdev_path : "-"));
750
751 return (0);
752 }
753
754 /*
755 * Calculate the address for the next spa_stats_history_t entry. The
756 * ssh->lock will be held until ksp->ks_ndata entries are processed.
757 */
758 static void *
759 spa_mmp_history_addr(kstat_t *ksp, loff_t n)
760 {
761 spa_t *spa = ksp->ks_private;
762 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
763
764 ASSERT(MUTEX_HELD(&ssh->lock));
765
766 if (n == 0)
767 ssh->private = list_tail(&ssh->list);
768 else if (ssh->private)
769 ssh->private = list_prev(&ssh->list, ssh->private);
770
771 return (ssh->private);
772 }
773
774 /*
775 * When the kstat is written discard all spa_mmp_history_t entries. The
776 * ssh->lock will be held until ksp->ks_ndata entries are processed.
777 */
778 static int
779 spa_mmp_history_update(kstat_t *ksp, int rw)
780 {
781 spa_t *spa = ksp->ks_private;
782 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
783
784 ASSERT(MUTEX_HELD(&ssh->lock));
785
786 if (rw == KSTAT_WRITE) {
787 spa_mmp_history_t *smh;
788
789 while ((smh = list_remove_head(&ssh->list))) {
790 ssh->size--;
791 if (smh->vdev_path)
792 strfree(smh->vdev_path);
793 kmem_free(smh, sizeof (spa_mmp_history_t));
794 }
795
796 ASSERT3U(ssh->size, ==, 0);
797 }
798
799 ksp->ks_ndata = ssh->size;
800 ksp->ks_data_size = ssh->size * sizeof (spa_mmp_history_t);
801
802 return (0);
803 }
804
805 static void
806 spa_mmp_history_init(spa_t *spa)
807 {
808 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
809 char name[KSTAT_STRLEN];
810 kstat_t *ksp;
811
812 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
813 list_create(&ssh->list, sizeof (spa_mmp_history_t),
814 offsetof(spa_mmp_history_t, smh_link));
815
816 ssh->count = 0;
817 ssh->size = 0;
818 ssh->private = NULL;
819
820 (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
821
822 ksp = kstat_create(name, 0, "multihost", "misc",
823 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
824 ssh->kstat = ksp;
825
826 if (ksp) {
827 ksp->ks_lock = &ssh->lock;
828 ksp->ks_data = NULL;
829 ksp->ks_private = spa;
830 ksp->ks_update = spa_mmp_history_update;
831 kstat_set_raw_ops(ksp, spa_mmp_history_headers,
832 spa_mmp_history_data, spa_mmp_history_addr);
833 kstat_install(ksp);
834 }
835 }
836
837 static void
838 spa_mmp_history_destroy(spa_t *spa)
839 {
840 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
841 spa_mmp_history_t *smh;
842 kstat_t *ksp;
843
844 ksp = ssh->kstat;
845 if (ksp)
846 kstat_delete(ksp);
847
848 mutex_enter(&ssh->lock);
849 while ((smh = list_remove_head(&ssh->list))) {
850 ssh->size--;
851 if (smh->vdev_path)
852 strfree(smh->vdev_path);
853 kmem_free(smh, sizeof (spa_mmp_history_t));
854 }
855
856 ASSERT3U(ssh->size, ==, 0);
857 list_destroy(&ssh->list);
858 mutex_exit(&ssh->lock);
859
860 mutex_destroy(&ssh->lock);
861 }
862
863 /*
864 * Add a new MMP update to historical record.
865 */
866 void
867 spa_mmp_history_add(uint64_t txg, uint64_t timestamp, uint64_t mmp_delay,
868 vdev_t *vd, int label)
869 {
870 spa_t *spa = vd->vdev_spa;
871 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
872 spa_mmp_history_t *smh, *rm;
873
874 if (zfs_multihost_history == 0 && ssh->size == 0)
875 return;
876
877 smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP);
878 smh->txg = txg;
879 smh->timestamp = timestamp;
880 smh->mmp_delay = mmp_delay;
881 smh->vdev_guid = vd->vdev_guid;
882 if (vd->vdev_path)
883 smh->vdev_path = strdup(vd->vdev_path);
884 smh->vdev_label = label;
885
886 mutex_enter(&ssh->lock);
887
888 list_insert_head(&ssh->list, smh);
889 ssh->size++;
890
891 while (ssh->size > zfs_multihost_history) {
892 ssh->size--;
893 rm = list_remove_tail(&ssh->list);
894 if (rm->vdev_path)
895 strfree(rm->vdev_path);
896 kmem_free(rm, sizeof (spa_mmp_history_t));
897 }
898
899 mutex_exit(&ssh->lock);
900 }
901
902 void
903 spa_stats_init(spa_t *spa)
904 {
905 spa_read_history_init(spa);
906 spa_txg_history_init(spa);
907 spa_tx_assign_init(spa);
908 spa_io_history_init(spa);
909 spa_mmp_history_init(spa);
910 }
911
912 void
913 spa_stats_destroy(spa_t *spa)
914 {
915 spa_tx_assign_destroy(spa);
916 spa_txg_history_destroy(spa);
917 spa_read_history_destroy(spa);
918 spa_io_history_destroy(spa);
919 spa_mmp_history_destroy(spa);
920 }
921
#if defined(_KERNEL) && defined(HAVE_SPL)
/*
 * Register the history tunables as integer module parameters with
 * mode 0644.
 */
/* CSTYLED */
module_param(zfs_read_history, int, 0644);
MODULE_PARM_DESC(zfs_read_history,
	"Historical statistics for the last N reads");

module_param(zfs_read_history_hits, int, 0644);
MODULE_PARM_DESC(zfs_read_history_hits,
	"Include cache hits in read history");

module_param(zfs_txg_history, int, 0644);
MODULE_PARM_DESC(zfs_txg_history,
	"Historical statistics for the last N txgs");

module_param(zfs_multihost_history, int, 0644);
MODULE_PARM_DESC(zfs_multihost_history,
	"Historical statistics for last N multihost writes");
/* END CSTYLED */
#endif