]> git.proxmox.com Git - mirror_zfs.git/blame - module/zfs/spa_stats.c
Detect long config lock acquisition in mmp
[mirror_zfs.git] / module / zfs / spa_stats.c
CommitLineData
1421c891
PS
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22#include <sys/zfs_context.h>
23#include <sys/spa_impl.h>
379ca9cf 24#include <sys/vdev_impl.h>
1421c891
PS
25
/*
 * Number of most recent reads to keep statistics for, per spa_t.
 * Zero (the default) disables collection.
 */
int zfs_read_history = 0;

/*
 * When non-zero, reads flagged as cached are recorded in the read history
 * too.  Disabled by default.
 */
int zfs_read_history_hits = 0;

/*
 * Number of most recent txgs to keep statistics for (100 by default).
 */
int zfs_txg_history = 100;

/*
 * Number of most recent MMP updates to keep statistics for.
 * Zero (the default) disables collection.
 */
int zfs_multihost_history = 0;
45
1421c891
PS
46/*
47 * ==========================================================================
48 * SPA Read History Routines
49 * ==========================================================================
50 */
51
52/*
53 * Read statistics - Information exported regarding each arc_read call
54 */
55typedef struct spa_read_history {
56 uint64_t uid; /* unique identifier */
57 hrtime_t start; /* time read completed */
58 uint64_t objset; /* read from this objset */
59 uint64_t object; /* read of this object number */
60 uint64_t level; /* block's indirection level */
61 uint64_t blkid; /* read of this block id */
62 char origin[24]; /* read originated from here */
63 uint32_t aflags; /* ARC flags (cached, prefetch, etc.) */
64 pid_t pid; /* PID of task doing read */
65 char comm[16]; /* process name of task doing read */
66 list_node_t srh_link;
67} spa_read_history_t;
68
/*
 * Write the column header line of the "reads" kstat into buf (at most
 * size bytes, as bounded by snprintf).  Always returns 0.
 */
static int
spa_read_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size,
	    "%-8s %-16s %-8s %-8s %-8s %-8s %-8s %-24s %-8s %-16s\n",
	    "UID", "start", "objset", "object", "level",
	    "blkid", "aflags", "origin", "pid", "process");

	return (0);
}
78
79static int
80spa_read_history_data(char *buf, size_t size, void *data)
81{
82 spa_read_history_t *srh = (spa_read_history_t *)data;
83
7b2d78a0 84 (void) snprintf(buf, size, "%-8llu %-16llu 0x%-6llx "
1421c891
PS
85 "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
86 (u_longlong_t)srh->uid, srh->start,
87 (longlong_t)srh->objset, (longlong_t)srh->object,
88 (longlong_t)srh->level, (longlong_t)srh->blkid,
89 srh->aflags, srh->origin, srh->pid, srh->comm);
1421c891
PS
90
91 return (0);
92}
93
94/*
95 * Calculate the address for the next spa_stats_history_t entry. The
96 * ssh->lock will be held until ksp->ks_ndata entries are processed.
97 */
98static void *
99spa_read_history_addr(kstat_t *ksp, loff_t n)
100{
101 spa_t *spa = ksp->ks_private;
102 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
103
104 ASSERT(MUTEX_HELD(&ssh->lock));
105
106 if (n == 0)
107 ssh->private = list_tail(&ssh->list);
108 else if (ssh->private)
109 ssh->private = list_prev(&ssh->list, ssh->private);
110
111 return (ssh->private);
112}
113
114/*
4e33ba4c 115 * When the kstat is written discard all spa_read_history_t entries. The
1421c891
PS
116 * ssh->lock will be held until ksp->ks_ndata entries are processed.
117 */
118static int
119spa_read_history_update(kstat_t *ksp, int rw)
120{
121 spa_t *spa = ksp->ks_private;
122 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
123
124 if (rw == KSTAT_WRITE) {
125 spa_read_history_t *srh;
126
127 while ((srh = list_remove_head(&ssh->list))) {
128 ssh->size--;
d1d7e268 129 kmem_free(srh, sizeof (spa_read_history_t));
1421c891
PS
130 }
131
132 ASSERT3U(ssh->size, ==, 0);
133 }
134
135 ksp->ks_ndata = ssh->size;
d1d7e268 136 ksp->ks_data_size = ssh->size * sizeof (spa_read_history_t);
1421c891
PS
137
138 return (0);
139}
140
141static void
142spa_read_history_init(spa_t *spa)
143{
144 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
761b8ec6 145 char *name;
1421c891
PS
146 kstat_t *ksp;
147
148 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
149 list_create(&ssh->list, sizeof (spa_read_history_t),
150 offsetof(spa_read_history_t, srh_link));
151
152 ssh->count = 0;
153 ssh->size = 0;
154 ssh->private = NULL;
155
761b8ec6 156 name = kmem_asprintf("zfs/%s", spa_name(spa));
1421c891
PS
157
158 ksp = kstat_create(name, 0, "reads", "misc",
159 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
160 ssh->kstat = ksp;
161
162 if (ksp) {
163 ksp->ks_lock = &ssh->lock;
164 ksp->ks_data = NULL;
165 ksp->ks_private = spa;
166 ksp->ks_update = spa_read_history_update;
167 kstat_set_raw_ops(ksp, spa_read_history_headers,
168 spa_read_history_data, spa_read_history_addr);
169 kstat_install(ksp);
170 }
761b8ec6 171 strfree(name);
1421c891
PS
172}
173
174static void
175spa_read_history_destroy(spa_t *spa)
176{
177 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
178 spa_read_history_t *srh;
179 kstat_t *ksp;
180
181 ksp = ssh->kstat;
182 if (ksp)
183 kstat_delete(ksp);
184
185 mutex_enter(&ssh->lock);
186 while ((srh = list_remove_head(&ssh->list))) {
187 ssh->size--;
d1d7e268 188 kmem_free(srh, sizeof (spa_read_history_t));
1421c891
PS
189 }
190
191 ASSERT3U(ssh->size, ==, 0);
192 list_destroy(&ssh->list);
193 mutex_exit(&ssh->lock);
194
195 mutex_destroy(&ssh->lock);
196}
197
198void
5dbd68a3 199spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
1421c891
PS
200{
201 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
202 spa_read_history_t *srh, *rm;
203
204 ASSERT3P(spa, !=, NULL);
205 ASSERT3P(zb, !=, NULL);
206
207 if (zfs_read_history == 0 && ssh->size == 0)
208 return;
209
2a432414 210 if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED))
1421c891
PS
211 return;
212
79c76d5b 213 srh = kmem_zalloc(sizeof (spa_read_history_t), KM_SLEEP);
d1d7e268 214 strlcpy(srh->comm, getcomm(), sizeof (srh->comm));
1421c891
PS
215 srh->start = gethrtime();
216 srh->objset = zb->zb_objset;
217 srh->object = zb->zb_object;
218 srh->level = zb->zb_level;
219 srh->blkid = zb->zb_blkid;
220 srh->aflags = aflags;
221 srh->pid = getpid();
222
223 mutex_enter(&ssh->lock);
224
225 srh->uid = ssh->count++;
226 list_insert_head(&ssh->list, srh);
227 ssh->size++;
228
229 while (ssh->size > zfs_read_history) {
230 ssh->size--;
231 rm = list_remove_tail(&ssh->list);
d1d7e268 232 kmem_free(rm, sizeof (spa_read_history_t));
1421c891
PS
233 }
234
235 mutex_exit(&ssh->lock);
236}
237
0b1401ee
BB
238/*
239 * ==========================================================================
240 * SPA TXG History Routines
241 * ==========================================================================
242 */
243
244/*
245 * Txg statistics - Information exported regarding each txg sync
246 */
247
248typedef struct spa_txg_history {
249 uint64_t txg; /* txg id */
250 txg_state_t state; /* active txg state */
251 uint64_t nread; /* number of bytes read */
252 uint64_t nwritten; /* number of bytes written */
253 uint64_t reads; /* number of read operations */
254 uint64_t writes; /* number of write operations */
3ccab252 255 uint64_t ndirty; /* number of dirty bytes */
0b1401ee
BB
256 hrtime_t times[TXG_STATE_COMMITTED]; /* completion times */
257 list_node_t sth_link;
258} spa_txg_history_t;
259
/*
 * Write the column header line of the "txgs" kstat into buf (at most
 * size bytes, as bounded by snprintf).  Always returns 0.
 */
static int
spa_txg_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size,
	    "%-8s %-16s %-5s %-12s %-12s %-12s "
	    "%-8s %-8s %-12s %-12s %-12s %-12s\n",
	    "txg", "birth", "state", "ndirty", "nread", "nwritten",
	    "reads", "writes", "otime", "qtime", "wtime", "stime");

	return (0);
}
270
271static int
272spa_txg_history_data(char *buf, size_t size, void *data)
273{
274 spa_txg_history_t *sth = (spa_txg_history_t *)data;
478d64fd 275 uint64_t open = 0, quiesce = 0, wait = 0, sync = 0;
0b1401ee
BB
276 char state;
277
278 switch (sth->state) {
279 case TXG_STATE_BIRTH: state = 'B'; break;
280 case TXG_STATE_OPEN: state = 'O'; break;
281 case TXG_STATE_QUIESCED: state = 'Q'; break;
478d64fd 282 case TXG_STATE_WAIT_FOR_SYNC: state = 'W'; break;
0b1401ee
BB
283 case TXG_STATE_SYNCED: state = 'S'; break;
284 case TXG_STATE_COMMITTED: state = 'C'; break;
285 default: state = '?'; break;
286 }
287
288 if (sth->times[TXG_STATE_OPEN])
289 open = sth->times[TXG_STATE_OPEN] -
290 sth->times[TXG_STATE_BIRTH];
291
292 if (sth->times[TXG_STATE_QUIESCED])
293 quiesce = sth->times[TXG_STATE_QUIESCED] -
294 sth->times[TXG_STATE_OPEN];
295
478d64fd
IL
296 if (sth->times[TXG_STATE_WAIT_FOR_SYNC])
297 wait = sth->times[TXG_STATE_WAIT_FOR_SYNC] -
298 sth->times[TXG_STATE_QUIESCED];
299
0b1401ee
BB
300 if (sth->times[TXG_STATE_SYNCED])
301 sync = sth->times[TXG_STATE_SYNCED] -
478d64fd 302 sth->times[TXG_STATE_WAIT_FOR_SYNC];
0b1401ee 303
7b2d78a0 304 (void) snprintf(buf, size, "%-8llu %-16llu %-5c %-12llu "
478d64fd 305 "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
0b1401ee 306 (longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state,
3ccab252 307 (u_longlong_t)sth->ndirty,
0b1401ee
BB
308 (u_longlong_t)sth->nread, (u_longlong_t)sth->nwritten,
309 (u_longlong_t)sth->reads, (u_longlong_t)sth->writes,
478d64fd
IL
310 (u_longlong_t)open, (u_longlong_t)quiesce, (u_longlong_t)wait,
311 (u_longlong_t)sync);
0b1401ee
BB
312
313 return (0);
314}
315
316/*
317 * Calculate the address for the next spa_stats_history_t entry. The
318 * ssh->lock will be held until ksp->ks_ndata entries are processed.
319 */
320static void *
321spa_txg_history_addr(kstat_t *ksp, loff_t n)
322{
323 spa_t *spa = ksp->ks_private;
324 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
325
326 ASSERT(MUTEX_HELD(&ssh->lock));
327
328 if (n == 0)
329 ssh->private = list_tail(&ssh->list);
330 else if (ssh->private)
331 ssh->private = list_prev(&ssh->list, ssh->private);
332
333 return (ssh->private);
334}
335
336/*
4e33ba4c 337 * When the kstat is written discard all spa_txg_history_t entries. The
0b1401ee
BB
338 * ssh->lock will be held until ksp->ks_ndata entries are processed.
339 */
340static int
341spa_txg_history_update(kstat_t *ksp, int rw)
342{
343 spa_t *spa = ksp->ks_private;
344 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
345
346 ASSERT(MUTEX_HELD(&ssh->lock));
347
348 if (rw == KSTAT_WRITE) {
349 spa_txg_history_t *sth;
350
351 while ((sth = list_remove_head(&ssh->list))) {
352 ssh->size--;
d1d7e268 353 kmem_free(sth, sizeof (spa_txg_history_t));
0b1401ee
BB
354 }
355
356 ASSERT3U(ssh->size, ==, 0);
357 }
358
359 ksp->ks_ndata = ssh->size;
d1d7e268 360 ksp->ks_data_size = ssh->size * sizeof (spa_txg_history_t);
0b1401ee
BB
361
362 return (0);
363}
364
365static void
366spa_txg_history_init(spa_t *spa)
367{
368 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
761b8ec6 369 char *name;
0b1401ee
BB
370 kstat_t *ksp;
371
372 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
373 list_create(&ssh->list, sizeof (spa_txg_history_t),
374 offsetof(spa_txg_history_t, sth_link));
375
376 ssh->count = 0;
377 ssh->size = 0;
378 ssh->private = NULL;
379
761b8ec6 380 name = kmem_asprintf("zfs/%s", spa_name(spa));
0b1401ee
BB
381
382 ksp = kstat_create(name, 0, "txgs", "misc",
383 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
384 ssh->kstat = ksp;
385
386 if (ksp) {
387 ksp->ks_lock = &ssh->lock;
388 ksp->ks_data = NULL;
389 ksp->ks_private = spa;
390 ksp->ks_update = spa_txg_history_update;
391 kstat_set_raw_ops(ksp, spa_txg_history_headers,
392 spa_txg_history_data, spa_txg_history_addr);
393 kstat_install(ksp);
394 }
761b8ec6 395 strfree(name);
0b1401ee
BB
396}
397
398static void
399spa_txg_history_destroy(spa_t *spa)
400{
401 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
402 spa_txg_history_t *sth;
403 kstat_t *ksp;
404
405 ksp = ssh->kstat;
406 if (ksp)
407 kstat_delete(ksp);
408
409 mutex_enter(&ssh->lock);
410 while ((sth = list_remove_head(&ssh->list))) {
411 ssh->size--;
d1d7e268 412 kmem_free(sth, sizeof (spa_txg_history_t));
0b1401ee
BB
413 }
414
415 ASSERT3U(ssh->size, ==, 0);
416 list_destroy(&ssh->list);
417 mutex_exit(&ssh->lock);
418
419 mutex_destroy(&ssh->lock);
420}
421
422/*
423 * Add a new txg to historical record.
424 */
425void
01b738f4 426spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
0b1401ee
BB
427{
428 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
429 spa_txg_history_t *sth, *rm;
430
431 if (zfs_txg_history == 0 && ssh->size == 0)
432 return;
433
79c76d5b 434 sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP);
0b1401ee
BB
435 sth->txg = txg;
436 sth->state = TXG_STATE_OPEN;
01b738f4 437 sth->times[TXG_STATE_BIRTH] = birth_time;
0b1401ee
BB
438
439 mutex_enter(&ssh->lock);
440
441 list_insert_head(&ssh->list, sth);
442 ssh->size++;
443
444 while (ssh->size > zfs_txg_history) {
445 ssh->size--;
446 rm = list_remove_tail(&ssh->list);
d1d7e268 447 kmem_free(rm, sizeof (spa_txg_history_t));
0b1401ee
BB
448 }
449
450 mutex_exit(&ssh->lock);
451}
452
453/*
454 * Set txg state completion time and increment current state.
455 */
456int
457spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
458 hrtime_t completed_time)
459{
460 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
461 spa_txg_history_t *sth;
462 int error = ENOENT;
463
464 if (zfs_txg_history == 0)
465 return (0);
466
467 mutex_enter(&ssh->lock);
468 for (sth = list_head(&ssh->list); sth != NULL;
d1d7e268 469 sth = list_next(&ssh->list, sth)) {
0b1401ee
BB
470 if (sth->txg == txg) {
471 sth->times[completed_state] = completed_time;
472 sth->state++;
473 error = 0;
474 break;
475 }
476 }
477 mutex_exit(&ssh->lock);
478
479 return (error);
480}
481
482/*
483 * Set txg IO stats.
484 */
baf67d15 485static int
0b1401ee 486spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
3ccab252 487 uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty)
0b1401ee
BB
488{
489 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
490 spa_txg_history_t *sth;
491 int error = ENOENT;
492
493 if (zfs_txg_history == 0)
494 return (0);
495
496 mutex_enter(&ssh->lock);
497 for (sth = list_head(&ssh->list); sth != NULL;
d1d7e268 498 sth = list_next(&ssh->list, sth)) {
0b1401ee
BB
499 if (sth->txg == txg) {
500 sth->nread = nread;
501 sth->nwritten = nwritten;
502 sth->reads = reads;
503 sth->writes = writes;
3ccab252 504 sth->ndirty = ndirty;
0b1401ee
BB
505 error = 0;
506 break;
507 }
508 }
509 mutex_exit(&ssh->lock);
510
511 return (error);
512}
513
baf67d15
BB
514txg_stat_t *
515spa_txg_history_init_io(spa_t *spa, uint64_t txg, dsl_pool_t *dp)
516{
517 txg_stat_t *ts;
518
519 if (zfs_txg_history == 0)
520 return (NULL);
521
522 ts = kmem_alloc(sizeof (txg_stat_t), KM_SLEEP);
523
524 spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
525 vdev_get_stats(spa->spa_root_vdev, &ts->vs1);
526 spa_config_exit(spa, SCL_ALL, FTAG);
527
528 ts->txg = txg;
529 ts->ndirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
530
531 spa_txg_history_set(spa, txg, TXG_STATE_WAIT_FOR_SYNC, gethrtime());
532
533 return (ts);
534}
535
536void
537spa_txg_history_fini_io(spa_t *spa, txg_stat_t *ts)
538{
539 if (ts == NULL)
540 return;
541
542 if (zfs_txg_history == 0) {
543 kmem_free(ts, sizeof (txg_stat_t));
544 return;
545 }
546
547 spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
548 vdev_get_stats(spa->spa_root_vdev, &ts->vs2);
549 spa_config_exit(spa, SCL_ALL, FTAG);
550
551 spa_txg_history_set(spa, ts->txg, TXG_STATE_SYNCED, gethrtime());
552 spa_txg_history_set_io(spa, ts->txg,
553 ts->vs2.vs_bytes[ZIO_TYPE_READ] - ts->vs1.vs_bytes[ZIO_TYPE_READ],
554 ts->vs2.vs_bytes[ZIO_TYPE_WRITE] - ts->vs1.vs_bytes[ZIO_TYPE_WRITE],
555 ts->vs2.vs_ops[ZIO_TYPE_READ] - ts->vs1.vs_ops[ZIO_TYPE_READ],
556 ts->vs2.vs_ops[ZIO_TYPE_WRITE] - ts->vs1.vs_ops[ZIO_TYPE_WRITE],
557 ts->ndirty);
558
559 kmem_free(ts, sizeof (txg_stat_t));
560}
561
2d37239a
BB
562/*
563 * ==========================================================================
564 * SPA TX Assign Histogram Routines
565 * ==========================================================================
566 */
567
568/*
569 * Tx statistics - Information exported regarding dmu_tx_assign time.
570 */
571
572/*
573 * When the kstat is written zero all buckets. When the kstat is read
574 * count the number of trailing buckets set to zero and update ks_ndata
575 * such that they are not output.
576 */
577static int
578spa_tx_assign_update(kstat_t *ksp, int rw)
579{
580 spa_t *spa = ksp->ks_private;
581 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
582 int i;
583
584 if (rw == KSTAT_WRITE) {
585 for (i = 0; i < ssh->count; i++)
586 ((kstat_named_t *)ssh->private)[i].value.ui64 = 0;
587 }
588
589 for (i = ssh->count; i > 0; i--)
590 if (((kstat_named_t *)ssh->private)[i-1].value.ui64 != 0)
591 break;
592
593 ksp->ks_ndata = i;
d1d7e268 594 ksp->ks_data_size = i * sizeof (kstat_named_t);
2d37239a
BB
595
596 return (0);
597}
598
599static void
600spa_tx_assign_init(spa_t *spa)
601{
602 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
761b8ec6 603 char *name;
2d37239a
BB
604 kstat_named_t *ks;
605 kstat_t *ksp;
606 int i;
607
608 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
609
610 ssh->count = 42; /* power of two buckets for 1ns to 2,199s */
d1d7e268 611 ssh->size = ssh->count * sizeof (kstat_named_t);
2d37239a
BB
612 ssh->private = kmem_alloc(ssh->size, KM_SLEEP);
613
761b8ec6 614 name = kmem_asprintf("zfs/%s", spa_name(spa));
2d37239a
BB
615
616 for (i = 0; i < ssh->count; i++) {
617 ks = &((kstat_named_t *)ssh->private)[i];
618 ks->data_type = KSTAT_DATA_UINT64;
619 ks->value.ui64 = 0;
620 (void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns",
621 (u_longlong_t)1 << i);
622 }
623
624 ksp = kstat_create(name, 0, "dmu_tx_assign", "misc",
625 KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL);
626 ssh->kstat = ksp;
627
628 if (ksp) {
629 ksp->ks_lock = &ssh->lock;
630 ksp->ks_data = ssh->private;
631 ksp->ks_ndata = ssh->count;
632 ksp->ks_data_size = ssh->size;
633 ksp->ks_private = spa;
634 ksp->ks_update = spa_tx_assign_update;
635 kstat_install(ksp);
636 }
761b8ec6 637 strfree(name);
2d37239a
BB
638}
639
640static void
641spa_tx_assign_destroy(spa_t *spa)
642{
643 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
644 kstat_t *ksp;
645
646 ksp = ssh->kstat;
647 if (ksp)
648 kstat_delete(ksp);
649
650 kmem_free(ssh->private, ssh->size);
651 mutex_destroy(&ssh->lock);
652}
653
654void
655spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs)
656{
657 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
658 uint64_t idx = 0;
659
4ca9c1de 660 while (((1ULL << idx) < nsecs) && (idx < ssh->size - 1))
2d37239a
BB
661 idx++;
662
663 atomic_inc_64(&((kstat_named_t *)ssh->private)[idx].value.ui64);
664}
665
330847ff
MA
666/*
667 * ==========================================================================
668 * SPA IO History Routines
669 * ==========================================================================
670 */
671static int
672spa_io_history_update(kstat_t *ksp, int rw)
673{
674 if (rw == KSTAT_WRITE)
675 memset(ksp->ks_data, 0, ksp->ks_data_size);
676
677 return (0);
678}
679
680static void
681spa_io_history_init(spa_t *spa)
682{
683 spa_stats_history_t *ssh = &spa->spa_stats.io_history;
761b8ec6 684 char *name;
330847ff
MA
685 kstat_t *ksp;
686
687 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
688
761b8ec6 689 name = kmem_asprintf("zfs/%s", spa_name(spa));
330847ff
MA
690
691 ksp = kstat_create(name, 0, "io", "disk", KSTAT_TYPE_IO, 1, 0);
692 ssh->kstat = ksp;
693
694 if (ksp) {
695 ksp->ks_lock = &ssh->lock;
696 ksp->ks_private = spa;
697 ksp->ks_update = spa_io_history_update;
698 kstat_install(ksp);
699 }
761b8ec6 700 strfree(name);
330847ff
MA
701}
702
703static void
704spa_io_history_destroy(spa_t *spa)
705{
706 spa_stats_history_t *ssh = &spa->spa_stats.io_history;
707
708 if (ssh->kstat)
709 kstat_delete(ssh->kstat);
710
711 mutex_destroy(&ssh->lock);
712}
713
379ca9cf
OF
714/*
715 * ==========================================================================
716 * SPA MMP History Routines
717 * ==========================================================================
718 */
719
720/*
721 * MMP statistics - Information exported regarding each MMP update
722 */
723
724typedef struct spa_mmp_history {
7088545d 725 uint64_t mmp_kstat_id; /* unique # for updates */
379ca9cf
OF
726 uint64_t txg; /* txg of last sync */
727 uint64_t timestamp; /* UTC time of of last sync */
728 uint64_t mmp_delay; /* nanosec since last MMP write */
729 uint64_t vdev_guid; /* unique ID of leaf vdev */
730 char *vdev_path;
731 uint64_t vdev_label; /* vdev label */
7088545d
OF
732 int io_error; /* error status of MMP write */
733 hrtime_t duration; /* time from submission to completion */
379ca9cf
OF
734 list_node_t smh_link;
735} spa_mmp_history_t;
736
/*
 * Write the column header line of the "multihost" kstat into buf (at
 * most size bytes, as bounded by snprintf).  Always returns 0.
 */
static int
spa_mmp_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size,
	    "%-10s %-10s %-10s %-6s %-10s %-12s %-24s %-10s %s\n",
	    "id", "txg", "timestamp", "error", "duration",
	    "mmp_delay", "vdev_guid", "vdev_label", "vdev_path");

	return (0);
}
745
746static int
747spa_mmp_history_data(char *buf, size_t size, void *data)
748{
749 spa_mmp_history_t *smh = (spa_mmp_history_t *)data;
750
7088545d
OF
751 (void) snprintf(buf, size, "%-10llu %-10llu %-10llu %-6lld %-10lld "
752 "%-12llu %-24llu %-10llu %s\n",
753 (u_longlong_t)smh->mmp_kstat_id, (u_longlong_t)smh->txg,
754 (u_longlong_t)smh->timestamp, (longlong_t)smh->io_error,
755 (longlong_t)smh->duration, (u_longlong_t)smh->mmp_delay,
756 (u_longlong_t)smh->vdev_guid, (u_longlong_t)smh->vdev_label,
379ca9cf
OF
757 (smh->vdev_path ? smh->vdev_path : "-"));
758
759 return (0);
760}
761
762/*
763 * Calculate the address for the next spa_stats_history_t entry. The
764 * ssh->lock will be held until ksp->ks_ndata entries are processed.
765 */
766static void *
767spa_mmp_history_addr(kstat_t *ksp, loff_t n)
768{
769 spa_t *spa = ksp->ks_private;
770 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
771
772 ASSERT(MUTEX_HELD(&ssh->lock));
773
774 if (n == 0)
775 ssh->private = list_tail(&ssh->list);
776 else if (ssh->private)
777 ssh->private = list_prev(&ssh->list, ssh->private);
778
779 return (ssh->private);
780}
781
782/*
783 * When the kstat is written discard all spa_mmp_history_t entries. The
784 * ssh->lock will be held until ksp->ks_ndata entries are processed.
785 */
786static int
787spa_mmp_history_update(kstat_t *ksp, int rw)
788{
789 spa_t *spa = ksp->ks_private;
790 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
791
792 ASSERT(MUTEX_HELD(&ssh->lock));
793
794 if (rw == KSTAT_WRITE) {
795 spa_mmp_history_t *smh;
796
797 while ((smh = list_remove_head(&ssh->list))) {
798 ssh->size--;
799 if (smh->vdev_path)
800 strfree(smh->vdev_path);
801 kmem_free(smh, sizeof (spa_mmp_history_t));
802 }
803
804 ASSERT3U(ssh->size, ==, 0);
805 }
806
807 ksp->ks_ndata = ssh->size;
808 ksp->ks_data_size = ssh->size * sizeof (spa_mmp_history_t);
809
810 return (0);
811}
812
813static void
814spa_mmp_history_init(spa_t *spa)
815{
816 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
761b8ec6 817 char *name;
379ca9cf
OF
818 kstat_t *ksp;
819
820 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
821 list_create(&ssh->list, sizeof (spa_mmp_history_t),
822 offsetof(spa_mmp_history_t, smh_link));
823
824 ssh->count = 0;
825 ssh->size = 0;
826 ssh->private = NULL;
827
761b8ec6 828 name = kmem_asprintf("zfs/%s", spa_name(spa));
379ca9cf
OF
829
830 ksp = kstat_create(name, 0, "multihost", "misc",
831 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
832 ssh->kstat = ksp;
833
834 if (ksp) {
835 ksp->ks_lock = &ssh->lock;
836 ksp->ks_data = NULL;
837 ksp->ks_private = spa;
838 ksp->ks_update = spa_mmp_history_update;
839 kstat_set_raw_ops(ksp, spa_mmp_history_headers,
840 spa_mmp_history_data, spa_mmp_history_addr);
841 kstat_install(ksp);
842 }
761b8ec6 843 strfree(name);
379ca9cf
OF
844}
845
846static void
847spa_mmp_history_destroy(spa_t *spa)
848{
849 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
850 spa_mmp_history_t *smh;
851 kstat_t *ksp;
852
853 ksp = ssh->kstat;
854 if (ksp)
855 kstat_delete(ksp);
856
857 mutex_enter(&ssh->lock);
858 while ((smh = list_remove_head(&ssh->list))) {
859 ssh->size--;
860 if (smh->vdev_path)
861 strfree(smh->vdev_path);
862 kmem_free(smh, sizeof (spa_mmp_history_t));
863 }
864
865 ASSERT3U(ssh->size, ==, 0);
866 list_destroy(&ssh->list);
867 mutex_exit(&ssh->lock);
868
869 mutex_destroy(&ssh->lock);
870}
871
872/*
7088545d
OF
873 * Set MMP write duration and error status in existing record.
874 */
875int
876spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
877 hrtime_t duration)
878{
879 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
880 spa_mmp_history_t *smh;
881 int error = ENOENT;
882
883 if (zfs_multihost_history == 0 && ssh->size == 0)
884 return (0);
885
886 mutex_enter(&ssh->lock);
887 for (smh = list_head(&ssh->list); smh != NULL;
888 smh = list_next(&ssh->list, smh)) {
889 if (smh->mmp_kstat_id == mmp_kstat_id) {
890 smh->io_error = io_error;
891 smh->duration = duration;
892 error = 0;
893 break;
894 }
895 }
896 mutex_exit(&ssh->lock);
897
898 return (error);
899}
900
901/*
902 * Add a new MMP write to historical record.
379ca9cf
OF
903 */
904void
905spa_mmp_history_add(uint64_t txg, uint64_t timestamp, uint64_t mmp_delay,
7088545d 906 vdev_t *vd, int label, uint64_t mmp_kstat_id)
379ca9cf
OF
907{
908 spa_t *spa = vd->vdev_spa;
909 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
910 spa_mmp_history_t *smh, *rm;
911
912 if (zfs_multihost_history == 0 && ssh->size == 0)
913 return;
914
915 smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP);
916 smh->txg = txg;
917 smh->timestamp = timestamp;
918 smh->mmp_delay = mmp_delay;
919 smh->vdev_guid = vd->vdev_guid;
920 if (vd->vdev_path)
921 smh->vdev_path = strdup(vd->vdev_path);
922 smh->vdev_label = label;
7088545d 923 smh->mmp_kstat_id = mmp_kstat_id;
379ca9cf
OF
924
925 mutex_enter(&ssh->lock);
926
927 list_insert_head(&ssh->list, smh);
928 ssh->size++;
929
930 while (ssh->size > zfs_multihost_history) {
931 ssh->size--;
932 rm = list_remove_tail(&ssh->list);
933 if (rm->vdev_path)
934 strfree(rm->vdev_path);
935 kmem_free(rm, sizeof (spa_mmp_history_t));
936 }
937
938 mutex_exit(&ssh->lock);
939}
940
1421c891
PS
941void
942spa_stats_init(spa_t *spa)
943{
944 spa_read_history_init(spa);
0b1401ee 945 spa_txg_history_init(spa);
2d37239a 946 spa_tx_assign_init(spa);
330847ff 947 spa_io_history_init(spa);
379ca9cf 948 spa_mmp_history_init(spa);
1421c891
PS
949}
950
951void
952spa_stats_destroy(spa_t *spa)
953{
2d37239a 954 spa_tx_assign_destroy(spa);
0b1401ee 955 spa_txg_history_destroy(spa);
1421c891 956 spa_read_history_destroy(spa);
330847ff 957 spa_io_history_destroy(spa);
379ca9cf 958 spa_mmp_history_destroy(spa);
1421c891
PS
959}
960
#if defined(_KERNEL) && defined(HAVE_SPL)
/*
 * Expose the history tunables defined at the top of this file as int
 * module parameters with mode 0644.
 */
/* CSTYLED */
module_param(zfs_read_history, int, 0644);
MODULE_PARM_DESC(zfs_read_history,
	"Historical statistics for the last N reads");

module_param(zfs_read_history_hits, int, 0644);
MODULE_PARM_DESC(zfs_read_history_hits,
	"Include cache hits in read history");

module_param(zfs_txg_history, int, 0644);
MODULE_PARM_DESC(zfs_txg_history,
	"Historical statistics for the last N txgs");

module_param(zfs_multihost_history, int, 0644);
MODULE_PARM_DESC(zfs_multihost_history,
	"Historical statistics for last N multihost writes");
/* END CSTYLED */
#endif