]> git.proxmox.com Git - mirror_zfs.git/blame - module/zfs/spa_stats.c
Fix zio->io_priority failed (7 < 6) assert
[mirror_zfs.git] / module / zfs / spa_stats.c
CommitLineData
1421c891
PS
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22#include <sys/zfs_context.h>
23#include <sys/spa_impl.h>
379ca9cf 24#include <sys/vdev_impl.h>
1421c891
PS
25
/*
 * Number of most recent reads remembered per spa_t.  Zero (the default)
 * turns the read history off.
 */
int zfs_read_history = 0;

/*
 * When non-zero, reads flagged ARC_FLAG_CACHED are recorded as well.
 * Off by default.
 */
int zfs_read_history_hits = 0;

/*
 * Number of most recent txgs remembered per spa_t (100 by default).
 */
int zfs_txg_history = 100;

/*
 * Number of most recent MMP updates remembered per spa_t.  Zero (the
 * default) turns the MMP history off.
 */
int zfs_multihost_history = 0;
45
1421c891
PS
/*
 * ==========================================================================
 * SPA Read History Routines
 * ==========================================================================
 */
51
52/*
53 * Read statistics - Information exported regarding each arc_read call
54 */
55typedef struct spa_read_history {
56 uint64_t uid; /* unique identifier */
57 hrtime_t start; /* time read completed */
58 uint64_t objset; /* read from this objset */
59 uint64_t object; /* read of this object number */
60 uint64_t level; /* block's indirection level */
61 uint64_t blkid; /* read of this block id */
62 char origin[24]; /* read originated from here */
63 uint32_t aflags; /* ARC flags (cached, prefetch, etc.) */
64 pid_t pid; /* PID of task doing read */
65 char comm[16]; /* process name of task doing read */
66 list_node_t srh_link;
67} spa_read_history_t;
68
/*
 * Emit the column-header line for the "reads" kstat into buf, writing at
 * most size bytes (snprintf truncates).  Always returns 0.
 */
static int
spa_read_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size,
	    "%-8s %-16s %-8s %-8s %-8s %-8s %-8s "
	    "%-24s %-8s %-16s\n",
	    "UID", "start", "objset", "object", "level", "blkid",
	    "aflags", "origin", "pid", "process");

	return (0);
}
78
79static int
80spa_read_history_data(char *buf, size_t size, void *data)
81{
82 spa_read_history_t *srh = (spa_read_history_t *)data;
83
7b2d78a0 84 (void) snprintf(buf, size, "%-8llu %-16llu 0x%-6llx "
1421c891
PS
85 "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
86 (u_longlong_t)srh->uid, srh->start,
87 (longlong_t)srh->objset, (longlong_t)srh->object,
88 (longlong_t)srh->level, (longlong_t)srh->blkid,
89 srh->aflags, srh->origin, srh->pid, srh->comm);
1421c891
PS
90
91 return (0);
92}
93
94/*
95 * Calculate the address for the next spa_stats_history_t entry. The
96 * ssh->lock will be held until ksp->ks_ndata entries are processed.
97 */
98static void *
99spa_read_history_addr(kstat_t *ksp, loff_t n)
100{
101 spa_t *spa = ksp->ks_private;
102 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
103
104 ASSERT(MUTEX_HELD(&ssh->lock));
105
106 if (n == 0)
107 ssh->private = list_tail(&ssh->list);
108 else if (ssh->private)
109 ssh->private = list_prev(&ssh->list, ssh->private);
110
111 return (ssh->private);
112}
113
114/*
4e33ba4c 115 * When the kstat is written discard all spa_read_history_t entries. The
1421c891
PS
116 * ssh->lock will be held until ksp->ks_ndata entries are processed.
117 */
118static int
119spa_read_history_update(kstat_t *ksp, int rw)
120{
121 spa_t *spa = ksp->ks_private;
122 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
123
124 if (rw == KSTAT_WRITE) {
125 spa_read_history_t *srh;
126
127 while ((srh = list_remove_head(&ssh->list))) {
128 ssh->size--;
d1d7e268 129 kmem_free(srh, sizeof (spa_read_history_t));
1421c891
PS
130 }
131
132 ASSERT3U(ssh->size, ==, 0);
133 }
134
135 ksp->ks_ndata = ssh->size;
d1d7e268 136 ksp->ks_data_size = ssh->size * sizeof (spa_read_history_t);
1421c891
PS
137
138 return (0);
139}
140
141static void
142spa_read_history_init(spa_t *spa)
143{
144 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
761b8ec6 145 char *name;
1421c891
PS
146 kstat_t *ksp;
147
148 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
149 list_create(&ssh->list, sizeof (spa_read_history_t),
150 offsetof(spa_read_history_t, srh_link));
151
152 ssh->count = 0;
153 ssh->size = 0;
154 ssh->private = NULL;
155
761b8ec6 156 name = kmem_asprintf("zfs/%s", spa_name(spa));
1421c891
PS
157
158 ksp = kstat_create(name, 0, "reads", "misc",
159 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
160 ssh->kstat = ksp;
161
162 if (ksp) {
163 ksp->ks_lock = &ssh->lock;
164 ksp->ks_data = NULL;
165 ksp->ks_private = spa;
166 ksp->ks_update = spa_read_history_update;
167 kstat_set_raw_ops(ksp, spa_read_history_headers,
168 spa_read_history_data, spa_read_history_addr);
169 kstat_install(ksp);
170 }
761b8ec6 171 strfree(name);
1421c891
PS
172}
173
174static void
175spa_read_history_destroy(spa_t *spa)
176{
177 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
178 spa_read_history_t *srh;
179 kstat_t *ksp;
180
181 ksp = ssh->kstat;
182 if (ksp)
183 kstat_delete(ksp);
184
185 mutex_enter(&ssh->lock);
186 while ((srh = list_remove_head(&ssh->list))) {
187 ssh->size--;
d1d7e268 188 kmem_free(srh, sizeof (spa_read_history_t));
1421c891
PS
189 }
190
191 ASSERT3U(ssh->size, ==, 0);
192 list_destroy(&ssh->list);
193 mutex_exit(&ssh->lock);
194
195 mutex_destroy(&ssh->lock);
196}
197
198void
5dbd68a3 199spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
1421c891
PS
200{
201 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
202 spa_read_history_t *srh, *rm;
203
204 ASSERT3P(spa, !=, NULL);
205 ASSERT3P(zb, !=, NULL);
206
207 if (zfs_read_history == 0 && ssh->size == 0)
208 return;
209
2a432414 210 if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED))
1421c891
PS
211 return;
212
79c76d5b 213 srh = kmem_zalloc(sizeof (spa_read_history_t), KM_SLEEP);
d1d7e268 214 strlcpy(srh->comm, getcomm(), sizeof (srh->comm));
1421c891
PS
215 srh->start = gethrtime();
216 srh->objset = zb->zb_objset;
217 srh->object = zb->zb_object;
218 srh->level = zb->zb_level;
219 srh->blkid = zb->zb_blkid;
220 srh->aflags = aflags;
221 srh->pid = getpid();
222
223 mutex_enter(&ssh->lock);
224
225 srh->uid = ssh->count++;
226 list_insert_head(&ssh->list, srh);
227 ssh->size++;
228
229 while (ssh->size > zfs_read_history) {
230 ssh->size--;
231 rm = list_remove_tail(&ssh->list);
d1d7e268 232 kmem_free(rm, sizeof (spa_read_history_t));
1421c891
PS
233 }
234
235 mutex_exit(&ssh->lock);
236}
237
0b1401ee
BB
/*
 * ==========================================================================
 * SPA TXG History Routines
 * ==========================================================================
 */
243
244/*
245 * Txg statistics - Information exported regarding each txg sync
246 */
247
248typedef struct spa_txg_history {
249 uint64_t txg; /* txg id */
250 txg_state_t state; /* active txg state */
251 uint64_t nread; /* number of bytes read */
252 uint64_t nwritten; /* number of bytes written */
253 uint64_t reads; /* number of read operations */
254 uint64_t writes; /* number of write operations */
3ccab252 255 uint64_t ndirty; /* number of dirty bytes */
0b1401ee
BB
256 hrtime_t times[TXG_STATE_COMMITTED]; /* completion times */
257 list_node_t sth_link;
258} spa_txg_history_t;
259
/*
 * Emit the column-header line for the "txgs" kstat into buf, writing at
 * most size bytes (snprintf truncates).  Always returns 0.
 */
static int
spa_txg_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size,
	    "%-8s %-16s %-5s %-12s %-12s %-12s "
	    "%-8s %-8s %-12s %-12s %-12s %-12s\n",
	    "txg", "birth", "state", "ndirty", "nread", "nwritten",
	    "reads", "writes", "otime", "qtime", "wtime", "stime");

	return (0);
}
270
271static int
272spa_txg_history_data(char *buf, size_t size, void *data)
273{
274 spa_txg_history_t *sth = (spa_txg_history_t *)data;
478d64fd 275 uint64_t open = 0, quiesce = 0, wait = 0, sync = 0;
0b1401ee
BB
276 char state;
277
278 switch (sth->state) {
279 case TXG_STATE_BIRTH: state = 'B'; break;
280 case TXG_STATE_OPEN: state = 'O'; break;
281 case TXG_STATE_QUIESCED: state = 'Q'; break;
478d64fd 282 case TXG_STATE_WAIT_FOR_SYNC: state = 'W'; break;
0b1401ee
BB
283 case TXG_STATE_SYNCED: state = 'S'; break;
284 case TXG_STATE_COMMITTED: state = 'C'; break;
285 default: state = '?'; break;
286 }
287
288 if (sth->times[TXG_STATE_OPEN])
289 open = sth->times[TXG_STATE_OPEN] -
290 sth->times[TXG_STATE_BIRTH];
291
292 if (sth->times[TXG_STATE_QUIESCED])
293 quiesce = sth->times[TXG_STATE_QUIESCED] -
294 sth->times[TXG_STATE_OPEN];
295
478d64fd
IL
296 if (sth->times[TXG_STATE_WAIT_FOR_SYNC])
297 wait = sth->times[TXG_STATE_WAIT_FOR_SYNC] -
298 sth->times[TXG_STATE_QUIESCED];
299
0b1401ee
BB
300 if (sth->times[TXG_STATE_SYNCED])
301 sync = sth->times[TXG_STATE_SYNCED] -
478d64fd 302 sth->times[TXG_STATE_WAIT_FOR_SYNC];
0b1401ee 303
7b2d78a0 304 (void) snprintf(buf, size, "%-8llu %-16llu %-5c %-12llu "
478d64fd 305 "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
0b1401ee 306 (longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state,
3ccab252 307 (u_longlong_t)sth->ndirty,
0b1401ee
BB
308 (u_longlong_t)sth->nread, (u_longlong_t)sth->nwritten,
309 (u_longlong_t)sth->reads, (u_longlong_t)sth->writes,
478d64fd
IL
310 (u_longlong_t)open, (u_longlong_t)quiesce, (u_longlong_t)wait,
311 (u_longlong_t)sync);
0b1401ee
BB
312
313 return (0);
314}
315
316/*
317 * Calculate the address for the next spa_stats_history_t entry. The
318 * ssh->lock will be held until ksp->ks_ndata entries are processed.
319 */
320static void *
321spa_txg_history_addr(kstat_t *ksp, loff_t n)
322{
323 spa_t *spa = ksp->ks_private;
324 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
325
326 ASSERT(MUTEX_HELD(&ssh->lock));
327
328 if (n == 0)
329 ssh->private = list_tail(&ssh->list);
330 else if (ssh->private)
331 ssh->private = list_prev(&ssh->list, ssh->private);
332
333 return (ssh->private);
334}
335
336/*
4e33ba4c 337 * When the kstat is written discard all spa_txg_history_t entries. The
0b1401ee
BB
338 * ssh->lock will be held until ksp->ks_ndata entries are processed.
339 */
340static int
341spa_txg_history_update(kstat_t *ksp, int rw)
342{
343 spa_t *spa = ksp->ks_private;
344 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
345
346 ASSERT(MUTEX_HELD(&ssh->lock));
347
348 if (rw == KSTAT_WRITE) {
349 spa_txg_history_t *sth;
350
351 while ((sth = list_remove_head(&ssh->list))) {
352 ssh->size--;
d1d7e268 353 kmem_free(sth, sizeof (spa_txg_history_t));
0b1401ee
BB
354 }
355
356 ASSERT3U(ssh->size, ==, 0);
357 }
358
359 ksp->ks_ndata = ssh->size;
d1d7e268 360 ksp->ks_data_size = ssh->size * sizeof (spa_txg_history_t);
0b1401ee
BB
361
362 return (0);
363}
364
365static void
366spa_txg_history_init(spa_t *spa)
367{
368 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
761b8ec6 369 char *name;
0b1401ee
BB
370 kstat_t *ksp;
371
372 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
373 list_create(&ssh->list, sizeof (spa_txg_history_t),
374 offsetof(spa_txg_history_t, sth_link));
375
376 ssh->count = 0;
377 ssh->size = 0;
378 ssh->private = NULL;
379
761b8ec6 380 name = kmem_asprintf("zfs/%s", spa_name(spa));
0b1401ee
BB
381
382 ksp = kstat_create(name, 0, "txgs", "misc",
383 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
384 ssh->kstat = ksp;
385
386 if (ksp) {
387 ksp->ks_lock = &ssh->lock;
388 ksp->ks_data = NULL;
389 ksp->ks_private = spa;
390 ksp->ks_update = spa_txg_history_update;
391 kstat_set_raw_ops(ksp, spa_txg_history_headers,
392 spa_txg_history_data, spa_txg_history_addr);
393 kstat_install(ksp);
394 }
761b8ec6 395 strfree(name);
0b1401ee
BB
396}
397
398static void
399spa_txg_history_destroy(spa_t *spa)
400{
401 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
402 spa_txg_history_t *sth;
403 kstat_t *ksp;
404
405 ksp = ssh->kstat;
406 if (ksp)
407 kstat_delete(ksp);
408
409 mutex_enter(&ssh->lock);
410 while ((sth = list_remove_head(&ssh->list))) {
411 ssh->size--;
d1d7e268 412 kmem_free(sth, sizeof (spa_txg_history_t));
0b1401ee
BB
413 }
414
415 ASSERT3U(ssh->size, ==, 0);
416 list_destroy(&ssh->list);
417 mutex_exit(&ssh->lock);
418
419 mutex_destroy(&ssh->lock);
420}
421
422/*
423 * Add a new txg to historical record.
424 */
425void
01b738f4 426spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
0b1401ee
BB
427{
428 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
429 spa_txg_history_t *sth, *rm;
430
431 if (zfs_txg_history == 0 && ssh->size == 0)
432 return;
433
79c76d5b 434 sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP);
0b1401ee
BB
435 sth->txg = txg;
436 sth->state = TXG_STATE_OPEN;
01b738f4 437 sth->times[TXG_STATE_BIRTH] = birth_time;
0b1401ee
BB
438
439 mutex_enter(&ssh->lock);
440
441 list_insert_head(&ssh->list, sth);
442 ssh->size++;
443
444 while (ssh->size > zfs_txg_history) {
445 ssh->size--;
446 rm = list_remove_tail(&ssh->list);
d1d7e268 447 kmem_free(rm, sizeof (spa_txg_history_t));
0b1401ee
BB
448 }
449
450 mutex_exit(&ssh->lock);
451}
452
453/*
454 * Set txg state completion time and increment current state.
455 */
456int
457spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
458 hrtime_t completed_time)
459{
460 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
461 spa_txg_history_t *sth;
462 int error = ENOENT;
463
464 if (zfs_txg_history == 0)
465 return (0);
466
467 mutex_enter(&ssh->lock);
468 for (sth = list_head(&ssh->list); sth != NULL;
d1d7e268 469 sth = list_next(&ssh->list, sth)) {
0b1401ee
BB
470 if (sth->txg == txg) {
471 sth->times[completed_state] = completed_time;
472 sth->state++;
473 error = 0;
474 break;
475 }
476 }
477 mutex_exit(&ssh->lock);
478
479 return (error);
480}
481
482/*
483 * Set txg IO stats.
484 */
baf67d15 485static int
0b1401ee 486spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
3ccab252 487 uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty)
0b1401ee
BB
488{
489 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
490 spa_txg_history_t *sth;
491 int error = ENOENT;
492
493 if (zfs_txg_history == 0)
494 return (0);
495
496 mutex_enter(&ssh->lock);
497 for (sth = list_head(&ssh->list); sth != NULL;
d1d7e268 498 sth = list_next(&ssh->list, sth)) {
0b1401ee
BB
499 if (sth->txg == txg) {
500 sth->nread = nread;
501 sth->nwritten = nwritten;
502 sth->reads = reads;
503 sth->writes = writes;
3ccab252 504 sth->ndirty = ndirty;
0b1401ee
BB
505 error = 0;
506 break;
507 }
508 }
509 mutex_exit(&ssh->lock);
510
511 return (error);
512}
513
baf67d15
BB
514txg_stat_t *
515spa_txg_history_init_io(spa_t *spa, uint64_t txg, dsl_pool_t *dp)
516{
517 txg_stat_t *ts;
518
519 if (zfs_txg_history == 0)
520 return (NULL);
521
522 ts = kmem_alloc(sizeof (txg_stat_t), KM_SLEEP);
523
524 spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
525 vdev_get_stats(spa->spa_root_vdev, &ts->vs1);
526 spa_config_exit(spa, SCL_ALL, FTAG);
527
528 ts->txg = txg;
529 ts->ndirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
530
531 spa_txg_history_set(spa, txg, TXG_STATE_WAIT_FOR_SYNC, gethrtime());
532
533 return (ts);
534}
535
536void
537spa_txg_history_fini_io(spa_t *spa, txg_stat_t *ts)
538{
539 if (ts == NULL)
540 return;
541
542 if (zfs_txg_history == 0) {
543 kmem_free(ts, sizeof (txg_stat_t));
544 return;
545 }
546
547 spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
548 vdev_get_stats(spa->spa_root_vdev, &ts->vs2);
549 spa_config_exit(spa, SCL_ALL, FTAG);
550
551 spa_txg_history_set(spa, ts->txg, TXG_STATE_SYNCED, gethrtime());
552 spa_txg_history_set_io(spa, ts->txg,
553 ts->vs2.vs_bytes[ZIO_TYPE_READ] - ts->vs1.vs_bytes[ZIO_TYPE_READ],
554 ts->vs2.vs_bytes[ZIO_TYPE_WRITE] - ts->vs1.vs_bytes[ZIO_TYPE_WRITE],
555 ts->vs2.vs_ops[ZIO_TYPE_READ] - ts->vs1.vs_ops[ZIO_TYPE_READ],
556 ts->vs2.vs_ops[ZIO_TYPE_WRITE] - ts->vs1.vs_ops[ZIO_TYPE_WRITE],
557 ts->ndirty);
558
559 kmem_free(ts, sizeof (txg_stat_t));
560}
561
2d37239a
BB
/*
 * ==========================================================================
 * SPA TX Assign Histogram Routines
 * ==========================================================================
 */

/*
 * Tx statistics - Information exported regarding dmu_tx_assign time.
 */
571
572/*
573 * When the kstat is written zero all buckets. When the kstat is read
574 * count the number of trailing buckets set to zero and update ks_ndata
575 * such that they are not output.
576 */
577static int
578spa_tx_assign_update(kstat_t *ksp, int rw)
579{
580 spa_t *spa = ksp->ks_private;
581 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
582 int i;
583
584 if (rw == KSTAT_WRITE) {
585 for (i = 0; i < ssh->count; i++)
586 ((kstat_named_t *)ssh->private)[i].value.ui64 = 0;
587 }
588
589 for (i = ssh->count; i > 0; i--)
590 if (((kstat_named_t *)ssh->private)[i-1].value.ui64 != 0)
591 break;
592
593 ksp->ks_ndata = i;
d1d7e268 594 ksp->ks_data_size = i * sizeof (kstat_named_t);
2d37239a
BB
595
596 return (0);
597}
598
599static void
600spa_tx_assign_init(spa_t *spa)
601{
602 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
761b8ec6 603 char *name;
2d37239a
BB
604 kstat_named_t *ks;
605 kstat_t *ksp;
606 int i;
607
608 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
609
610 ssh->count = 42; /* power of two buckets for 1ns to 2,199s */
d1d7e268 611 ssh->size = ssh->count * sizeof (kstat_named_t);
2d37239a
BB
612 ssh->private = kmem_alloc(ssh->size, KM_SLEEP);
613
761b8ec6 614 name = kmem_asprintf("zfs/%s", spa_name(spa));
2d37239a
BB
615
616 for (i = 0; i < ssh->count; i++) {
617 ks = &((kstat_named_t *)ssh->private)[i];
618 ks->data_type = KSTAT_DATA_UINT64;
619 ks->value.ui64 = 0;
620 (void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns",
621 (u_longlong_t)1 << i);
622 }
623
624 ksp = kstat_create(name, 0, "dmu_tx_assign", "misc",
625 KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL);
626 ssh->kstat = ksp;
627
628 if (ksp) {
629 ksp->ks_lock = &ssh->lock;
630 ksp->ks_data = ssh->private;
631 ksp->ks_ndata = ssh->count;
632 ksp->ks_data_size = ssh->size;
633 ksp->ks_private = spa;
634 ksp->ks_update = spa_tx_assign_update;
635 kstat_install(ksp);
636 }
761b8ec6 637 strfree(name);
2d37239a
BB
638}
639
640static void
641spa_tx_assign_destroy(spa_t *spa)
642{
643 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
644 kstat_t *ksp;
645
646 ksp = ssh->kstat;
647 if (ksp)
648 kstat_delete(ksp);
649
650 kmem_free(ssh->private, ssh->size);
651 mutex_destroy(&ssh->lock);
652}
653
654void
655spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs)
656{
657 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
658 uint64_t idx = 0;
659
4ca9c1de 660 while (((1ULL << idx) < nsecs) && (idx < ssh->size - 1))
2d37239a
BB
661 idx++;
662
663 atomic_inc_64(&((kstat_named_t *)ssh->private)[idx].value.ui64);
664}
665
330847ff
MA
/*
 * ==========================================================================
 * SPA IO History Routines
 * ==========================================================================
 */
671static int
672spa_io_history_update(kstat_t *ksp, int rw)
673{
674 if (rw == KSTAT_WRITE)
675 memset(ksp->ks_data, 0, ksp->ks_data_size);
676
677 return (0);
678}
679
680static void
681spa_io_history_init(spa_t *spa)
682{
683 spa_stats_history_t *ssh = &spa->spa_stats.io_history;
761b8ec6 684 char *name;
330847ff
MA
685 kstat_t *ksp;
686
687 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
688
761b8ec6 689 name = kmem_asprintf("zfs/%s", spa_name(spa));
330847ff
MA
690
691 ksp = kstat_create(name, 0, "io", "disk", KSTAT_TYPE_IO, 1, 0);
692 ssh->kstat = ksp;
693
694 if (ksp) {
695 ksp->ks_lock = &ssh->lock;
696 ksp->ks_private = spa;
697 ksp->ks_update = spa_io_history_update;
698 kstat_install(ksp);
699 }
761b8ec6 700 strfree(name);
330847ff
MA
701}
702
703static void
704spa_io_history_destroy(spa_t *spa)
705{
706 spa_stats_history_t *ssh = &spa->spa_stats.io_history;
707
708 if (ssh->kstat)
709 kstat_delete(ssh->kstat);
710
711 mutex_destroy(&ssh->lock);
712}
713
379ca9cf
OF
/*
 * ==========================================================================
 * SPA MMP History Routines
 * ==========================================================================
 */
719
720/*
d2160d05
OF
721 * MMP statistics - Information exported regarding attempted MMP writes
722 * For MMP writes issued, fields used as per comments below.
723 * For MMP writes skipped, an entry represents a span of time when
724 * writes were skipped for same reason (error from mmp_random_leaf).
725 * Differences are:
726 * timestamp time first write skipped, if >1 skipped in a row
727 * mmp_delay delay value at timestamp
728 * vdev_guid number of writes skipped
729 * io_error one of enum mmp_error
730 * duration time span (ns) of skipped writes
379ca9cf
OF
731 */
732
733typedef struct spa_mmp_history {
7088545d 734 uint64_t mmp_kstat_id; /* unique # for updates */
379ca9cf 735 uint64_t txg; /* txg of last sync */
d2160d05
OF
736 uint64_t timestamp; /* UTC time MMP write issued */
737 uint64_t mmp_delay; /* mmp_thread.mmp_delay at timestamp */
379ca9cf
OF
738 uint64_t vdev_guid; /* unique ID of leaf vdev */
739 char *vdev_path;
d2160d05 740 int vdev_label; /* vdev label */
7088545d 741 int io_error; /* error status of MMP write */
d2160d05 742 hrtime_t error_start; /* hrtime of start of error period */
7088545d 743 hrtime_t duration; /* time from submission to completion */
379ca9cf
OF
744 list_node_t smh_link;
745} spa_mmp_history_t;
746
/*
 * Emit the column-header line for the "multihost" kstat into buf,
 * writing at most size bytes (snprintf truncates).  Always returns 0.
 */
static int
spa_mmp_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size,
	    "%-10s %-10s %-10s %-6s %-10s %-12s %-24s "
	    "%-10s %s\n",
	    "id", "txg", "timestamp", "error", "duration", "mmp_delay",
	    "vdev_guid", "vdev_label", "vdev_path");

	return (0);
}
755
756static int
757spa_mmp_history_data(char *buf, size_t size, void *data)
758{
759 spa_mmp_history_t *smh = (spa_mmp_history_t *)data;
d2160d05
OF
760 char skip_fmt[] = "%-10llu %-10llu %10llu %#6llx %10lld %12llu %-24llu "
761 "%-10lld %s\n";
762 char write_fmt[] = "%-10llu %-10llu %10llu %6lld %10lld %12llu %-24llu "
763 "%-10lld %s\n";
379ca9cf 764
d2160d05 765 (void) snprintf(buf, size, (smh->error_start ? skip_fmt : write_fmt),
7088545d
OF
766 (u_longlong_t)smh->mmp_kstat_id, (u_longlong_t)smh->txg,
767 (u_longlong_t)smh->timestamp, (longlong_t)smh->io_error,
768 (longlong_t)smh->duration, (u_longlong_t)smh->mmp_delay,
769 (u_longlong_t)smh->vdev_guid, (u_longlong_t)smh->vdev_label,
379ca9cf
OF
770 (smh->vdev_path ? smh->vdev_path : "-"));
771
772 return (0);
773}
774
775/*
776 * Calculate the address for the next spa_stats_history_t entry. The
777 * ssh->lock will be held until ksp->ks_ndata entries are processed.
778 */
779static void *
780spa_mmp_history_addr(kstat_t *ksp, loff_t n)
781{
782 spa_t *spa = ksp->ks_private;
783 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
784
785 ASSERT(MUTEX_HELD(&ssh->lock));
786
787 if (n == 0)
788 ssh->private = list_tail(&ssh->list);
789 else if (ssh->private)
790 ssh->private = list_prev(&ssh->list, ssh->private);
791
792 return (ssh->private);
793}
794
795/*
796 * When the kstat is written discard all spa_mmp_history_t entries. The
797 * ssh->lock will be held until ksp->ks_ndata entries are processed.
798 */
799static int
800spa_mmp_history_update(kstat_t *ksp, int rw)
801{
802 spa_t *spa = ksp->ks_private;
803 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
804
805 ASSERT(MUTEX_HELD(&ssh->lock));
806
807 if (rw == KSTAT_WRITE) {
808 spa_mmp_history_t *smh;
809
810 while ((smh = list_remove_head(&ssh->list))) {
811 ssh->size--;
812 if (smh->vdev_path)
813 strfree(smh->vdev_path);
814 kmem_free(smh, sizeof (spa_mmp_history_t));
815 }
816
817 ASSERT3U(ssh->size, ==, 0);
818 }
819
820 ksp->ks_ndata = ssh->size;
821 ksp->ks_data_size = ssh->size * sizeof (spa_mmp_history_t);
822
823 return (0);
824}
825
826static void
827spa_mmp_history_init(spa_t *spa)
828{
829 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
761b8ec6 830 char *name;
379ca9cf
OF
831 kstat_t *ksp;
832
833 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
834 list_create(&ssh->list, sizeof (spa_mmp_history_t),
835 offsetof(spa_mmp_history_t, smh_link));
836
837 ssh->count = 0;
838 ssh->size = 0;
839 ssh->private = NULL;
840
761b8ec6 841 name = kmem_asprintf("zfs/%s", spa_name(spa));
379ca9cf
OF
842
843 ksp = kstat_create(name, 0, "multihost", "misc",
844 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
845 ssh->kstat = ksp;
846
847 if (ksp) {
848 ksp->ks_lock = &ssh->lock;
849 ksp->ks_data = NULL;
850 ksp->ks_private = spa;
851 ksp->ks_update = spa_mmp_history_update;
852 kstat_set_raw_ops(ksp, spa_mmp_history_headers,
853 spa_mmp_history_data, spa_mmp_history_addr);
854 kstat_install(ksp);
855 }
761b8ec6 856 strfree(name);
379ca9cf
OF
857}
858
859static void
860spa_mmp_history_destroy(spa_t *spa)
861{
862 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
863 spa_mmp_history_t *smh;
864 kstat_t *ksp;
865
866 ksp = ssh->kstat;
867 if (ksp)
868 kstat_delete(ksp);
869
870 mutex_enter(&ssh->lock);
871 while ((smh = list_remove_head(&ssh->list))) {
872 ssh->size--;
873 if (smh->vdev_path)
874 strfree(smh->vdev_path);
875 kmem_free(smh, sizeof (spa_mmp_history_t));
876 }
877
878 ASSERT3U(ssh->size, ==, 0);
879 list_destroy(&ssh->list);
880 mutex_exit(&ssh->lock);
881
882 mutex_destroy(&ssh->lock);
883}
884
d2160d05
OF
885/*
886 * Set duration in existing "skip" record to how long we have waited for a leaf
887 * vdev to become available.
888 *
889 * Important that we start search at the head of the list where new
890 * records are inserted, so this is normally an O(1) operation.
891 */
892int
893spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id)
894{
895 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
896 spa_mmp_history_t *smh;
897 int error = ENOENT;
898
899 if (zfs_multihost_history == 0 && ssh->size == 0)
900 return (0);
901
902 mutex_enter(&ssh->lock);
903 for (smh = list_head(&ssh->list); smh != NULL;
904 smh = list_next(&ssh->list, smh)) {
905 if (smh->mmp_kstat_id == mmp_kstat_id) {
906 ASSERT3U(smh->io_error, !=, 0);
907 smh->duration = gethrtime() - smh->error_start;
908 smh->vdev_guid++;
909 error = 0;
910 break;
911 }
912 }
913 mutex_exit(&ssh->lock);
914
915 return (error);
916}
917
379ca9cf 918/*
7088545d 919 * Set MMP write duration and error status in existing record.
d2160d05 920 * See comment re: search order above spa_mmp_history_set_skip().
7088545d
OF
921 */
922int
923spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
924 hrtime_t duration)
925{
926 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
927 spa_mmp_history_t *smh;
928 int error = ENOENT;
929
930 if (zfs_multihost_history == 0 && ssh->size == 0)
931 return (0);
932
933 mutex_enter(&ssh->lock);
934 for (smh = list_head(&ssh->list); smh != NULL;
935 smh = list_next(&ssh->list, smh)) {
936 if (smh->mmp_kstat_id == mmp_kstat_id) {
d2160d05 937 ASSERT(smh->io_error == 0);
7088545d
OF
938 smh->io_error = io_error;
939 smh->duration = duration;
940 error = 0;
941 break;
942 }
943 }
944 mutex_exit(&ssh->lock);
945
946 return (error);
947}
948
949/*
d2160d05
OF
950 * Add a new MMP historical record.
951 * error == 0 : a write was issued.
952 * error != 0 : a write was not issued because no leaves were found.
379ca9cf 953 */
d2160d05
OF
954void *
955spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
956 uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_kstat_id,
957 int error)
379ca9cf 958{
379ca9cf
OF
959 spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
960 spa_mmp_history_t *smh, *rm;
961
962 if (zfs_multihost_history == 0 && ssh->size == 0)
d2160d05 963 return (NULL);
379ca9cf
OF
964
965 smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP);
966 smh->txg = txg;
967 smh->timestamp = timestamp;
968 smh->mmp_delay = mmp_delay;
d2160d05
OF
969 if (vd) {
970 smh->vdev_guid = vd->vdev_guid;
971 if (vd->vdev_path)
972 smh->vdev_path = strdup(vd->vdev_path);
973 }
379ca9cf 974 smh->vdev_label = label;
7088545d 975 smh->mmp_kstat_id = mmp_kstat_id;
379ca9cf 976
d2160d05
OF
977 if (error) {
978 smh->io_error = error;
979 smh->error_start = gethrtime();
980 smh->vdev_guid = 1;
981 }
982
379ca9cf
OF
983 mutex_enter(&ssh->lock);
984
985 list_insert_head(&ssh->list, smh);
986 ssh->size++;
987
988 while (ssh->size > zfs_multihost_history) {
989 ssh->size--;
990 rm = list_remove_tail(&ssh->list);
991 if (rm->vdev_path)
992 strfree(rm->vdev_path);
993 kmem_free(rm, sizeof (spa_mmp_history_t));
994 }
995
996 mutex_exit(&ssh->lock);
d2160d05 997 return ((void *)smh);
379ca9cf
OF
998}
999
1421c891
PS
1000void
1001spa_stats_init(spa_t *spa)
1002{
1003 spa_read_history_init(spa);
0b1401ee 1004 spa_txg_history_init(spa);
2d37239a 1005 spa_tx_assign_init(spa);
330847ff 1006 spa_io_history_init(spa);
379ca9cf 1007 spa_mmp_history_init(spa);
1421c891
PS
1008}
1009
1010void
1011spa_stats_destroy(spa_t *spa)
1012{
2d37239a 1013 spa_tx_assign_destroy(spa);
0b1401ee 1014 spa_txg_history_destroy(spa);
1421c891 1015 spa_read_history_destroy(spa);
330847ff 1016 spa_io_history_destroy(spa);
379ca9cf 1017 spa_mmp_history_destroy(spa);
1421c891
PS
1018}
1019
/*
 * Export the history tunables as module parameters (mode 0644) when
 * built for the kernel.
 */
#ifdef _KERNEL
/* CSTYLED */
module_param(zfs_read_history, int, 0644);
MODULE_PARM_DESC(zfs_read_history,
	"Historical statistics for the last N reads");

module_param(zfs_read_history_hits, int, 0644);
MODULE_PARM_DESC(zfs_read_history_hits,
	"Include cache hits in read history");

module_param(zfs_txg_history, int, 0644);
MODULE_PARM_DESC(zfs_txg_history,
	"Historical statistics for the last N txgs");

module_param(zfs_multihost_history, int, 0644);
MODULE_PARM_DESC(zfs_multihost_history,
	"Historical statistics for last N multihost writes");
/* END CSTYLED */
#endif	/* _KERNEL */