]> git.proxmox.com Git - mirror_zfs.git/blob - module/zfs/spa_stats.c
Merge branch 'issue-2094'
[mirror_zfs.git] / module / zfs / spa_stats.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 #include <sys/zfs_context.h>
23 #include <sys/spa_impl.h>
24
/*
 * Number of most recent reads to keep statistics for, per spa_t.
 * Zero (the default) disables read history collection.
 */
int zfs_read_history = 0;

/*
 * When non-zero, reads whose flags include ARC_CACHED (cache hits) are
 * recorded in the read history as well.  Disabled by default.
 */
int zfs_read_history_hits = 0;

/*
 * Number of most recent txgs to keep statistics for, per spa_t.
 * Zero (the default) disables txg history collection.
 */
int zfs_txg_history = 0;
39
40 /*
41 * ==========================================================================
42 * SPA Read History Routines
43 * ==========================================================================
44 */
45
/*
 * Read statistics - Information exported regarding each arc_read call.
 *
 * One of these is allocated per recorded read by spa_read_history_add()
 * and linked on the per-spa read history list through srh_link.
 */
typedef struct spa_read_history {
	uint64_t	uid;		/* unique identifier */
	hrtime_t	start;		/* gethrtime() when record was created */
	uint64_t	objset;		/* read from this objset */
	uint64_t	object;		/* read of this object number */
	uint64_t	level;		/* block's indirection level */
	uint64_t	blkid;		/* read of this block id */
	char		origin[24];	/* read originated from here */
	uint32_t	aflags;		/* ARC flags (cached, prefetch, etc.) */
	pid_t		pid;		/* PID of task doing read */
	char		comm[16];	/* process name of task doing read */
	list_node_t	srh_link;	/* linkage on the read history list */
} spa_read_history_t;
62
/*
 * Format the column header line of the read history kstat into 'buf'.
 *
 *	buf	destination buffer
 *	size	capacity of 'buf' in bytes
 *
 * Always returns 0.
 *
 * snprintf() NUL-terminates the output within 'size' bytes on its own.
 * Its return value is the length the untruncated line would have had, so
 * it must never be used as an index into 'buf': doing so stores a byte
 * past the end of the buffer whenever the line is truncated.
 */
static int
spa_read_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s "
	    "%-24s %-8s %-16s\n", "UID", "start", "objset", "object",
	    "level", "blkid", "aflags", "origin", "pid", "process");

	return (0);
}
73
74 static int
75 spa_read_history_data(char *buf, size_t size, void *data)
76 {
77 spa_read_history_t *srh = (spa_read_history_t *)data;
78
79 size = snprintf(buf, size - 1, "%-8llu %-16llu 0x%-6llx "
80 "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
81 (u_longlong_t)srh->uid, srh->start,
82 (longlong_t)srh->objset, (longlong_t)srh->object,
83 (longlong_t)srh->level, (longlong_t)srh->blkid,
84 srh->aflags, srh->origin, srh->pid, srh->comm);
85 buf[size] = '\0';
86
87 return (0);
88 }
89
90 /*
91 * Calculate the address for the next spa_stats_history_t entry. The
92 * ssh->lock will be held until ksp->ks_ndata entries are processed.
93 */
94 static void *
95 spa_read_history_addr(kstat_t *ksp, loff_t n)
96 {
97 spa_t *spa = ksp->ks_private;
98 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
99
100 ASSERT(MUTEX_HELD(&ssh->lock));
101
102 if (n == 0)
103 ssh->private = list_tail(&ssh->list);
104 else if (ssh->private)
105 ssh->private = list_prev(&ssh->list, ssh->private);
106
107 return (ssh->private);
108 }
109
110 /*
111 * When the kstat is written discard all spa_read_history_t entires. The
112 * ssh->lock will be held until ksp->ks_ndata entries are processed.
113 */
114 static int
115 spa_read_history_update(kstat_t *ksp, int rw)
116 {
117 spa_t *spa = ksp->ks_private;
118 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
119
120 if (rw == KSTAT_WRITE) {
121 spa_read_history_t *srh;
122
123 while ((srh = list_remove_head(&ssh->list))) {
124 ssh->size--;
125 kmem_free(srh, sizeof (spa_read_history_t));
126 }
127
128 ASSERT3U(ssh->size, ==, 0);
129 }
130
131 ksp->ks_ndata = ssh->size;
132 ksp->ks_data_size = ssh->size * sizeof (spa_read_history_t);
133
134 return (0);
135 }
136
137 static void
138 spa_read_history_init(spa_t *spa)
139 {
140 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
141 char name[KSTAT_STRLEN];
142 kstat_t *ksp;
143
144 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
145 list_create(&ssh->list, sizeof (spa_read_history_t),
146 offsetof(spa_read_history_t, srh_link));
147
148 ssh->count = 0;
149 ssh->size = 0;
150 ssh->private = NULL;
151
152 (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
153 name[KSTAT_STRLEN-1] = '\0';
154
155 ksp = kstat_create(name, 0, "reads", "misc",
156 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
157 ssh->kstat = ksp;
158
159 if (ksp) {
160 ksp->ks_lock = &ssh->lock;
161 ksp->ks_data = NULL;
162 ksp->ks_private = spa;
163 ksp->ks_update = spa_read_history_update;
164 kstat_set_raw_ops(ksp, spa_read_history_headers,
165 spa_read_history_data, spa_read_history_addr);
166 kstat_install(ksp);
167 }
168 }
169
170 static void
171 spa_read_history_destroy(spa_t *spa)
172 {
173 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
174 spa_read_history_t *srh;
175 kstat_t *ksp;
176
177 ksp = ssh->kstat;
178 if (ksp)
179 kstat_delete(ksp);
180
181 mutex_enter(&ssh->lock);
182 while ((srh = list_remove_head(&ssh->list))) {
183 ssh->size--;
184 kmem_free(srh, sizeof (spa_read_history_t));
185 }
186
187 ASSERT3U(ssh->size, ==, 0);
188 list_destroy(&ssh->list);
189 mutex_exit(&ssh->lock);
190
191 mutex_destroy(&ssh->lock);
192 }
193
194 void
195 spa_read_history_add(spa_t *spa, const zbookmark_t *zb, uint32_t aflags)
196 {
197 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
198 spa_read_history_t *srh, *rm;
199
200 ASSERT3P(spa, !=, NULL);
201 ASSERT3P(zb, !=, NULL);
202
203 if (zfs_read_history == 0 && ssh->size == 0)
204 return;
205
206 if (zfs_read_history_hits == 0 && (aflags & ARC_CACHED))
207 return;
208
209 srh = kmem_zalloc(sizeof (spa_read_history_t), KM_PUSHPAGE);
210 strlcpy(srh->comm, getcomm(), sizeof (srh->comm));
211 srh->start = gethrtime();
212 srh->objset = zb->zb_objset;
213 srh->object = zb->zb_object;
214 srh->level = zb->zb_level;
215 srh->blkid = zb->zb_blkid;
216 srh->aflags = aflags;
217 srh->pid = getpid();
218
219 mutex_enter(&ssh->lock);
220
221 srh->uid = ssh->count++;
222 list_insert_head(&ssh->list, srh);
223 ssh->size++;
224
225 while (ssh->size > zfs_read_history) {
226 ssh->size--;
227 rm = list_remove_tail(&ssh->list);
228 kmem_free(rm, sizeof (spa_read_history_t));
229 }
230
231 mutex_exit(&ssh->lock);
232 }
233
234 /*
235 * ==========================================================================
236 * SPA TXG History Routines
237 * ==========================================================================
238 */
239
/*
 * Txg statistics - Information exported regarding each txg sync.
 *
 * One of these is allocated per recorded txg by spa_txg_history_add() and
 * linked on the per-spa txg history list through sth_link.
 */

typedef struct spa_txg_history {
	uint64_t	txg;		/* txg id */
	txg_state_t	state;		/* active txg state */
	uint64_t	nread;		/* number of bytes read */
	uint64_t	nwritten;	/* number of bytes written */
	uint64_t	reads;		/* number of read operations */
	uint64_t	writes;		/* number of write operations */
	uint64_t	nreserved;	/* number of bytes reserved */
	/*
	 * Completion times, indexed by txg_state_t.  The array holds
	 * TXG_STATE_COMMITTED slots, so TXG_STATE_COMMITTED itself is not
	 * a valid index.
	 */
	hrtime_t	times[TXG_STATE_COMMITTED]; /* completion times */
	list_node_t	sth_link;	/* linkage on the txg history list */
} spa_txg_history_t;
255
/*
 * Format the column header line of the txg history kstat into 'buf'.
 *
 *	buf	destination buffer
 *	size	capacity of 'buf' in bytes
 *
 * Always returns 0.
 *
 * snprintf() NUL-terminates the output within 'size' bytes on its own.
 * Its return value is the length the untruncated line would have had, so
 * it must never be used as an index into 'buf': doing so stores a byte
 * past the end of the buffer whenever the line is truncated.
 */
static int
spa_txg_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size, "%-8s %-16s %-5s %-12s %-12s %-12s "
	    "%-8s %-8s %-12s %-12s %-12s %-12s\n", "txg", "birth", "state",
	    "nreserved", "nread", "nwritten", "reads", "writes",
	    "otime", "qtime", "wtime", "stime");

	return (0);
}
267
268 static int
269 spa_txg_history_data(char *buf, size_t size, void *data)
270 {
271 spa_txg_history_t *sth = (spa_txg_history_t *)data;
272 uint64_t open = 0, quiesce = 0, wait = 0, sync = 0;
273 char state;
274
275 switch (sth->state) {
276 case TXG_STATE_BIRTH: state = 'B'; break;
277 case TXG_STATE_OPEN: state = 'O'; break;
278 case TXG_STATE_QUIESCED: state = 'Q'; break;
279 case TXG_STATE_WAIT_FOR_SYNC: state = 'W'; break;
280 case TXG_STATE_SYNCED: state = 'S'; break;
281 case TXG_STATE_COMMITTED: state = 'C'; break;
282 default: state = '?'; break;
283 }
284
285 if (sth->times[TXG_STATE_OPEN])
286 open = sth->times[TXG_STATE_OPEN] -
287 sth->times[TXG_STATE_BIRTH];
288
289 if (sth->times[TXG_STATE_QUIESCED])
290 quiesce = sth->times[TXG_STATE_QUIESCED] -
291 sth->times[TXG_STATE_OPEN];
292
293 if (sth->times[TXG_STATE_WAIT_FOR_SYNC])
294 wait = sth->times[TXG_STATE_WAIT_FOR_SYNC] -
295 sth->times[TXG_STATE_QUIESCED];
296
297 if (sth->times[TXG_STATE_SYNCED])
298 sync = sth->times[TXG_STATE_SYNCED] -
299 sth->times[TXG_STATE_WAIT_FOR_SYNC];
300
301 size = snprintf(buf, size - 1, "%-8llu %-16llu %-5c %-12llu "
302 "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
303 (longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state,
304 (u_longlong_t)sth->nreserved,
305 (u_longlong_t)sth->nread, (u_longlong_t)sth->nwritten,
306 (u_longlong_t)sth->reads, (u_longlong_t)sth->writes,
307 (u_longlong_t)open, (u_longlong_t)quiesce, (u_longlong_t)wait,
308 (u_longlong_t)sync);
309 buf[size] = '\0';
310
311 return (0);
312 }
313
314 /*
315 * Calculate the address for the next spa_stats_history_t entry. The
316 * ssh->lock will be held until ksp->ks_ndata entries are processed.
317 */
318 static void *
319 spa_txg_history_addr(kstat_t *ksp, loff_t n)
320 {
321 spa_t *spa = ksp->ks_private;
322 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
323
324 ASSERT(MUTEX_HELD(&ssh->lock));
325
326 if (n == 0)
327 ssh->private = list_tail(&ssh->list);
328 else if (ssh->private)
329 ssh->private = list_prev(&ssh->list, ssh->private);
330
331 return (ssh->private);
332 }
333
334 /*
335 * When the kstat is written discard all spa_txg_history_t entires. The
336 * ssh->lock will be held until ksp->ks_ndata entries are processed.
337 */
338 static int
339 spa_txg_history_update(kstat_t *ksp, int rw)
340 {
341 spa_t *spa = ksp->ks_private;
342 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
343
344 ASSERT(MUTEX_HELD(&ssh->lock));
345
346 if (rw == KSTAT_WRITE) {
347 spa_txg_history_t *sth;
348
349 while ((sth = list_remove_head(&ssh->list))) {
350 ssh->size--;
351 kmem_free(sth, sizeof (spa_txg_history_t));
352 }
353
354 ASSERT3U(ssh->size, ==, 0);
355 }
356
357 ksp->ks_ndata = ssh->size;
358 ksp->ks_data_size = ssh->size * sizeof (spa_txg_history_t);
359
360 return (0);
361 }
362
363 static void
364 spa_txg_history_init(spa_t *spa)
365 {
366 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
367 char name[KSTAT_STRLEN];
368 kstat_t *ksp;
369
370 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
371 list_create(&ssh->list, sizeof (spa_txg_history_t),
372 offsetof(spa_txg_history_t, sth_link));
373
374 ssh->count = 0;
375 ssh->size = 0;
376 ssh->private = NULL;
377
378 (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
379 name[KSTAT_STRLEN-1] = '\0';
380
381 ksp = kstat_create(name, 0, "txgs", "misc",
382 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
383 ssh->kstat = ksp;
384
385 if (ksp) {
386 ksp->ks_lock = &ssh->lock;
387 ksp->ks_data = NULL;
388 ksp->ks_private = spa;
389 ksp->ks_update = spa_txg_history_update;
390 kstat_set_raw_ops(ksp, spa_txg_history_headers,
391 spa_txg_history_data, spa_txg_history_addr);
392 kstat_install(ksp);
393 }
394 }
395
396 static void
397 spa_txg_history_destroy(spa_t *spa)
398 {
399 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
400 spa_txg_history_t *sth;
401 kstat_t *ksp;
402
403 ksp = ssh->kstat;
404 if (ksp)
405 kstat_delete(ksp);
406
407 mutex_enter(&ssh->lock);
408 while ((sth = list_remove_head(&ssh->list))) {
409 ssh->size--;
410 kmem_free(sth, sizeof (spa_txg_history_t));
411 }
412
413 ASSERT3U(ssh->size, ==, 0);
414 list_destroy(&ssh->list);
415 mutex_exit(&ssh->lock);
416
417 mutex_destroy(&ssh->lock);
418 }
419
420 /*
421 * Add a new txg to historical record.
422 */
423 void
424 spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
425 {
426 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
427 spa_txg_history_t *sth, *rm;
428
429 if (zfs_txg_history == 0 && ssh->size == 0)
430 return;
431
432 sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_PUSHPAGE);
433 sth->txg = txg;
434 sth->state = TXG_STATE_OPEN;
435 sth->times[TXG_STATE_BIRTH] = birth_time;
436
437 mutex_enter(&ssh->lock);
438
439 list_insert_head(&ssh->list, sth);
440 ssh->size++;
441
442 while (ssh->size > zfs_txg_history) {
443 ssh->size--;
444 rm = list_remove_tail(&ssh->list);
445 kmem_free(rm, sizeof (spa_txg_history_t));
446 }
447
448 mutex_exit(&ssh->lock);
449 }
450
451 /*
452 * Set txg state completion time and increment current state.
453 */
454 int
455 spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
456 hrtime_t completed_time)
457 {
458 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
459 spa_txg_history_t *sth;
460 int error = ENOENT;
461
462 if (zfs_txg_history == 0)
463 return (0);
464
465 mutex_enter(&ssh->lock);
466 for (sth = list_head(&ssh->list); sth != NULL;
467 sth = list_next(&ssh->list, sth)) {
468 if (sth->txg == txg) {
469 sth->times[completed_state] = completed_time;
470 sth->state++;
471 error = 0;
472 break;
473 }
474 }
475 mutex_exit(&ssh->lock);
476
477 return (error);
478 }
479
480 /*
481 * Set txg IO stats.
482 */
483 int
484 spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
485 uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t nreserved)
486 {
487 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
488 spa_txg_history_t *sth;
489 int error = ENOENT;
490
491 if (zfs_txg_history == 0)
492 return (0);
493
494 mutex_enter(&ssh->lock);
495 for (sth = list_head(&ssh->list); sth != NULL;
496 sth = list_next(&ssh->list, sth)) {
497 if (sth->txg == txg) {
498 sth->nread = nread;
499 sth->nwritten = nwritten;
500 sth->reads = reads;
501 sth->writes = writes;
502 sth->nreserved = nreserved;
503 error = 0;
504 break;
505 }
506 }
507 mutex_exit(&ssh->lock);
508
509 return (error);
510 }
511
512 /*
513 * ==========================================================================
514 * SPA TX Assign Histogram Routines
515 * ==========================================================================
516 */
517
518 /*
519 * Tx statistics - Information exported regarding dmu_tx_assign time.
520 */
521
522 /*
523 * When the kstat is written zero all buckets. When the kstat is read
524 * count the number of trailing buckets set to zero and update ks_ndata
525 * such that they are not output.
526 */
527 static int
528 spa_tx_assign_update(kstat_t *ksp, int rw)
529 {
530 spa_t *spa = ksp->ks_private;
531 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
532 int i;
533
534 if (rw == KSTAT_WRITE) {
535 for (i = 0; i < ssh->count; i++)
536 ((kstat_named_t *)ssh->private)[i].value.ui64 = 0;
537 }
538
539 for (i = ssh->count; i > 0; i--)
540 if (((kstat_named_t *)ssh->private)[i-1].value.ui64 != 0)
541 break;
542
543 ksp->ks_ndata = i;
544 ksp->ks_data_size = i * sizeof (kstat_named_t);
545
546 return (0);
547 }
548
549 static void
550 spa_tx_assign_init(spa_t *spa)
551 {
552 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
553 char name[KSTAT_STRLEN];
554 kstat_named_t *ks;
555 kstat_t *ksp;
556 int i;
557
558 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
559
560 ssh->count = 42; /* power of two buckets for 1ns to 2,199s */
561 ssh->size = ssh->count * sizeof (kstat_named_t);
562 ssh->private = kmem_alloc(ssh->size, KM_SLEEP);
563
564 (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
565 name[KSTAT_STRLEN-1] = '\0';
566
567 for (i = 0; i < ssh->count; i++) {
568 ks = &((kstat_named_t *)ssh->private)[i];
569 ks->data_type = KSTAT_DATA_UINT64;
570 ks->value.ui64 = 0;
571 (void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns",
572 (u_longlong_t)1 << i);
573 }
574
575 ksp = kstat_create(name, 0, "dmu_tx_assign", "misc",
576 KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL);
577 ssh->kstat = ksp;
578
579 if (ksp) {
580 ksp->ks_lock = &ssh->lock;
581 ksp->ks_data = ssh->private;
582 ksp->ks_ndata = ssh->count;
583 ksp->ks_data_size = ssh->size;
584 ksp->ks_private = spa;
585 ksp->ks_update = spa_tx_assign_update;
586 kstat_install(ksp);
587 }
588 }
589
590 static void
591 spa_tx_assign_destroy(spa_t *spa)
592 {
593 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
594 kstat_t *ksp;
595
596 ksp = ssh->kstat;
597 if (ksp)
598 kstat_delete(ksp);
599
600 kmem_free(ssh->private, ssh->size);
601 mutex_destroy(&ssh->lock);
602 }
603
604 void
605 spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs)
606 {
607 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
608 uint64_t idx = 0;
609
610 while (((1 << idx) < nsecs) && (idx < ssh->size - 1))
611 idx++;
612
613 atomic_inc_64(&((kstat_named_t *)ssh->private)[idx].value.ui64);
614 }
615
616 /*
617 * ==========================================================================
618 * SPA IO History Routines
619 * ==========================================================================
620 */
621 static int
622 spa_io_history_update(kstat_t *ksp, int rw)
623 {
624 if (rw == KSTAT_WRITE)
625 memset(ksp->ks_data, 0, ksp->ks_data_size);
626
627 return (0);
628 }
629
630 static void
631 spa_io_history_init(spa_t *spa)
632 {
633 spa_stats_history_t *ssh = &spa->spa_stats.io_history;
634 char name[KSTAT_STRLEN];
635 kstat_t *ksp;
636
637 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
638
639 (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
640 name[KSTAT_STRLEN-1] = '\0';
641
642 ksp = kstat_create(name, 0, "io", "disk", KSTAT_TYPE_IO, 1, 0);
643 ssh->kstat = ksp;
644
645 if (ksp) {
646 ksp->ks_lock = &ssh->lock;
647 ksp->ks_private = spa;
648 ksp->ks_update = spa_io_history_update;
649 kstat_install(ksp);
650 }
651 }
652
653 static void
654 spa_io_history_destroy(spa_t *spa)
655 {
656 spa_stats_history_t *ssh = &spa->spa_stats.io_history;
657
658 if (ssh->kstat)
659 kstat_delete(ssh->kstat);
660
661 mutex_destroy(&ssh->lock);
662 }
663
/*
 * Set up all per-spa statistics: the read history, the txg history, the
 * dmu_tx_assign histogram and the IO kstat.
 */
void
spa_stats_init(spa_t *spa)
{
	spa_read_history_init(spa);
	spa_txg_history_init(spa);
	spa_tx_assign_init(spa);
	spa_io_history_init(spa);
}
672
/*
 * Tear down everything spa_stats_init() set up for this spa.
 */
void
spa_stats_destroy(spa_t *spa)
{
	spa_tx_assign_destroy(spa);
	spa_txg_history_destroy(spa);
	spa_read_history_destroy(spa);
	spa_io_history_destroy(spa);
}
681
/*
 * Register the three tunables defined at the top of this file as int
 * module parameters with mode 0644.  Only compiled for kernel builds
 * that have the SPL.
 */
#if defined(_KERNEL) && defined(HAVE_SPL)
module_param(zfs_read_history, int, 0644);
MODULE_PARM_DESC(zfs_read_history, "Historic statistics for the last N reads");

module_param(zfs_read_history_hits, int, 0644);
MODULE_PARM_DESC(zfs_read_history_hits, "Include cache hits in read history");

module_param(zfs_txg_history, int, 0644);
MODULE_PARM_DESC(zfs_txg_history, "Historic statistics for the last N txgs");
#endif