]> git.proxmox.com Git - mirror_zfs-debian.git/blame - module/zfs/spa_stats.c
Imported Upstream version 0.6.4.2
[mirror_zfs-debian.git] / module / zfs / spa_stats.c
CommitLineData
a08ee875
LG
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22#include <sys/zfs_context.h>
23#include <sys/spa_impl.h>
24
/*
 * Depth of the per-pool read history: stats are kept for the last N reads.
 * Zero (the default) leaves the read history disabled.
 */
int zfs_read_history = 0;

/*
 * When non-zero, cache hits are included in the read history as well.
 * Disabled by default.
 */
int zfs_read_history_hits = 0;

/*
 * Depth of the per-pool txg history: stats are kept for the last N txgs.
 * Zero (the default) leaves the txg history disabled.
 */
int zfs_txg_history = 0;
39
40/*
41 * ==========================================================================
42 * SPA Read History Routines
43 * ==========================================================================
44 */
45
46/*
47 * Read statistics - Information exported regarding each arc_read call
48 */
49typedef struct spa_read_history {
50 uint64_t uid; /* unique identifier */
51 hrtime_t start; /* time read completed */
52 uint64_t objset; /* read from this objset */
53 uint64_t object; /* read of this object number */
54 uint64_t level; /* block's indirection level */
55 uint64_t blkid; /* read of this block id */
56 char origin[24]; /* read originated from here */
57 uint32_t aflags; /* ARC flags (cached, prefetch, etc.) */
58 pid_t pid; /* PID of task doing read */
59 char comm[16]; /* process name of task doing read */
60 list_node_t srh_link;
61} spa_read_history_t;
62
/*
 * Raw kstat "headers" callback for the read history: writes the column
 * title line into buf, using at most size bytes.  Always returns 0.
 */
static int
spa_read_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size,
	    "%-8s %-16s %-8s %-8s %-8s %-8s %-8s %-24s %-8s %-16s\n",
	    "UID", "start", "objset", "object", "level", "blkid",
	    "aflags", "origin", "pid", "process");

	return (0);
}
72
73static int
74spa_read_history_data(char *buf, size_t size, void *data)
75{
76 spa_read_history_t *srh = (spa_read_history_t *)data;
77
ea04106b 78 (void) snprintf(buf, size, "%-8llu %-16llu 0x%-6llx "
a08ee875
LG
79 "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
80 (u_longlong_t)srh->uid, srh->start,
81 (longlong_t)srh->objset, (longlong_t)srh->object,
82 (longlong_t)srh->level, (longlong_t)srh->blkid,
83 srh->aflags, srh->origin, srh->pid, srh->comm);
a08ee875
LG
84
85 return (0);
86}
87
88/*
89 * Calculate the address for the next spa_stats_history_t entry. The
90 * ssh->lock will be held until ksp->ks_ndata entries are processed.
91 */
92static void *
93spa_read_history_addr(kstat_t *ksp, loff_t n)
94{
95 spa_t *spa = ksp->ks_private;
96 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
97
98 ASSERT(MUTEX_HELD(&ssh->lock));
99
100 if (n == 0)
101 ssh->private = list_tail(&ssh->list);
102 else if (ssh->private)
103 ssh->private = list_prev(&ssh->list, ssh->private);
104
105 return (ssh->private);
106}
107
108/*
109 * When the kstat is written discard all spa_read_history_t entires. The
110 * ssh->lock will be held until ksp->ks_ndata entries are processed.
111 */
112static int
113spa_read_history_update(kstat_t *ksp, int rw)
114{
115 spa_t *spa = ksp->ks_private;
116 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
117
118 if (rw == KSTAT_WRITE) {
119 spa_read_history_t *srh;
120
121 while ((srh = list_remove_head(&ssh->list))) {
122 ssh->size--;
123 kmem_free(srh, sizeof (spa_read_history_t));
124 }
125
126 ASSERT3U(ssh->size, ==, 0);
127 }
128
129 ksp->ks_ndata = ssh->size;
130 ksp->ks_data_size = ssh->size * sizeof (spa_read_history_t);
131
132 return (0);
133}
134
135static void
136spa_read_history_init(spa_t *spa)
137{
138 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
139 char name[KSTAT_STRLEN];
140 kstat_t *ksp;
141
142 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
143 list_create(&ssh->list, sizeof (spa_read_history_t),
144 offsetof(spa_read_history_t, srh_link));
145
146 ssh->count = 0;
147 ssh->size = 0;
148 ssh->private = NULL;
149
150 (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
a08ee875
LG
151
152 ksp = kstat_create(name, 0, "reads", "misc",
153 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
154 ssh->kstat = ksp;
155
156 if (ksp) {
157 ksp->ks_lock = &ssh->lock;
158 ksp->ks_data = NULL;
159 ksp->ks_private = spa;
160 ksp->ks_update = spa_read_history_update;
161 kstat_set_raw_ops(ksp, spa_read_history_headers,
162 spa_read_history_data, spa_read_history_addr);
163 kstat_install(ksp);
164 }
165}
166
167static void
168spa_read_history_destroy(spa_t *spa)
169{
170 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
171 spa_read_history_t *srh;
172 kstat_t *ksp;
173
174 ksp = ssh->kstat;
175 if (ksp)
176 kstat_delete(ksp);
177
178 mutex_enter(&ssh->lock);
179 while ((srh = list_remove_head(&ssh->list))) {
180 ssh->size--;
181 kmem_free(srh, sizeof (spa_read_history_t));
182 }
183
184 ASSERT3U(ssh->size, ==, 0);
185 list_destroy(&ssh->list);
186 mutex_exit(&ssh->lock);
187
188 mutex_destroy(&ssh->lock);
189}
190
191void
ea04106b 192spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
a08ee875
LG
193{
194 spa_stats_history_t *ssh = &spa->spa_stats.read_history;
195 spa_read_history_t *srh, *rm;
196
197 ASSERT3P(spa, !=, NULL);
198 ASSERT3P(zb, !=, NULL);
199
200 if (zfs_read_history == 0 && ssh->size == 0)
201 return;
202
203 if (zfs_read_history_hits == 0 && (aflags & ARC_CACHED))
204 return;
205
ea04106b 206 srh = kmem_zalloc(sizeof (spa_read_history_t), KM_SLEEP);
a08ee875
LG
207 strlcpy(srh->comm, getcomm(), sizeof (srh->comm));
208 srh->start = gethrtime();
209 srh->objset = zb->zb_objset;
210 srh->object = zb->zb_object;
211 srh->level = zb->zb_level;
212 srh->blkid = zb->zb_blkid;
213 srh->aflags = aflags;
214 srh->pid = getpid();
215
216 mutex_enter(&ssh->lock);
217
218 srh->uid = ssh->count++;
219 list_insert_head(&ssh->list, srh);
220 ssh->size++;
221
222 while (ssh->size > zfs_read_history) {
223 ssh->size--;
224 rm = list_remove_tail(&ssh->list);
225 kmem_free(rm, sizeof (spa_read_history_t));
226 }
227
228 mutex_exit(&ssh->lock);
229}
230
231/*
232 * ==========================================================================
233 * SPA TXG History Routines
234 * ==========================================================================
235 */
236
237/*
238 * Txg statistics - Information exported regarding each txg sync
239 */
240
241typedef struct spa_txg_history {
242 uint64_t txg; /* txg id */
243 txg_state_t state; /* active txg state */
244 uint64_t nread; /* number of bytes read */
245 uint64_t nwritten; /* number of bytes written */
246 uint64_t reads; /* number of read operations */
247 uint64_t writes; /* number of write operations */
ea04106b 248 uint64_t ndirty; /* number of dirty bytes */
a08ee875
LG
249 hrtime_t times[TXG_STATE_COMMITTED]; /* completion times */
250 list_node_t sth_link;
251} spa_txg_history_t;
252
/*
 * Raw kstat "headers" callback for the txg history: writes the column
 * title line into buf, using at most size bytes.  Always returns 0.
 */
static int
spa_txg_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size,
	    "%-8s %-16s %-5s %-12s %-12s %-12s "
	    "%-8s %-8s %-12s %-12s %-12s %-12s\n",
	    "txg", "birth", "state", "ndirty", "nread", "nwritten",
	    "reads", "writes", "otime", "qtime", "wtime", "stime");

	return (0);
}
263
264static int
265spa_txg_history_data(char *buf, size_t size, void *data)
266{
267 spa_txg_history_t *sth = (spa_txg_history_t *)data;
268 uint64_t open = 0, quiesce = 0, wait = 0, sync = 0;
269 char state;
270
271 switch (sth->state) {
272 case TXG_STATE_BIRTH: state = 'B'; break;
273 case TXG_STATE_OPEN: state = 'O'; break;
274 case TXG_STATE_QUIESCED: state = 'Q'; break;
275 case TXG_STATE_WAIT_FOR_SYNC: state = 'W'; break;
276 case TXG_STATE_SYNCED: state = 'S'; break;
277 case TXG_STATE_COMMITTED: state = 'C'; break;
278 default: state = '?'; break;
279 }
280
281 if (sth->times[TXG_STATE_OPEN])
282 open = sth->times[TXG_STATE_OPEN] -
283 sth->times[TXG_STATE_BIRTH];
284
285 if (sth->times[TXG_STATE_QUIESCED])
286 quiesce = sth->times[TXG_STATE_QUIESCED] -
287 sth->times[TXG_STATE_OPEN];
288
289 if (sth->times[TXG_STATE_WAIT_FOR_SYNC])
290 wait = sth->times[TXG_STATE_WAIT_FOR_SYNC] -
291 sth->times[TXG_STATE_QUIESCED];
292
293 if (sth->times[TXG_STATE_SYNCED])
294 sync = sth->times[TXG_STATE_SYNCED] -
295 sth->times[TXG_STATE_WAIT_FOR_SYNC];
296
ea04106b 297 (void) snprintf(buf, size, "%-8llu %-16llu %-5c %-12llu "
a08ee875
LG
298 "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
299 (longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state,
ea04106b 300 (u_longlong_t)sth->ndirty,
a08ee875
LG
301 (u_longlong_t)sth->nread, (u_longlong_t)sth->nwritten,
302 (u_longlong_t)sth->reads, (u_longlong_t)sth->writes,
303 (u_longlong_t)open, (u_longlong_t)quiesce, (u_longlong_t)wait,
304 (u_longlong_t)sync);
a08ee875
LG
305
306 return (0);
307}
308
309/*
310 * Calculate the address for the next spa_stats_history_t entry. The
311 * ssh->lock will be held until ksp->ks_ndata entries are processed.
312 */
313static void *
314spa_txg_history_addr(kstat_t *ksp, loff_t n)
315{
316 spa_t *spa = ksp->ks_private;
317 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
318
319 ASSERT(MUTEX_HELD(&ssh->lock));
320
321 if (n == 0)
322 ssh->private = list_tail(&ssh->list);
323 else if (ssh->private)
324 ssh->private = list_prev(&ssh->list, ssh->private);
325
326 return (ssh->private);
327}
328
329/*
330 * When the kstat is written discard all spa_txg_history_t entires. The
331 * ssh->lock will be held until ksp->ks_ndata entries are processed.
332 */
333static int
334spa_txg_history_update(kstat_t *ksp, int rw)
335{
336 spa_t *spa = ksp->ks_private;
337 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
338
339 ASSERT(MUTEX_HELD(&ssh->lock));
340
341 if (rw == KSTAT_WRITE) {
342 spa_txg_history_t *sth;
343
344 while ((sth = list_remove_head(&ssh->list))) {
345 ssh->size--;
346 kmem_free(sth, sizeof (spa_txg_history_t));
347 }
348
349 ASSERT3U(ssh->size, ==, 0);
350 }
351
352 ksp->ks_ndata = ssh->size;
353 ksp->ks_data_size = ssh->size * sizeof (spa_txg_history_t);
354
355 return (0);
356}
357
358static void
359spa_txg_history_init(spa_t *spa)
360{
361 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
362 char name[KSTAT_STRLEN];
363 kstat_t *ksp;
364
365 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
366 list_create(&ssh->list, sizeof (spa_txg_history_t),
367 offsetof(spa_txg_history_t, sth_link));
368
369 ssh->count = 0;
370 ssh->size = 0;
371 ssh->private = NULL;
372
373 (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
a08ee875
LG
374
375 ksp = kstat_create(name, 0, "txgs", "misc",
376 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
377 ssh->kstat = ksp;
378
379 if (ksp) {
380 ksp->ks_lock = &ssh->lock;
381 ksp->ks_data = NULL;
382 ksp->ks_private = spa;
383 ksp->ks_update = spa_txg_history_update;
384 kstat_set_raw_ops(ksp, spa_txg_history_headers,
385 spa_txg_history_data, spa_txg_history_addr);
386 kstat_install(ksp);
387 }
388}
389
390static void
391spa_txg_history_destroy(spa_t *spa)
392{
393 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
394 spa_txg_history_t *sth;
395 kstat_t *ksp;
396
397 ksp = ssh->kstat;
398 if (ksp)
399 kstat_delete(ksp);
400
401 mutex_enter(&ssh->lock);
402 while ((sth = list_remove_head(&ssh->list))) {
403 ssh->size--;
404 kmem_free(sth, sizeof (spa_txg_history_t));
405 }
406
407 ASSERT3U(ssh->size, ==, 0);
408 list_destroy(&ssh->list);
409 mutex_exit(&ssh->lock);
410
411 mutex_destroy(&ssh->lock);
412}
413
414/*
415 * Add a new txg to historical record.
416 */
417void
418spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
419{
420 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
421 spa_txg_history_t *sth, *rm;
422
423 if (zfs_txg_history == 0 && ssh->size == 0)
424 return;
425
ea04106b 426 sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP);
a08ee875
LG
427 sth->txg = txg;
428 sth->state = TXG_STATE_OPEN;
429 sth->times[TXG_STATE_BIRTH] = birth_time;
430
431 mutex_enter(&ssh->lock);
432
433 list_insert_head(&ssh->list, sth);
434 ssh->size++;
435
436 while (ssh->size > zfs_txg_history) {
437 ssh->size--;
438 rm = list_remove_tail(&ssh->list);
439 kmem_free(rm, sizeof (spa_txg_history_t));
440 }
441
442 mutex_exit(&ssh->lock);
443}
444
445/*
446 * Set txg state completion time and increment current state.
447 */
448int
449spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
450 hrtime_t completed_time)
451{
452 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
453 spa_txg_history_t *sth;
454 int error = ENOENT;
455
456 if (zfs_txg_history == 0)
457 return (0);
458
459 mutex_enter(&ssh->lock);
460 for (sth = list_head(&ssh->list); sth != NULL;
461 sth = list_next(&ssh->list, sth)) {
462 if (sth->txg == txg) {
463 sth->times[completed_state] = completed_time;
464 sth->state++;
465 error = 0;
466 break;
467 }
468 }
469 mutex_exit(&ssh->lock);
470
471 return (error);
472}
473
474/*
475 * Set txg IO stats.
476 */
477int
478spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
ea04106b 479 uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty)
a08ee875
LG
480{
481 spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
482 spa_txg_history_t *sth;
483 int error = ENOENT;
484
485 if (zfs_txg_history == 0)
486 return (0);
487
488 mutex_enter(&ssh->lock);
489 for (sth = list_head(&ssh->list); sth != NULL;
490 sth = list_next(&ssh->list, sth)) {
491 if (sth->txg == txg) {
492 sth->nread = nread;
493 sth->nwritten = nwritten;
494 sth->reads = reads;
495 sth->writes = writes;
ea04106b 496 sth->ndirty = ndirty;
a08ee875
LG
497 error = 0;
498 break;
499 }
500 }
501 mutex_exit(&ssh->lock);
502
503 return (error);
504}
505
506/*
507 * ==========================================================================
508 * SPA TX Assign Histogram Routines
509 * ==========================================================================
510 */
511
512/*
513 * Tx statistics - Information exported regarding dmu_tx_assign time.
514 */
515
516/*
517 * When the kstat is written zero all buckets. When the kstat is read
518 * count the number of trailing buckets set to zero and update ks_ndata
519 * such that they are not output.
520 */
521static int
522spa_tx_assign_update(kstat_t *ksp, int rw)
523{
524 spa_t *spa = ksp->ks_private;
525 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
526 int i;
527
528 if (rw == KSTAT_WRITE) {
529 for (i = 0; i < ssh->count; i++)
530 ((kstat_named_t *)ssh->private)[i].value.ui64 = 0;
531 }
532
533 for (i = ssh->count; i > 0; i--)
534 if (((kstat_named_t *)ssh->private)[i-1].value.ui64 != 0)
535 break;
536
537 ksp->ks_ndata = i;
538 ksp->ks_data_size = i * sizeof (kstat_named_t);
539
540 return (0);
541}
542
543static void
544spa_tx_assign_init(spa_t *spa)
545{
546 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
547 char name[KSTAT_STRLEN];
548 kstat_named_t *ks;
549 kstat_t *ksp;
550 int i;
551
552 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
553
554 ssh->count = 42; /* power of two buckets for 1ns to 2,199s */
555 ssh->size = ssh->count * sizeof (kstat_named_t);
556 ssh->private = kmem_alloc(ssh->size, KM_SLEEP);
557
558 (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
a08ee875
LG
559
560 for (i = 0; i < ssh->count; i++) {
561 ks = &((kstat_named_t *)ssh->private)[i];
562 ks->data_type = KSTAT_DATA_UINT64;
563 ks->value.ui64 = 0;
564 (void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns",
565 (u_longlong_t)1 << i);
566 }
567
568 ksp = kstat_create(name, 0, "dmu_tx_assign", "misc",
569 KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL);
570 ssh->kstat = ksp;
571
572 if (ksp) {
573 ksp->ks_lock = &ssh->lock;
574 ksp->ks_data = ssh->private;
575 ksp->ks_ndata = ssh->count;
576 ksp->ks_data_size = ssh->size;
577 ksp->ks_private = spa;
578 ksp->ks_update = spa_tx_assign_update;
579 kstat_install(ksp);
580 }
581}
582
583static void
584spa_tx_assign_destroy(spa_t *spa)
585{
586 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
587 kstat_t *ksp;
588
589 ksp = ssh->kstat;
590 if (ksp)
591 kstat_delete(ksp);
592
593 kmem_free(ssh->private, ssh->size);
594 mutex_destroy(&ssh->lock);
595}
596
597void
598spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs)
599{
600 spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
601 uint64_t idx = 0;
602
603 while (((1 << idx) < nsecs) && (idx < ssh->size - 1))
604 idx++;
605
606 atomic_inc_64(&((kstat_named_t *)ssh->private)[idx].value.ui64);
607}
608
609/*
610 * ==========================================================================
611 * SPA IO History Routines
612 * ==========================================================================
613 */
614static int
615spa_io_history_update(kstat_t *ksp, int rw)
616{
617 if (rw == KSTAT_WRITE)
618 memset(ksp->ks_data, 0, ksp->ks_data_size);
619
620 return (0);
621}
622
623static void
624spa_io_history_init(spa_t *spa)
625{
626 spa_stats_history_t *ssh = &spa->spa_stats.io_history;
627 char name[KSTAT_STRLEN];
628 kstat_t *ksp;
629
630 mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
631
632 (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
a08ee875
LG
633
634 ksp = kstat_create(name, 0, "io", "disk", KSTAT_TYPE_IO, 1, 0);
635 ssh->kstat = ksp;
636
637 if (ksp) {
638 ksp->ks_lock = &ssh->lock;
639 ksp->ks_private = spa;
640 ksp->ks_update = spa_io_history_update;
641 kstat_install(ksp);
642 }
643}
644
645static void
646spa_io_history_destroy(spa_t *spa)
647{
648 spa_stats_history_t *ssh = &spa->spa_stats.io_history;
649
650 if (ssh->kstat)
651 kstat_delete(ssh->kstat);
652
653 mutex_destroy(&ssh->lock);
654}
655
656void
657spa_stats_init(spa_t *spa)
658{
659 spa_read_history_init(spa);
660 spa_txg_history_init(spa);
661 spa_tx_assign_init(spa);
662 spa_io_history_init(spa);
663}
664
665void
666spa_stats_destroy(spa_t *spa)
667{
668 spa_tx_assign_destroy(spa);
669 spa_txg_history_destroy(spa);
670 spa_read_history_destroy(spa);
671 spa_io_history_destroy(spa);
672}
673
#if defined(_KERNEL) && defined(HAVE_SPL)
/*
 * Register the history tunables as int module parameters with mode 0644.
 */
module_param(zfs_read_history, int, 0644);
MODULE_PARM_DESC(zfs_read_history, "Historic statistics for the last N reads");

module_param(zfs_read_history_hits, int, 0644);
MODULE_PARM_DESC(zfs_read_history_hits, "Include cache hits in read history");

module_param(zfs_txg_history, int, 0644);
MODULE_PARM_DESC(zfs_txg_history, "Historic statistics for the last N txgs");
#endif /* _KERNEL && HAVE_SPL */