]>
Commit | Line | Data |
---|---|---|
1421c891 PS |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | ||
22 | #include <sys/zfs_context.h> | |
23 | #include <sys/spa_impl.h> | |
379ca9cf | 24 | #include <sys/vdev_impl.h> |
1421c891 PS |
25 | |
/*
 * Number of most recent reads kept per spa_t; 0 (the default) disables
 * read history.  spa_read_history_add() trims each pool's list down to
 * this many entries every time it inserts a record.
 */
int zfs_read_history = 0;

/*
 * Include cache hits in history, disabled by default.  While this is 0,
 * spa_read_history_add() ignores reads flagged ARC_FLAG_CACHED.
 */
int zfs_read_history_hits = 0;

/*
 * Keeps stats on the last 100 txgs by default.  Setting this to 0 makes
 * the txg history set/init_io routines return without recording anything.
 */
int zfs_txg_history = 100;

/*
 * Keeps stats on the last N MMP updates, disabled by default.
 */
int zfs_multihost_history = 0;
45 | ||
1421c891 PS |
46 | /* |
47 | * ========================================================================== | |
48 | * SPA Read History Routines | |
49 | * ========================================================================== | |
50 | */ | |
51 | ||
/*
 * Read statistics - Information exported regarding each arc_read call.
 * One record is allocated per recorded read by spa_read_history_add() and
 * linked onto spa->spa_stats.read_history.list through srh_link.
 */
typedef struct spa_read_history {
	uint64_t	uid;		/* unique identifier (per-pool counter) */
	hrtime_t	start;		/* gethrtime() when record was created */
	uint64_t	objset;		/* read from this objset */
	uint64_t	object;		/* read of this object number */
	uint64_t	level;		/* block's indirection level */
	uint64_t	blkid;		/* read of this block id */
	char		origin[24];	/* read originated from here */
	uint32_t	aflags;		/* ARC flags (cached, prefetch, etc.) */
	pid_t		pid;		/* PID of task doing read */
	char		comm[16];	/* process name of task doing read */
	list_node_t	srh_link;	/* linkage on the history list */
} spa_read_history_t;
68 | ||
/*
 * Raw-kstat header callback for the "reads" kstat: formats the column
 * titles into buf (bounded by size) and always returns 0.
 */
static int
spa_read_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size,
	    "%-8s %-16s %-8s %-8s %-8s %-8s %-8s %-24s %-8s %-16s\n",
	    "UID", "start", "objset", "object", "level", "blkid", "aflags",
	    "origin", "pid", "process");

	return (0);
}
78 | ||
79 | static int | |
80 | spa_read_history_data(char *buf, size_t size, void *data) | |
81 | { | |
82 | spa_read_history_t *srh = (spa_read_history_t *)data; | |
83 | ||
7b2d78a0 | 84 | (void) snprintf(buf, size, "%-8llu %-16llu 0x%-6llx " |
1421c891 PS |
85 | "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n", |
86 | (u_longlong_t)srh->uid, srh->start, | |
87 | (longlong_t)srh->objset, (longlong_t)srh->object, | |
88 | (longlong_t)srh->level, (longlong_t)srh->blkid, | |
89 | srh->aflags, srh->origin, srh->pid, srh->comm); | |
1421c891 PS |
90 | |
91 | return (0); | |
92 | } | |
93 | ||
94 | /* | |
95 | * Calculate the address for the next spa_stats_history_t entry. The | |
96 | * ssh->lock will be held until ksp->ks_ndata entries are processed. | |
97 | */ | |
98 | static void * | |
99 | spa_read_history_addr(kstat_t *ksp, loff_t n) | |
100 | { | |
101 | spa_t *spa = ksp->ks_private; | |
102 | spa_stats_history_t *ssh = &spa->spa_stats.read_history; | |
103 | ||
104 | ASSERT(MUTEX_HELD(&ssh->lock)); | |
105 | ||
106 | if (n == 0) | |
107 | ssh->private = list_tail(&ssh->list); | |
108 | else if (ssh->private) | |
109 | ssh->private = list_prev(&ssh->list, ssh->private); | |
110 | ||
111 | return (ssh->private); | |
112 | } | |
113 | ||
114 | /* | |
4e33ba4c | 115 | * When the kstat is written discard all spa_read_history_t entries. The |
1421c891 PS |
116 | * ssh->lock will be held until ksp->ks_ndata entries are processed. |
117 | */ | |
118 | static int | |
119 | spa_read_history_update(kstat_t *ksp, int rw) | |
120 | { | |
121 | spa_t *spa = ksp->ks_private; | |
122 | spa_stats_history_t *ssh = &spa->spa_stats.read_history; | |
123 | ||
124 | if (rw == KSTAT_WRITE) { | |
125 | spa_read_history_t *srh; | |
126 | ||
127 | while ((srh = list_remove_head(&ssh->list))) { | |
128 | ssh->size--; | |
d1d7e268 | 129 | kmem_free(srh, sizeof (spa_read_history_t)); |
1421c891 PS |
130 | } |
131 | ||
132 | ASSERT3U(ssh->size, ==, 0); | |
133 | } | |
134 | ||
135 | ksp->ks_ndata = ssh->size; | |
d1d7e268 | 136 | ksp->ks_data_size = ssh->size * sizeof (spa_read_history_t); |
1421c891 PS |
137 | |
138 | return (0); | |
139 | } | |
140 | ||
141 | static void | |
142 | spa_read_history_init(spa_t *spa) | |
143 | { | |
144 | spa_stats_history_t *ssh = &spa->spa_stats.read_history; | |
761b8ec6 | 145 | char *name; |
1421c891 PS |
146 | kstat_t *ksp; |
147 | ||
148 | mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL); | |
149 | list_create(&ssh->list, sizeof (spa_read_history_t), | |
150 | offsetof(spa_read_history_t, srh_link)); | |
151 | ||
152 | ssh->count = 0; | |
153 | ssh->size = 0; | |
154 | ssh->private = NULL; | |
155 | ||
761b8ec6 | 156 | name = kmem_asprintf("zfs/%s", spa_name(spa)); |
1421c891 PS |
157 | |
158 | ksp = kstat_create(name, 0, "reads", "misc", | |
159 | KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); | |
160 | ssh->kstat = ksp; | |
161 | ||
162 | if (ksp) { | |
163 | ksp->ks_lock = &ssh->lock; | |
164 | ksp->ks_data = NULL; | |
165 | ksp->ks_private = spa; | |
166 | ksp->ks_update = spa_read_history_update; | |
167 | kstat_set_raw_ops(ksp, spa_read_history_headers, | |
168 | spa_read_history_data, spa_read_history_addr); | |
169 | kstat_install(ksp); | |
170 | } | |
761b8ec6 | 171 | strfree(name); |
1421c891 PS |
172 | } |
173 | ||
174 | static void | |
175 | spa_read_history_destroy(spa_t *spa) | |
176 | { | |
177 | spa_stats_history_t *ssh = &spa->spa_stats.read_history; | |
178 | spa_read_history_t *srh; | |
179 | kstat_t *ksp; | |
180 | ||
181 | ksp = ssh->kstat; | |
182 | if (ksp) | |
183 | kstat_delete(ksp); | |
184 | ||
185 | mutex_enter(&ssh->lock); | |
186 | while ((srh = list_remove_head(&ssh->list))) { | |
187 | ssh->size--; | |
d1d7e268 | 188 | kmem_free(srh, sizeof (spa_read_history_t)); |
1421c891 PS |
189 | } |
190 | ||
191 | ASSERT3U(ssh->size, ==, 0); | |
192 | list_destroy(&ssh->list); | |
193 | mutex_exit(&ssh->lock); | |
194 | ||
195 | mutex_destroy(&ssh->lock); | |
196 | } | |
197 | ||
198 | void | |
5dbd68a3 | 199 | spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags) |
1421c891 PS |
200 | { |
201 | spa_stats_history_t *ssh = &spa->spa_stats.read_history; | |
202 | spa_read_history_t *srh, *rm; | |
203 | ||
204 | ASSERT3P(spa, !=, NULL); | |
205 | ASSERT3P(zb, !=, NULL); | |
206 | ||
207 | if (zfs_read_history == 0 && ssh->size == 0) | |
208 | return; | |
209 | ||
2a432414 | 210 | if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED)) |
1421c891 PS |
211 | return; |
212 | ||
79c76d5b | 213 | srh = kmem_zalloc(sizeof (spa_read_history_t), KM_SLEEP); |
d1d7e268 | 214 | strlcpy(srh->comm, getcomm(), sizeof (srh->comm)); |
1421c891 PS |
215 | srh->start = gethrtime(); |
216 | srh->objset = zb->zb_objset; | |
217 | srh->object = zb->zb_object; | |
218 | srh->level = zb->zb_level; | |
219 | srh->blkid = zb->zb_blkid; | |
220 | srh->aflags = aflags; | |
221 | srh->pid = getpid(); | |
222 | ||
223 | mutex_enter(&ssh->lock); | |
224 | ||
225 | srh->uid = ssh->count++; | |
226 | list_insert_head(&ssh->list, srh); | |
227 | ssh->size++; | |
228 | ||
229 | while (ssh->size > zfs_read_history) { | |
230 | ssh->size--; | |
231 | rm = list_remove_tail(&ssh->list); | |
d1d7e268 | 232 | kmem_free(rm, sizeof (spa_read_history_t)); |
1421c891 PS |
233 | } |
234 | ||
235 | mutex_exit(&ssh->lock); | |
236 | } | |
237 | ||
0b1401ee BB |
238 | /* |
239 | * ========================================================================== | |
240 | * SPA TXG History Routines | |
241 | * ========================================================================== | |
242 | */ | |
243 | ||
/*
 * Txg statistics - Information exported regarding each txg sync.
 * One record is allocated per txg by spa_txg_history_add() and linked
 * onto spa->spa_stats.txg_history.list through sth_link.
 */

typedef struct spa_txg_history {
	uint64_t	txg;		/* txg id */
	txg_state_t	state;		/* active txg state */
	uint64_t	nread;		/* number of bytes read */
	uint64_t	nwritten;	/* number of bytes written */
	uint64_t	reads;		/* number of read operations */
	uint64_t	writes;		/* number of write operations */
	uint64_t	ndirty;		/* number of dirty bytes */
	/*
	 * Completion times, indexed by txg_state_t.  The array holds
	 * TXG_STATE_COMMITTED slots, so only states below
	 * TXG_STATE_COMMITTED have a slot.
	 */
	hrtime_t	times[TXG_STATE_COMMITTED]; /* completion times */
	list_node_t	sth_link;	/* linkage on the history list */
} spa_txg_history_t;
259 | ||
/*
 * Raw-kstat header callback for the "txgs" kstat: formats the column
 * titles into buf (bounded by size) and always returns 0.
 */
static int
spa_txg_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size,
	    "%-8s %-16s %-5s %-12s %-12s %-12s %-8s %-8s "
	    "%-12s %-12s %-12s %-12s\n",
	    "txg", "birth", "state", "ndirty", "nread", "nwritten",
	    "reads", "writes", "otime", "qtime", "wtime", "stime");

	return (0);
}
270 | ||
271 | static int | |
272 | spa_txg_history_data(char *buf, size_t size, void *data) | |
273 | { | |
274 | spa_txg_history_t *sth = (spa_txg_history_t *)data; | |
478d64fd | 275 | uint64_t open = 0, quiesce = 0, wait = 0, sync = 0; |
0b1401ee BB |
276 | char state; |
277 | ||
278 | switch (sth->state) { | |
279 | case TXG_STATE_BIRTH: state = 'B'; break; | |
280 | case TXG_STATE_OPEN: state = 'O'; break; | |
281 | case TXG_STATE_QUIESCED: state = 'Q'; break; | |
478d64fd | 282 | case TXG_STATE_WAIT_FOR_SYNC: state = 'W'; break; |
0b1401ee BB |
283 | case TXG_STATE_SYNCED: state = 'S'; break; |
284 | case TXG_STATE_COMMITTED: state = 'C'; break; | |
285 | default: state = '?'; break; | |
286 | } | |
287 | ||
288 | if (sth->times[TXG_STATE_OPEN]) | |
289 | open = sth->times[TXG_STATE_OPEN] - | |
290 | sth->times[TXG_STATE_BIRTH]; | |
291 | ||
292 | if (sth->times[TXG_STATE_QUIESCED]) | |
293 | quiesce = sth->times[TXG_STATE_QUIESCED] - | |
294 | sth->times[TXG_STATE_OPEN]; | |
295 | ||
478d64fd IL |
296 | if (sth->times[TXG_STATE_WAIT_FOR_SYNC]) |
297 | wait = sth->times[TXG_STATE_WAIT_FOR_SYNC] - | |
298 | sth->times[TXG_STATE_QUIESCED]; | |
299 | ||
0b1401ee BB |
300 | if (sth->times[TXG_STATE_SYNCED]) |
301 | sync = sth->times[TXG_STATE_SYNCED] - | |
478d64fd | 302 | sth->times[TXG_STATE_WAIT_FOR_SYNC]; |
0b1401ee | 303 | |
7b2d78a0 | 304 | (void) snprintf(buf, size, "%-8llu %-16llu %-5c %-12llu " |
478d64fd | 305 | "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n", |
0b1401ee | 306 | (longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state, |
3ccab252 | 307 | (u_longlong_t)sth->ndirty, |
0b1401ee BB |
308 | (u_longlong_t)sth->nread, (u_longlong_t)sth->nwritten, |
309 | (u_longlong_t)sth->reads, (u_longlong_t)sth->writes, | |
478d64fd IL |
310 | (u_longlong_t)open, (u_longlong_t)quiesce, (u_longlong_t)wait, |
311 | (u_longlong_t)sync); | |
0b1401ee BB |
312 | |
313 | return (0); | |
314 | } | |
315 | ||
316 | /* | |
317 | * Calculate the address for the next spa_stats_history_t entry. The | |
318 | * ssh->lock will be held until ksp->ks_ndata entries are processed. | |
319 | */ | |
320 | static void * | |
321 | spa_txg_history_addr(kstat_t *ksp, loff_t n) | |
322 | { | |
323 | spa_t *spa = ksp->ks_private; | |
324 | spa_stats_history_t *ssh = &spa->spa_stats.txg_history; | |
325 | ||
326 | ASSERT(MUTEX_HELD(&ssh->lock)); | |
327 | ||
328 | if (n == 0) | |
329 | ssh->private = list_tail(&ssh->list); | |
330 | else if (ssh->private) | |
331 | ssh->private = list_prev(&ssh->list, ssh->private); | |
332 | ||
333 | return (ssh->private); | |
334 | } | |
335 | ||
336 | /* | |
4e33ba4c | 337 | * When the kstat is written discard all spa_txg_history_t entries. The |
0b1401ee BB |
338 | * ssh->lock will be held until ksp->ks_ndata entries are processed. |
339 | */ | |
340 | static int | |
341 | spa_txg_history_update(kstat_t *ksp, int rw) | |
342 | { | |
343 | spa_t *spa = ksp->ks_private; | |
344 | spa_stats_history_t *ssh = &spa->spa_stats.txg_history; | |
345 | ||
346 | ASSERT(MUTEX_HELD(&ssh->lock)); | |
347 | ||
348 | if (rw == KSTAT_WRITE) { | |
349 | spa_txg_history_t *sth; | |
350 | ||
351 | while ((sth = list_remove_head(&ssh->list))) { | |
352 | ssh->size--; | |
d1d7e268 | 353 | kmem_free(sth, sizeof (spa_txg_history_t)); |
0b1401ee BB |
354 | } |
355 | ||
356 | ASSERT3U(ssh->size, ==, 0); | |
357 | } | |
358 | ||
359 | ksp->ks_ndata = ssh->size; | |
d1d7e268 | 360 | ksp->ks_data_size = ssh->size * sizeof (spa_txg_history_t); |
0b1401ee BB |
361 | |
362 | return (0); | |
363 | } | |
364 | ||
365 | static void | |
366 | spa_txg_history_init(spa_t *spa) | |
367 | { | |
368 | spa_stats_history_t *ssh = &spa->spa_stats.txg_history; | |
761b8ec6 | 369 | char *name; |
0b1401ee BB |
370 | kstat_t *ksp; |
371 | ||
372 | mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL); | |
373 | list_create(&ssh->list, sizeof (spa_txg_history_t), | |
374 | offsetof(spa_txg_history_t, sth_link)); | |
375 | ||
376 | ssh->count = 0; | |
377 | ssh->size = 0; | |
378 | ssh->private = NULL; | |
379 | ||
761b8ec6 | 380 | name = kmem_asprintf("zfs/%s", spa_name(spa)); |
0b1401ee BB |
381 | |
382 | ksp = kstat_create(name, 0, "txgs", "misc", | |
383 | KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); | |
384 | ssh->kstat = ksp; | |
385 | ||
386 | if (ksp) { | |
387 | ksp->ks_lock = &ssh->lock; | |
388 | ksp->ks_data = NULL; | |
389 | ksp->ks_private = spa; | |
390 | ksp->ks_update = spa_txg_history_update; | |
391 | kstat_set_raw_ops(ksp, spa_txg_history_headers, | |
392 | spa_txg_history_data, spa_txg_history_addr); | |
393 | kstat_install(ksp); | |
394 | } | |
761b8ec6 | 395 | strfree(name); |
0b1401ee BB |
396 | } |
397 | ||
398 | static void | |
399 | spa_txg_history_destroy(spa_t *spa) | |
400 | { | |
401 | spa_stats_history_t *ssh = &spa->spa_stats.txg_history; | |
402 | spa_txg_history_t *sth; | |
403 | kstat_t *ksp; | |
404 | ||
405 | ksp = ssh->kstat; | |
406 | if (ksp) | |
407 | kstat_delete(ksp); | |
408 | ||
409 | mutex_enter(&ssh->lock); | |
410 | while ((sth = list_remove_head(&ssh->list))) { | |
411 | ssh->size--; | |
d1d7e268 | 412 | kmem_free(sth, sizeof (spa_txg_history_t)); |
0b1401ee BB |
413 | } |
414 | ||
415 | ASSERT3U(ssh->size, ==, 0); | |
416 | list_destroy(&ssh->list); | |
417 | mutex_exit(&ssh->lock); | |
418 | ||
419 | mutex_destroy(&ssh->lock); | |
420 | } | |
421 | ||
422 | /* | |
423 | * Add a new txg to historical record. | |
424 | */ | |
425 | void | |
01b738f4 | 426 | spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time) |
0b1401ee BB |
427 | { |
428 | spa_stats_history_t *ssh = &spa->spa_stats.txg_history; | |
429 | spa_txg_history_t *sth, *rm; | |
430 | ||
431 | if (zfs_txg_history == 0 && ssh->size == 0) | |
432 | return; | |
433 | ||
79c76d5b | 434 | sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP); |
0b1401ee BB |
435 | sth->txg = txg; |
436 | sth->state = TXG_STATE_OPEN; | |
01b738f4 | 437 | sth->times[TXG_STATE_BIRTH] = birth_time; |
0b1401ee BB |
438 | |
439 | mutex_enter(&ssh->lock); | |
440 | ||
441 | list_insert_head(&ssh->list, sth); | |
442 | ssh->size++; | |
443 | ||
444 | while (ssh->size > zfs_txg_history) { | |
445 | ssh->size--; | |
446 | rm = list_remove_tail(&ssh->list); | |
d1d7e268 | 447 | kmem_free(rm, sizeof (spa_txg_history_t)); |
0b1401ee BB |
448 | } |
449 | ||
450 | mutex_exit(&ssh->lock); | |
451 | } | |
452 | ||
453 | /* | |
454 | * Set txg state completion time and increment current state. | |
455 | */ | |
456 | int | |
457 | spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state, | |
458 | hrtime_t completed_time) | |
459 | { | |
460 | spa_stats_history_t *ssh = &spa->spa_stats.txg_history; | |
461 | spa_txg_history_t *sth; | |
462 | int error = ENOENT; | |
463 | ||
464 | if (zfs_txg_history == 0) | |
465 | return (0); | |
466 | ||
467 | mutex_enter(&ssh->lock); | |
468 | for (sth = list_head(&ssh->list); sth != NULL; | |
d1d7e268 | 469 | sth = list_next(&ssh->list, sth)) { |
0b1401ee BB |
470 | if (sth->txg == txg) { |
471 | sth->times[completed_state] = completed_time; | |
472 | sth->state++; | |
473 | error = 0; | |
474 | break; | |
475 | } | |
476 | } | |
477 | mutex_exit(&ssh->lock); | |
478 | ||
479 | return (error); | |
480 | } | |
481 | ||
482 | /* | |
483 | * Set txg IO stats. | |
484 | */ | |
baf67d15 | 485 | static int |
0b1401ee | 486 | spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread, |
3ccab252 | 487 | uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty) |
0b1401ee BB |
488 | { |
489 | spa_stats_history_t *ssh = &spa->spa_stats.txg_history; | |
490 | spa_txg_history_t *sth; | |
491 | int error = ENOENT; | |
492 | ||
493 | if (zfs_txg_history == 0) | |
494 | return (0); | |
495 | ||
496 | mutex_enter(&ssh->lock); | |
497 | for (sth = list_head(&ssh->list); sth != NULL; | |
d1d7e268 | 498 | sth = list_next(&ssh->list, sth)) { |
0b1401ee BB |
499 | if (sth->txg == txg) { |
500 | sth->nread = nread; | |
501 | sth->nwritten = nwritten; | |
502 | sth->reads = reads; | |
503 | sth->writes = writes; | |
3ccab252 | 504 | sth->ndirty = ndirty; |
0b1401ee BB |
505 | error = 0; |
506 | break; | |
507 | } | |
508 | } | |
509 | mutex_exit(&ssh->lock); | |
510 | ||
511 | return (error); | |
512 | } | |
513 | ||
baf67d15 BB |
514 | txg_stat_t * |
515 | spa_txg_history_init_io(spa_t *spa, uint64_t txg, dsl_pool_t *dp) | |
516 | { | |
517 | txg_stat_t *ts; | |
518 | ||
519 | if (zfs_txg_history == 0) | |
520 | return (NULL); | |
521 | ||
522 | ts = kmem_alloc(sizeof (txg_stat_t), KM_SLEEP); | |
523 | ||
524 | spa_config_enter(spa, SCL_ALL, FTAG, RW_READER); | |
525 | vdev_get_stats(spa->spa_root_vdev, &ts->vs1); | |
526 | spa_config_exit(spa, SCL_ALL, FTAG); | |
527 | ||
528 | ts->txg = txg; | |
529 | ts->ndirty = dp->dp_dirty_pertxg[txg & TXG_MASK]; | |
530 | ||
531 | spa_txg_history_set(spa, txg, TXG_STATE_WAIT_FOR_SYNC, gethrtime()); | |
532 | ||
533 | return (ts); | |
534 | } | |
535 | ||
536 | void | |
537 | spa_txg_history_fini_io(spa_t *spa, txg_stat_t *ts) | |
538 | { | |
539 | if (ts == NULL) | |
540 | return; | |
541 | ||
542 | if (zfs_txg_history == 0) { | |
543 | kmem_free(ts, sizeof (txg_stat_t)); | |
544 | return; | |
545 | } | |
546 | ||
547 | spa_config_enter(spa, SCL_ALL, FTAG, RW_READER); | |
548 | vdev_get_stats(spa->spa_root_vdev, &ts->vs2); | |
549 | spa_config_exit(spa, SCL_ALL, FTAG); | |
550 | ||
551 | spa_txg_history_set(spa, ts->txg, TXG_STATE_SYNCED, gethrtime()); | |
552 | spa_txg_history_set_io(spa, ts->txg, | |
553 | ts->vs2.vs_bytes[ZIO_TYPE_READ] - ts->vs1.vs_bytes[ZIO_TYPE_READ], | |
554 | ts->vs2.vs_bytes[ZIO_TYPE_WRITE] - ts->vs1.vs_bytes[ZIO_TYPE_WRITE], | |
555 | ts->vs2.vs_ops[ZIO_TYPE_READ] - ts->vs1.vs_ops[ZIO_TYPE_READ], | |
556 | ts->vs2.vs_ops[ZIO_TYPE_WRITE] - ts->vs1.vs_ops[ZIO_TYPE_WRITE], | |
557 | ts->ndirty); | |
558 | ||
559 | kmem_free(ts, sizeof (txg_stat_t)); | |
560 | } | |
561 | ||
2d37239a BB |
562 | /* |
563 | * ========================================================================== | |
564 | * SPA TX Assign Histogram Routines | |
565 | * ========================================================================== | |
566 | */ | |
567 | ||
568 | /* | |
569 | * Tx statistics - Information exported regarding dmu_tx_assign time. | |
570 | */ | |
571 | ||
572 | /* | |
573 | * When the kstat is written zero all buckets. When the kstat is read | |
574 | * count the number of trailing buckets set to zero and update ks_ndata | |
575 | * such that they are not output. | |
576 | */ | |
577 | static int | |
578 | spa_tx_assign_update(kstat_t *ksp, int rw) | |
579 | { | |
580 | spa_t *spa = ksp->ks_private; | |
581 | spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram; | |
582 | int i; | |
583 | ||
584 | if (rw == KSTAT_WRITE) { | |
585 | for (i = 0; i < ssh->count; i++) | |
586 | ((kstat_named_t *)ssh->private)[i].value.ui64 = 0; | |
587 | } | |
588 | ||
589 | for (i = ssh->count; i > 0; i--) | |
590 | if (((kstat_named_t *)ssh->private)[i-1].value.ui64 != 0) | |
591 | break; | |
592 | ||
593 | ksp->ks_ndata = i; | |
d1d7e268 | 594 | ksp->ks_data_size = i * sizeof (kstat_named_t); |
2d37239a BB |
595 | |
596 | return (0); | |
597 | } | |
598 | ||
599 | static void | |
600 | spa_tx_assign_init(spa_t *spa) | |
601 | { | |
602 | spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram; | |
761b8ec6 | 603 | char *name; |
2d37239a BB |
604 | kstat_named_t *ks; |
605 | kstat_t *ksp; | |
606 | int i; | |
607 | ||
608 | mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL); | |
609 | ||
610 | ssh->count = 42; /* power of two buckets for 1ns to 2,199s */ | |
d1d7e268 | 611 | ssh->size = ssh->count * sizeof (kstat_named_t); |
2d37239a BB |
612 | ssh->private = kmem_alloc(ssh->size, KM_SLEEP); |
613 | ||
761b8ec6 | 614 | name = kmem_asprintf("zfs/%s", spa_name(spa)); |
2d37239a BB |
615 | |
616 | for (i = 0; i < ssh->count; i++) { | |
617 | ks = &((kstat_named_t *)ssh->private)[i]; | |
618 | ks->data_type = KSTAT_DATA_UINT64; | |
619 | ks->value.ui64 = 0; | |
620 | (void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns", | |
621 | (u_longlong_t)1 << i); | |
622 | } | |
623 | ||
624 | ksp = kstat_create(name, 0, "dmu_tx_assign", "misc", | |
625 | KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL); | |
626 | ssh->kstat = ksp; | |
627 | ||
628 | if (ksp) { | |
629 | ksp->ks_lock = &ssh->lock; | |
630 | ksp->ks_data = ssh->private; | |
631 | ksp->ks_ndata = ssh->count; | |
632 | ksp->ks_data_size = ssh->size; | |
633 | ksp->ks_private = spa; | |
634 | ksp->ks_update = spa_tx_assign_update; | |
635 | kstat_install(ksp); | |
636 | } | |
761b8ec6 | 637 | strfree(name); |
2d37239a BB |
638 | } |
639 | ||
640 | static void | |
641 | spa_tx_assign_destroy(spa_t *spa) | |
642 | { | |
643 | spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram; | |
644 | kstat_t *ksp; | |
645 | ||
646 | ksp = ssh->kstat; | |
647 | if (ksp) | |
648 | kstat_delete(ksp); | |
649 | ||
650 | kmem_free(ssh->private, ssh->size); | |
651 | mutex_destroy(&ssh->lock); | |
652 | } | |
653 | ||
654 | void | |
655 | spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs) | |
656 | { | |
657 | spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram; | |
658 | uint64_t idx = 0; | |
659 | ||
4ca9c1de | 660 | while (((1ULL << idx) < nsecs) && (idx < ssh->size - 1)) |
2d37239a BB |
661 | idx++; |
662 | ||
663 | atomic_inc_64(&((kstat_named_t *)ssh->private)[idx].value.ui64); | |
664 | } | |
665 | ||
330847ff MA |
666 | /* |
667 | * ========================================================================== | |
668 | * SPA IO History Routines | |
669 | * ========================================================================== | |
670 | */ | |
671 | static int | |
672 | spa_io_history_update(kstat_t *ksp, int rw) | |
673 | { | |
674 | if (rw == KSTAT_WRITE) | |
675 | memset(ksp->ks_data, 0, ksp->ks_data_size); | |
676 | ||
677 | return (0); | |
678 | } | |
679 | ||
680 | static void | |
681 | spa_io_history_init(spa_t *spa) | |
682 | { | |
683 | spa_stats_history_t *ssh = &spa->spa_stats.io_history; | |
761b8ec6 | 684 | char *name; |
330847ff MA |
685 | kstat_t *ksp; |
686 | ||
687 | mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL); | |
688 | ||
761b8ec6 | 689 | name = kmem_asprintf("zfs/%s", spa_name(spa)); |
330847ff MA |
690 | |
691 | ksp = kstat_create(name, 0, "io", "disk", KSTAT_TYPE_IO, 1, 0); | |
692 | ssh->kstat = ksp; | |
693 | ||
694 | if (ksp) { | |
695 | ksp->ks_lock = &ssh->lock; | |
696 | ksp->ks_private = spa; | |
697 | ksp->ks_update = spa_io_history_update; | |
698 | kstat_install(ksp); | |
699 | } | |
761b8ec6 | 700 | strfree(name); |
330847ff MA |
701 | } |
702 | ||
703 | static void | |
704 | spa_io_history_destroy(spa_t *spa) | |
705 | { | |
706 | spa_stats_history_t *ssh = &spa->spa_stats.io_history; | |
707 | ||
708 | if (ssh->kstat) | |
709 | kstat_delete(ssh->kstat); | |
710 | ||
711 | mutex_destroy(&ssh->lock); | |
712 | } | |
713 | ||
379ca9cf OF |
714 | /* |
715 | * ========================================================================== | |
716 | * SPA MMP History Routines | |
717 | * ========================================================================== | |
718 | */ | |
719 | ||
/*
 * MMP statistics - Information exported regarding attempted MMP writes.
 *
 * For MMP writes issued, the fields are used as per the comments below.
 * For MMP writes skipped, an entry represents a span of time when
 * writes were skipped for the same reason (error from mmp_random_leaf).
 * In that case the fields differ as follows:
 *	timestamp	time first write skipped, if >1 skipped in a row
 *	mmp_delay	delay value at timestamp
 *	vdev_guid	number of writes skipped
 *	io_error	one of enum mmp_error
 *	duration	time span (ns) of skipped writes
 */

typedef struct spa_mmp_history {
	uint64_t	mmp_kstat_id;	/* unique # for updates */
	uint64_t	txg;		/* txg of last sync */
	uint64_t	timestamp;	/* UTC time MMP write issued */
	uint64_t	mmp_delay;	/* mmp_thread.mmp_delay at timestamp */
	uint64_t	vdev_guid;	/* unique ID of leaf vdev */
	char		*vdev_path;	/* may be NULL; strfree()d with record */
	int		vdev_label;	/* vdev label */
	int		io_error;	/* error status of MMP write */
	hrtime_t	error_start;	/* hrtime of start of error period */
	hrtime_t	duration;	/* time from submission to completion */
	list_node_t	smh_link;	/* linkage on the history list */
} spa_mmp_history_t;
746 | ||
/*
 * Raw-kstat header callback for the "multihost" kstat: formats the
 * column titles into buf (bounded by size) and always returns 0.
 */
static int
spa_mmp_history_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size,
	    "%-10s %-10s %-10s %-6s %-10s %-12s %-24s %-10s %s\n",
	    "id", "txg", "timestamp", "error", "duration", "mmp_delay",
	    "vdev_guid", "vdev_label", "vdev_path");

	return (0);
}
755 | ||
756 | static int | |
757 | spa_mmp_history_data(char *buf, size_t size, void *data) | |
758 | { | |
759 | spa_mmp_history_t *smh = (spa_mmp_history_t *)data; | |
d2160d05 OF |
760 | char skip_fmt[] = "%-10llu %-10llu %10llu %#6llx %10lld %12llu %-24llu " |
761 | "%-10lld %s\n"; | |
762 | char write_fmt[] = "%-10llu %-10llu %10llu %6lld %10lld %12llu %-24llu " | |
763 | "%-10lld %s\n"; | |
379ca9cf | 764 | |
d2160d05 | 765 | (void) snprintf(buf, size, (smh->error_start ? skip_fmt : write_fmt), |
7088545d OF |
766 | (u_longlong_t)smh->mmp_kstat_id, (u_longlong_t)smh->txg, |
767 | (u_longlong_t)smh->timestamp, (longlong_t)smh->io_error, | |
768 | (longlong_t)smh->duration, (u_longlong_t)smh->mmp_delay, | |
769 | (u_longlong_t)smh->vdev_guid, (u_longlong_t)smh->vdev_label, | |
379ca9cf OF |
770 | (smh->vdev_path ? smh->vdev_path : "-")); |
771 | ||
772 | return (0); | |
773 | } | |
774 | ||
775 | /* | |
776 | * Calculate the address for the next spa_stats_history_t entry. The | |
777 | * ssh->lock will be held until ksp->ks_ndata entries are processed. | |
778 | */ | |
779 | static void * | |
780 | spa_mmp_history_addr(kstat_t *ksp, loff_t n) | |
781 | { | |
782 | spa_t *spa = ksp->ks_private; | |
783 | spa_stats_history_t *ssh = &spa->spa_stats.mmp_history; | |
784 | ||
785 | ASSERT(MUTEX_HELD(&ssh->lock)); | |
786 | ||
787 | if (n == 0) | |
788 | ssh->private = list_tail(&ssh->list); | |
789 | else if (ssh->private) | |
790 | ssh->private = list_prev(&ssh->list, ssh->private); | |
791 | ||
792 | return (ssh->private); | |
793 | } | |
794 | ||
795 | /* | |
796 | * When the kstat is written discard all spa_mmp_history_t entries. The | |
797 | * ssh->lock will be held until ksp->ks_ndata entries are processed. | |
798 | */ | |
799 | static int | |
800 | spa_mmp_history_update(kstat_t *ksp, int rw) | |
801 | { | |
802 | spa_t *spa = ksp->ks_private; | |
803 | spa_stats_history_t *ssh = &spa->spa_stats.mmp_history; | |
804 | ||
805 | ASSERT(MUTEX_HELD(&ssh->lock)); | |
806 | ||
807 | if (rw == KSTAT_WRITE) { | |
808 | spa_mmp_history_t *smh; | |
809 | ||
810 | while ((smh = list_remove_head(&ssh->list))) { | |
811 | ssh->size--; | |
812 | if (smh->vdev_path) | |
813 | strfree(smh->vdev_path); | |
814 | kmem_free(smh, sizeof (spa_mmp_history_t)); | |
815 | } | |
816 | ||
817 | ASSERT3U(ssh->size, ==, 0); | |
818 | } | |
819 | ||
820 | ksp->ks_ndata = ssh->size; | |
821 | ksp->ks_data_size = ssh->size * sizeof (spa_mmp_history_t); | |
822 | ||
823 | return (0); | |
824 | } | |
825 | ||
826 | static void | |
827 | spa_mmp_history_init(spa_t *spa) | |
828 | { | |
829 | spa_stats_history_t *ssh = &spa->spa_stats.mmp_history; | |
761b8ec6 | 830 | char *name; |
379ca9cf OF |
831 | kstat_t *ksp; |
832 | ||
833 | mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL); | |
834 | list_create(&ssh->list, sizeof (spa_mmp_history_t), | |
835 | offsetof(spa_mmp_history_t, smh_link)); | |
836 | ||
837 | ssh->count = 0; | |
838 | ssh->size = 0; | |
839 | ssh->private = NULL; | |
840 | ||
761b8ec6 | 841 | name = kmem_asprintf("zfs/%s", spa_name(spa)); |
379ca9cf OF |
842 | |
843 | ksp = kstat_create(name, 0, "multihost", "misc", | |
844 | KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); | |
845 | ssh->kstat = ksp; | |
846 | ||
847 | if (ksp) { | |
848 | ksp->ks_lock = &ssh->lock; | |
849 | ksp->ks_data = NULL; | |
850 | ksp->ks_private = spa; | |
851 | ksp->ks_update = spa_mmp_history_update; | |
852 | kstat_set_raw_ops(ksp, spa_mmp_history_headers, | |
853 | spa_mmp_history_data, spa_mmp_history_addr); | |
854 | kstat_install(ksp); | |
855 | } | |
761b8ec6 | 856 | strfree(name); |
379ca9cf OF |
857 | } |
858 | ||
859 | static void | |
860 | spa_mmp_history_destroy(spa_t *spa) | |
861 | { | |
862 | spa_stats_history_t *ssh = &spa->spa_stats.mmp_history; | |
863 | spa_mmp_history_t *smh; | |
864 | kstat_t *ksp; | |
865 | ||
866 | ksp = ssh->kstat; | |
867 | if (ksp) | |
868 | kstat_delete(ksp); | |
869 | ||
870 | mutex_enter(&ssh->lock); | |
871 | while ((smh = list_remove_head(&ssh->list))) { | |
872 | ssh->size--; | |
873 | if (smh->vdev_path) | |
874 | strfree(smh->vdev_path); | |
875 | kmem_free(smh, sizeof (spa_mmp_history_t)); | |
876 | } | |
877 | ||
878 | ASSERT3U(ssh->size, ==, 0); | |
879 | list_destroy(&ssh->list); | |
880 | mutex_exit(&ssh->lock); | |
881 | ||
882 | mutex_destroy(&ssh->lock); | |
883 | } | |
884 | ||
d2160d05 OF |
885 | /* |
886 | * Set duration in existing "skip" record to how long we have waited for a leaf | |
887 | * vdev to become available. | |
888 | * | |
889 | * Important that we start search at the head of the list where new | |
890 | * records are inserted, so this is normally an O(1) operation. | |
891 | */ | |
892 | int | |
893 | spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id) | |
894 | { | |
895 | spa_stats_history_t *ssh = &spa->spa_stats.mmp_history; | |
896 | spa_mmp_history_t *smh; | |
897 | int error = ENOENT; | |
898 | ||
899 | if (zfs_multihost_history == 0 && ssh->size == 0) | |
900 | return (0); | |
901 | ||
902 | mutex_enter(&ssh->lock); | |
903 | for (smh = list_head(&ssh->list); smh != NULL; | |
904 | smh = list_next(&ssh->list, smh)) { | |
905 | if (smh->mmp_kstat_id == mmp_kstat_id) { | |
906 | ASSERT3U(smh->io_error, !=, 0); | |
907 | smh->duration = gethrtime() - smh->error_start; | |
908 | smh->vdev_guid++; | |
909 | error = 0; | |
910 | break; | |
911 | } | |
912 | } | |
913 | mutex_exit(&ssh->lock); | |
914 | ||
915 | return (error); | |
916 | } | |
917 | ||
379ca9cf | 918 | /* |
7088545d | 919 | * Set MMP write duration and error status in existing record. |
d2160d05 | 920 | * See comment re: search order above spa_mmp_history_set_skip(). |
7088545d OF |
921 | */ |
922 | int | |
923 | spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error, | |
924 | hrtime_t duration) | |
925 | { | |
926 | spa_stats_history_t *ssh = &spa->spa_stats.mmp_history; | |
927 | spa_mmp_history_t *smh; | |
928 | int error = ENOENT; | |
929 | ||
930 | if (zfs_multihost_history == 0 && ssh->size == 0) | |
931 | return (0); | |
932 | ||
933 | mutex_enter(&ssh->lock); | |
934 | for (smh = list_head(&ssh->list); smh != NULL; | |
935 | smh = list_next(&ssh->list, smh)) { | |
936 | if (smh->mmp_kstat_id == mmp_kstat_id) { | |
d2160d05 | 937 | ASSERT(smh->io_error == 0); |
7088545d OF |
938 | smh->io_error = io_error; |
939 | smh->duration = duration; | |
940 | error = 0; | |
941 | break; | |
942 | } | |
943 | } | |
944 | mutex_exit(&ssh->lock); | |
945 | ||
946 | return (error); | |
947 | } | |
948 | ||
949 | /* | |
d2160d05 OF |
950 | * Add a new MMP historical record. |
951 | * error == 0 : a write was issued. | |
952 | * error != 0 : a write was not issued because no leaves were found. | |
379ca9cf | 953 | */ |
d2160d05 OF |
954 | void * |
955 | spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp, | |
956 | uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_kstat_id, | |
957 | int error) | |
379ca9cf | 958 | { |
379ca9cf OF |
959 | spa_stats_history_t *ssh = &spa->spa_stats.mmp_history; |
960 | spa_mmp_history_t *smh, *rm; | |
961 | ||
962 | if (zfs_multihost_history == 0 && ssh->size == 0) | |
d2160d05 | 963 | return (NULL); |
379ca9cf OF |
964 | |
965 | smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP); | |
966 | smh->txg = txg; | |
967 | smh->timestamp = timestamp; | |
968 | smh->mmp_delay = mmp_delay; | |
d2160d05 OF |
969 | if (vd) { |
970 | smh->vdev_guid = vd->vdev_guid; | |
971 | if (vd->vdev_path) | |
972 | smh->vdev_path = strdup(vd->vdev_path); | |
973 | } | |
379ca9cf | 974 | smh->vdev_label = label; |
7088545d | 975 | smh->mmp_kstat_id = mmp_kstat_id; |
379ca9cf | 976 | |
d2160d05 OF |
977 | if (error) { |
978 | smh->io_error = error; | |
979 | smh->error_start = gethrtime(); | |
980 | smh->vdev_guid = 1; | |
981 | } | |
982 | ||
379ca9cf OF |
983 | mutex_enter(&ssh->lock); |
984 | ||
985 | list_insert_head(&ssh->list, smh); | |
986 | ssh->size++; | |
987 | ||
988 | while (ssh->size > zfs_multihost_history) { | |
989 | ssh->size--; | |
990 | rm = list_remove_tail(&ssh->list); | |
991 | if (rm->vdev_path) | |
992 | strfree(rm->vdev_path); | |
993 | kmem_free(rm, sizeof (spa_mmp_history_t)); | |
994 | } | |
995 | ||
996 | mutex_exit(&ssh->lock); | |
d2160d05 | 997 | return ((void *)smh); |
379ca9cf OF |
998 | } |
999 | ||
1421c891 PS |
1000 | void |
1001 | spa_stats_init(spa_t *spa) | |
1002 | { | |
1003 | spa_read_history_init(spa); | |
0b1401ee | 1004 | spa_txg_history_init(spa); |
2d37239a | 1005 | spa_tx_assign_init(spa); |
330847ff | 1006 | spa_io_history_init(spa); |
379ca9cf | 1007 | spa_mmp_history_init(spa); |
1421c891 PS |
1008 | } |
1009 | ||
1010 | void | |
1011 | spa_stats_destroy(spa_t *spa) | |
1012 | { | |
2d37239a | 1013 | spa_tx_assign_destroy(spa); |
0b1401ee | 1014 | spa_txg_history_destroy(spa); |
1421c891 | 1015 | spa_read_history_destroy(spa); |
330847ff | 1016 | spa_io_history_destroy(spa); |
379ca9cf | 1017 | spa_mmp_history_destroy(spa); |
1421c891 PS |
1018 | } |
1019 | ||
#if defined(_KERNEL) && defined(HAVE_SPL)
/*
 * Expose the history tunables defined at the top of this file as module
 * parameters with mode 0644.
 *
 * The whole group is exempted from cstyle checking; the opening marker
 * must be BEGIN CSTYLED so that it pairs with END CSTYLED below (a bare
 * CSTYLED marker only exempts the single line that follows it).
 */
/* BEGIN CSTYLED */
module_param(zfs_read_history, int, 0644);
MODULE_PARM_DESC(zfs_read_history,
	"Historical statistics for the last N reads");

module_param(zfs_read_history_hits, int, 0644);
MODULE_PARM_DESC(zfs_read_history_hits,
	"Include cache hits in read history");

module_param(zfs_txg_history, int, 0644);
MODULE_PARM_DESC(zfs_txg_history,
	"Historical statistics for the last N txgs");

module_param(zfs_multihost_history, int, 0644);
MODULE_PARM_DESC(zfs_multihost_history,
	"Historical statistics for last N multihost writes");
/* END CSTYLED */
#endif