]> git.proxmox.com Git - mirror_zfs.git/blame - module/zfs/dmu_traverse.c
Add missing ZFS tunables
[mirror_zfs.git] / module / zfs / dmu_traverse.c
CommitLineData
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
428870ff 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
34dc7c2f
BB
23 */
24
34dc7c2f
BB
25#include <sys/zfs_context.h>
26#include <sys/dmu_objset.h>
27#include <sys/dmu_traverse.h>
28#include <sys/dsl_dataset.h>
29#include <sys/dsl_dir.h>
30#include <sys/dsl_pool.h>
31#include <sys/dnode.h>
32#include <sys/spa.h>
33#include <sys/zio.h>
34#include <sys/dmu_impl.h>
428870ff
BB
35#include <sys/sa.h>
36#include <sys/sa_impl.h>
b128c09f
BB
37#include <sys/callb.h>
38
572e2857
BB
/*
 * Tunable: traverse_impl() copies this value into pd_blks_max of every
 * traversal's prefetch state, where it bounds pd_blks_fetched.
 */
int zfs_pd_blks_max = 100;
40
41typedef struct prefetch_data {
b128c09f
BB
42 kmutex_t pd_mtx;
43 kcondvar_t pd_cv;
44 int pd_blks_max;
45 int pd_blks_fetched;
46 int pd_flags;
47 boolean_t pd_cancel;
48 boolean_t pd_exited;
572e2857 49} prefetch_data_t;
b128c09f 50
572e2857 51typedef struct traverse_data {
b128c09f
BB
52 spa_t *td_spa;
53 uint64_t td_objset;
54 blkptr_t *td_rootbp;
55 uint64_t td_min_txg;
56 int td_flags;
572e2857 57 prefetch_data_t *td_pfd;
b128c09f
BB
58 blkptr_cb_t *td_func;
59 void *td_arg;
572e2857 60} traverse_data_t;
34dc7c2f 61
6656bf56
BB
62typedef struct traverse_visitbp_data {
63 /* Function arguments */
64 traverse_data_t *tv_td;
65 const dnode_phys_t *tv_dnp;
66 arc_buf_t *tv_pbuf;
67 blkptr_t *tv_bp;
68 const zbookmark_t *tv_zb;
69 /* Local variables */
70 prefetch_data_t *tv_pd;
71 zbookmark_t tv_czb;
72 arc_buf_t *tv_buf;
73 boolean_t tv_hard;
74 objset_phys_t *tv_osp;
75 dnode_phys_t *tv_ldnp;
76 blkptr_t *tv_cbp;
77 uint32_t tv_flags;
78 int tv_err;
79 int tv_lasterr;
80 int tv_i;
81 int tv_epb;
82 int tv_depth;
83} traverse_visitbp_data_t;
84
85static inline int traverse_visitbp(traverse_data_t *td, const
86 dnode_phys_t *dnp, arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb);
572e2857 87static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
9babb374
BB
88 arc_buf_t *buf, uint64_t objset, uint64_t object);
89
428870ff 90static int
34dc7c2f
BB
91traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
92{
572e2857 93 traverse_data_t *td = arg;
b128c09f 94 zbookmark_t zb;
34dc7c2f 95
b128c09f 96 if (bp->blk_birth == 0)
428870ff 97 return (0);
34dc7c2f 98
b128c09f 99 if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(td->td_spa))
428870ff
BB
100 return (0);
101
102 SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
103 bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
104
105 (void) td->td_func(td->td_spa, zilog, bp, NULL, &zb, NULL, td->td_arg);
b128c09f 106
428870ff 107 return (0);
34dc7c2f
BB
108}
109
428870ff 110static int
34dc7c2f
BB
111traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
112{
572e2857 113 traverse_data_t *td = arg;
34dc7c2f
BB
114
115 if (lrc->lrc_txtype == TX_WRITE) {
116 lr_write_t *lr = (lr_write_t *)lrc;
117 blkptr_t *bp = &lr->lr_blkptr;
b128c09f 118 zbookmark_t zb;
34dc7c2f 119
b128c09f 120 if (bp->blk_birth == 0)
428870ff 121 return (0);
34dc7c2f 122
b128c09f 123 if (claim_txg == 0 || bp->blk_birth < claim_txg)
428870ff 124 return (0);
b128c09f 125
572e2857
BB
126 SET_BOOKMARK(&zb, td->td_objset, lr->lr_foid,
127 ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp));
428870ff
BB
128
129 (void) td->td_func(td->td_spa, zilog, bp, NULL, &zb, NULL,
130 td->td_arg);
34dc7c2f 131 }
428870ff 132 return (0);
34dc7c2f
BB
133}
134
135static void
572e2857 136traverse_zil(traverse_data_t *td, zil_header_t *zh)
34dc7c2f 137{
34dc7c2f
BB
138 uint64_t claim_txg = zh->zh_claim_txg;
139 zilog_t *zilog;
140
34dc7c2f
BB
141 /*
142 * We only want to visit blocks that have been claimed but not yet
428870ff 143 * replayed; plus, in read-only mode, blocks that are already stable.
34dc7c2f 144 */
fb5f0bc8 145 if (claim_txg == 0 && spa_writeable(td->td_spa))
34dc7c2f
BB
146 return;
147
b128c09f 148 zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh);
34dc7c2f 149
b128c09f 150 (void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td,
34dc7c2f
BB
151 claim_txg);
152
153 zil_free(zilog);
154}
155
6656bf56
BB
/*
 * Number of traverse_visitbp_data_t frames allocated per traverse_visitbp()
 * call; __traverse_visitbp() asserts that its depth stays below this.
 */
#define	TRAVERSE_VISITBP_MAX_DEPTH	20
157
158static void
159__traverse_visitbp_init(traverse_visitbp_data_t *tv,
160 traverse_data_t *td, const dnode_phys_t *dnp,
161 arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb, int depth)
34dc7c2f 162{
6656bf56
BB
163 tv->tv_td = td;
164 tv->tv_dnp = dnp;
165 tv->tv_pbuf = pbuf;
166 tv->tv_bp = bp;
167 tv->tv_zb = zb;
168 tv->tv_err = 0;
169 tv->tv_lasterr = 0;
170 tv->tv_buf = NULL;
171 tv->tv_pd = td->td_pfd;
172 tv->tv_hard = td->td_flags & TRAVERSE_HARD;
173 tv->tv_flags = ARC_WAIT;
174 tv->tv_depth = depth;
175}
b128c09f 176
6656bf56
BB
177static noinline int
178__traverse_visitbp(traverse_visitbp_data_t *tv)
179{
180 ASSERT3S(tv->tv_depth, <, TRAVERSE_VISITBP_MAX_DEPTH);
181
182 if (tv->tv_bp->blk_birth == 0) {
183 tv->tv_err = tv->tv_td->td_func(tv->tv_td->td_spa, NULL, NULL,
184 tv->tv_pbuf, tv->tv_zb, tv->tv_dnp, tv->tv_td->td_arg);
185 return (tv->tv_err);
34dc7c2f
BB
186 }
187
6656bf56 188 if (tv->tv_bp->blk_birth <= tv->tv_td->td_min_txg)
b128c09f 189 return (0);
34dc7c2f 190
6656bf56
BB
191 if (tv->tv_pd && !tv->tv_pd->pd_exited &&
192 ((tv->tv_pd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
193 BP_GET_TYPE(tv->tv_bp) == DMU_OT_DNODE ||
194 BP_GET_LEVEL(tv->tv_bp) > 0)) {
195 mutex_enter(&tv->tv_pd->pd_mtx);
196 ASSERT(tv->tv_pd->pd_blks_fetched >= 0);
197 while (tv->tv_pd->pd_blks_fetched == 0 && !tv->tv_pd->pd_exited)
198 cv_wait(&tv->tv_pd->pd_cv, &tv->tv_pd->pd_mtx);
199 tv->tv_pd->pd_blks_fetched--;
200 cv_broadcast(&tv->tv_pd->pd_cv);
201 mutex_exit(&tv->tv_pd->pd_mtx);
34dc7c2f
BB
202 }
203
6656bf56
BB
204 if (tv->tv_td->td_flags & TRAVERSE_PRE) {
205 tv->tv_err = tv->tv_td->td_func(tv->tv_td->td_spa, NULL,
206 tv->tv_bp, tv->tv_pbuf, tv->tv_zb, tv->tv_dnp,
207 tv->tv_td->td_arg);
208 if (tv->tv_err == TRAVERSE_VISIT_NO_CHILDREN)
572e2857 209 return (0);
6656bf56
BB
210 if (tv->tv_err)
211 return (tv->tv_err);
34dc7c2f
BB
212 }
213
6656bf56
BB
214 if (BP_GET_LEVEL(tv->tv_bp) > 0) {
215 tv->tv_epb = BP_GET_LSIZE(tv->tv_bp) >> SPA_BLKPTRSHIFT;
b128c09f 216
6656bf56
BB
217 tv->tv_err = dsl_read(NULL, tv->tv_td->td_spa, tv->tv_bp,
218 tv->tv_pbuf, arc_getbuf_func, &tv->tv_buf,
219 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
220 &tv->tv_flags, tv->tv_zb);
221 if (tv->tv_err)
222 return (tv->tv_err);
b128c09f
BB
223
224 /* recursively visitbp() blocks below this */
6656bf56
BB
225 tv->tv_cbp = tv->tv_buf->b_data;
226 for (tv->tv_i = 0; tv->tv_i < tv->tv_epb;
227 tv->tv_i++, tv->tv_cbp++) {
228 SET_BOOKMARK(&tv->tv_czb, tv->tv_zb->zb_objset,
229 tv->tv_zb->zb_object, tv->tv_zb->zb_level - 1,
230 tv->tv_zb->zb_blkid * tv->tv_epb + tv->tv_i);
231 __traverse_visitbp_init(tv + 1, tv->tv_td,
232 tv->tv_dnp, tv->tv_buf, tv->tv_cbp,
233 &tv->tv_czb, tv->tv_depth + 1);
234 tv->tv_err = __traverse_visitbp(tv + 1);
235 if (tv->tv_err) {
236 if (!tv->tv_hard)
428870ff 237 break;
6656bf56 238 tv->tv_lasterr = tv->tv_err;
428870ff 239 }
b128c09f 240 }
6656bf56
BB
241 } else if (BP_GET_TYPE(tv->tv_bp) == DMU_OT_DNODE) {
242 tv->tv_epb = BP_GET_LSIZE(tv->tv_bp) >> DNODE_SHIFT;
b128c09f 243
6656bf56
BB
244 tv->tv_err = dsl_read(NULL, tv->tv_td->td_spa, tv->tv_bp,
245 tv->tv_pbuf, arc_getbuf_func, &tv->tv_buf,
246 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
247 &tv->tv_flags, tv->tv_zb);
248 if (tv->tv_err)
249 return (tv->tv_err);
b128c09f
BB
250
251 /* recursively visitbp() blocks below this */
6656bf56
BB
252 tv->tv_dnp = tv->tv_buf->b_data;
253 for (tv->tv_i = 0; tv->tv_i < tv->tv_epb;
254 tv->tv_i++, tv->tv_dnp++) {
255 tv->tv_err = traverse_dnode(tv->tv_td, tv->tv_dnp,
256 tv->tv_buf, tv->tv_zb->zb_objset,
257 tv->tv_zb->zb_blkid * tv->tv_epb + tv->tv_i);
258 if (tv->tv_err) {
259 if (!tv->tv_hard)
428870ff 260 break;
6656bf56 261 tv->tv_lasterr = tv->tv_err;
428870ff 262 }
34dc7c2f 263 }
6656bf56
BB
264 } else if (BP_GET_TYPE(tv->tv_bp) == DMU_OT_OBJSET) {
265
266 tv->tv_err = dsl_read_nolock(NULL, tv->tv_td->td_spa,
267 tv->tv_bp, arc_getbuf_func, &tv->tv_buf,
268 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
269 &tv->tv_flags, tv->tv_zb);
270 if (tv->tv_err)
271 return (tv->tv_err);
272
273 tv->tv_osp = tv->tv_buf->b_data;
274 tv->tv_ldnp = &tv->tv_osp->os_meta_dnode;
275 tv->tv_err = traverse_dnode(tv->tv_td, tv->tv_ldnp, tv->tv_buf,
276 tv->tv_zb->zb_objset, DMU_META_DNODE_OBJECT);
277 if (tv->tv_err && tv->tv_hard) {
278 tv->tv_lasterr = tv->tv_err;
279 tv->tv_err = 0;
428870ff 280 }
6656bf56
BB
281 if (tv->tv_err == 0 &&
282 arc_buf_size(tv->tv_buf) >= sizeof (objset_phys_t)) {
283 tv->tv_ldnp = &tv->tv_osp->os_userused_dnode;
284 tv->tv_err = traverse_dnode(tv->tv_td, tv->tv_ldnp,
285 tv->tv_buf, tv->tv_zb->zb_objset,
9babb374
BB
286 DMU_USERUSED_OBJECT);
287 }
6656bf56
BB
288 if (tv->tv_err && tv->tv_hard) {
289 tv->tv_lasterr = tv->tv_err;
290 tv->tv_err = 0;
428870ff 291 }
6656bf56
BB
292 if (tv->tv_err == 0 &&
293 arc_buf_size(tv->tv_buf) >= sizeof (objset_phys_t)) {
294 tv->tv_ldnp = &tv->tv_osp->os_groupused_dnode;
295 tv->tv_err = traverse_dnode(tv->tv_td, tv->tv_ldnp,
296 tv->tv_buf, tv->tv_zb->zb_objset,
9babb374 297 DMU_GROUPUSED_OBJECT);
34dc7c2f 298 }
34dc7c2f
BB
299 }
300
6656bf56
BB
301 if (tv->tv_buf)
302 (void) arc_buf_remove_ref(tv->tv_buf, &tv->tv_buf);
34dc7c2f 303
6656bf56
BB
304 if (tv->tv_err == 0 && tv->tv_lasterr == 0 &&
305 (tv->tv_td->td_flags & TRAVERSE_POST)) {
306 tv->tv_err = tv->tv_td->td_func(tv->tv_td->td_spa, NULL,
307 tv->tv_bp, tv->tv_pbuf, tv->tv_zb, tv->tv_dnp,
308 tv->tv_td->td_arg);
428870ff 309 }
34dc7c2f 310
6656bf56
BB
311 return (tv->tv_err != 0 ? tv->tv_err : tv->tv_lasterr);
312}
313
314/*
315 * Due to limited stack space recursive functions are frowned upon in
316 * the Linux kernel. However, they often are the most elegant solution
317 * to a problem. The following code preserves the recursive function
318 * traverse_visitbp() but moves the local variables AND function
319 * arguments to the heap to minimize the stack frame size. Enough space
320 * is allocated on the heap for TRAVERSE_VISITBP_MAX_DEPTH (20) levels of recursion.
321 * This change does ugly-up-the-code but it reduces the worst case
322 * usage from roughly 2496 bytes to 576 bytes on x86_64 archs.
323 */
324static int
325traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
326 arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb)
327{
328 traverse_visitbp_data_t *tv;
329 int error;
330
331 tv = kmem_zalloc(sizeof(traverse_visitbp_data_t) *
332 TRAVERSE_VISITBP_MAX_DEPTH, KM_SLEEP);
333 __traverse_visitbp_init(tv, td, dnp, pbuf, bp, zb, 0);
334
335 error = __traverse_visitbp(tv);
336
337 kmem_free(tv, sizeof(traverse_visitbp_data_t) *
338 TRAVERSE_VISITBP_MAX_DEPTH);
339
340 return (error);
34dc7c2f
BB
341}
342
9babb374 343static int
572e2857 344traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
9babb374
BB
345 arc_buf_t *buf, uint64_t objset, uint64_t object)
346{
428870ff 347 int j, err = 0, lasterr = 0;
9babb374 348 zbookmark_t czb;
428870ff 349 boolean_t hard = (td->td_flags & TRAVERSE_HARD);
9babb374
BB
350
351 for (j = 0; j < dnp->dn_nblkptr; j++) {
352 SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
353 err = traverse_visitbp(td, dnp, buf,
354 (blkptr_t *)&dnp->dn_blkptr[j], &czb);
428870ff
BB
355 if (err) {
356 if (!hard)
357 break;
358 lasterr = err;
359 }
9babb374 360 }
428870ff
BB
361
362 if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
363 SET_BOOKMARK(&czb, objset,
364 object, 0, DMU_SPILL_BLKID);
365 err = traverse_visitbp(td, dnp, buf,
366 (blkptr_t *)&dnp->dn_spill, &czb);
367 if (err) {
368 if (!hard)
369 return (err);
370 lasterr = err;
371 }
372 }
373 return (err != 0 ? err : lasterr);
9babb374
BB
374}
375
b128c09f
BB
376/* ARGSUSED */
377static int
428870ff
BB
378traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
379 arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp,
380 void *arg)
34dc7c2f 381{
572e2857 382 prefetch_data_t *pfd = arg;
b128c09f 383 uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
34dc7c2f 384
b128c09f
BB
385 ASSERT(pfd->pd_blks_fetched >= 0);
386 if (pfd->pd_cancel)
387 return (EINTR);
34dc7c2f 388
b128c09f 389 if (bp == NULL || !((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
428870ff
BB
390 BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0) ||
391 BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG)
34dc7c2f
BB
392 return (0);
393
b128c09f
BB
394 mutex_enter(&pfd->pd_mtx);
395 while (!pfd->pd_cancel && pfd->pd_blks_fetched >= pfd->pd_blks_max)
396 cv_wait(&pfd->pd_cv, &pfd->pd_mtx);
397 pfd->pd_blks_fetched++;
398 cv_broadcast(&pfd->pd_cv);
399 mutex_exit(&pfd->pd_mtx);
34dc7c2f 400
428870ff 401 (void) dsl_read(NULL, spa, bp, pbuf, NULL, NULL,
b128c09f
BB
402 ZIO_PRIORITY_ASYNC_READ,
403 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
404 &aflags, zb);
34dc7c2f 405
b128c09f 406 return (0);
34dc7c2f
BB
407}
408
34dc7c2f 409static void
b128c09f 410traverse_prefetch_thread(void *arg)
34dc7c2f 411{
572e2857
BB
412 traverse_data_t *td_main = arg;
413 traverse_data_t td = *td_main;
b128c09f 414 zbookmark_t czb;
34dc7c2f 415
b128c09f
BB
416 td.td_func = traverse_prefetcher;
417 td.td_arg = td_main->td_pfd;
418 td.td_pfd = NULL;
34dc7c2f 419
428870ff
BB
420 SET_BOOKMARK(&czb, td.td_objset,
421 ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
b128c09f 422 (void) traverse_visitbp(&td, NULL, NULL, td.td_rootbp, &czb);
34dc7c2f 423
b128c09f
BB
424 mutex_enter(&td_main->td_pfd->pd_mtx);
425 td_main->td_pfd->pd_exited = B_TRUE;
426 cv_broadcast(&td_main->td_pfd->pd_cv);
427 mutex_exit(&td_main->td_pfd->pd_mtx);
34dc7c2f
BB
428}
429
b128c09f
BB
430/*
431 * NB: dataset must not be changing on-disk (eg, is a snapshot or we are
432 * in syncing context).
433 */
434static int
572e2857 435traverse_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *rootbp,
b128c09f 436 uint64_t txg_start, int flags, blkptr_cb_t func, void *arg)
34dc7c2f 437{
47050a88
BB
438 traverse_data_t *td;
439 prefetch_data_t *pd;
440 zbookmark_t *czb;
b128c09f 441 int err;
34dc7c2f 442
47050a88
BB
443 td = kmem_alloc(sizeof(traverse_data_t), KM_SLEEP);
444 pd = kmem_zalloc(sizeof(prefetch_data_t), KM_SLEEP);
445 czb = kmem_alloc(sizeof(zbookmark_t), KM_SLEEP);
446
447 td->td_spa = spa;
448 td->td_objset = ds ? ds->ds_object : 0;
449 td->td_rootbp = rootbp;
450 td->td_min_txg = txg_start;
451 td->td_func = func;
452 td->td_arg = arg;
453 td->td_pfd = pd;
454 td->td_flags = flags;
b128c09f 455
47050a88
BB
456 pd->pd_blks_max = zfs_pd_blks_max;
457 pd->pd_flags = flags;
458 mutex_init(&pd->pd_mtx, NULL, MUTEX_DEFAULT, NULL);
459 cv_init(&pd->pd_cv, NULL, CV_DEFAULT, NULL);
b128c09f 460
572e2857
BB
461 /* See comment on ZIL traversal in dsl_scan_visitds. */
462 if (ds != NULL && !dsl_dataset_is_snapshot(ds)) {
463 objset_t *os;
464
465 err = dmu_objset_from_ds(ds, &os);
466 if (err)
467 return (err);
468
47050a88 469 traverse_zil(td, &os->os_zil_header);
572e2857
BB
470 }
471
b128c09f
BB
472 if (!(flags & TRAVERSE_PREFETCH) ||
473 0 == taskq_dispatch(system_taskq, traverse_prefetch_thread,
47050a88
BB
474 td, TQ_NOQUEUE))
475 pd->pd_exited = B_TRUE;
b128c09f 476
47050a88 477 SET_BOOKMARK(czb, td->td_objset,
428870ff 478 ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
47050a88
BB
479 err = traverse_visitbp(td, NULL, NULL, rootbp, czb);
480
481 mutex_enter(&pd->pd_mtx);
482 pd->pd_cancel = B_TRUE;
483 cv_broadcast(&pd->pd_cv);
484 while (!pd->pd_exited)
485 cv_wait(&pd->pd_cv, &pd->pd_mtx);
486 mutex_exit(&pd->pd_mtx);
b128c09f 487
47050a88
BB
488 mutex_destroy(&pd->pd_mtx);
489 cv_destroy(&pd->pd_cv);
b128c09f 490
47050a88
BB
491 kmem_free(czb, sizeof(zbookmark_t));
492 kmem_free(pd, sizeof(struct prefetch_data));
493 kmem_free(td, sizeof(struct traverse_data));
34dc7c2f 494
b128c09f 495 return (err);
34dc7c2f
BB
496}
497
b128c09f
BB
498/*
499 * NB: dataset must not be changing on-disk (eg, is a snapshot or we are
500 * in syncing context).
501 */
502int
503traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags,
504 blkptr_cb_t func, void *arg)
34dc7c2f 505{
572e2857 506 return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds,
b128c09f 507 &ds->ds_phys->ds_bp, txg_start, flags, func, arg));
34dc7c2f
BB
508}
509
b128c09f
BB
510/*
511 * NB: pool must not be changing on-disk (eg, from zdb or sync context).
512 */
513int
428870ff
BB
514traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
515 blkptr_cb_t func, void *arg)
34dc7c2f 516{
428870ff 517 int err, lasterr = 0;
b128c09f
BB
518 uint64_t obj;
519 dsl_pool_t *dp = spa_get_dsl(spa);
520 objset_t *mos = dp->dp_meta_objset;
428870ff 521 boolean_t hard = (flags & TRAVERSE_HARD);
b128c09f
BB
522
523 /* visit the MOS */
572e2857 524 err = traverse_impl(spa, NULL, spa_get_rootblkptr(spa),
428870ff 525 txg_start, flags, func, arg);
b128c09f
BB
526 if (err)
527 return (err);
528
529 /* visit each dataset */
428870ff
BB
530 for (obj = 1; err == 0 || (err != ESRCH && hard);
531 err = dmu_object_next(mos, &obj, FALSE, txg_start)) {
b128c09f
BB
532 dmu_object_info_t doi;
533
534 err = dmu_object_info(mos, obj, &doi);
428870ff
BB
535 if (err) {
536 if (!hard)
537 return (err);
538 lasterr = err;
539 continue;
540 }
b128c09f
BB
541
542 if (doi.doi_type == DMU_OT_DSL_DATASET) {
543 dsl_dataset_t *ds;
428870ff
BB
544 uint64_t txg = txg_start;
545
b128c09f
BB
546 rw_enter(&dp->dp_config_rwlock, RW_READER);
547 err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
548 rw_exit(&dp->dp_config_rwlock);
428870ff
BB
549 if (err) {
550 if (!hard)
551 return (err);
552 lasterr = err;
553 continue;
554 }
555 if (ds->ds_phys->ds_prev_snap_txg > txg)
556 txg = ds->ds_phys->ds_prev_snap_txg;
557 err = traverse_dataset(ds, txg, flags, func, arg);
b128c09f 558 dsl_dataset_rele(ds, FTAG);
428870ff
BB
559 if (err) {
560 if (!hard)
561 return (err);
562 lasterr = err;
563 }
b128c09f 564 }
34dc7c2f 565 }
b128c09f
BB
566 if (err == ESRCH)
567 err = 0;
428870ff 568 return (err != 0 ? err : lasterr);
34dc7c2f 569}
c28b2279
BB
570
#if defined(_KERNEL) && defined(HAVE_SPL)
/* Public traversal entry points. */
EXPORT_SYMBOL(traverse_dataset);
EXPORT_SYMBOL(traverse_pool);

/* zfs_pd_blks_max is exposed as a module parameter with mode 0644. */
module_param(zfs_pd_blks_max, int, 0644);
MODULE_PARM_DESC(zfs_pd_blks_max, "Max number of blocks to prefetch");
#endif