]>
Commit | Line | Data |
---|---|---|
428870ff BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
1d3ba0bf | 9 | * or https://opensource.org/licenses/CDDL-1.0. |
428870ff BB |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | |
37f03da8 | 23 | * Copyright (c) 2011, 2018 by Delphix. All rights reserved. |
9210e43a | 24 | * Copyright (c) 2017 Datto Inc. |
428870ff BB |
25 | */ |
26 | ||
27 | #include <sys/bpobj.h> | |
28 | #include <sys/zfs_context.h> | |
27d96d22 | 29 | #include <sys/zfs_refcount.h> |
330d06f9 | 30 | #include <sys/dsl_pool.h> |
753c3839 MA |
31 | #include <sys/zfeature.h> |
32 | #include <sys/zap.h> | |
33 | ||
34 | /* | |
35 | * Return an empty bpobj, preferably the empty dummy one (dp_empty_bpobj). | |
36 | */ | |
37 | uint64_t | |
38 | bpobj_alloc_empty(objset_t *os, int blocksize, dmu_tx_t *tx) | |
39 | { | |
753c3839 MA |
40 | spa_t *spa = dmu_objset_spa(os); |
41 | dsl_pool_t *dp = dmu_objset_pool(os); | |
42 | ||
fa86b5db MA |
43 | if (spa_feature_is_enabled(spa, SPA_FEATURE_EMPTY_BPOBJ)) { |
44 | if (!spa_feature_is_active(spa, SPA_FEATURE_EMPTY_BPOBJ)) { | |
c99c9001 | 45 | ASSERT0(dp->dp_empty_bpobj); |
753c3839 | 46 | dp->dp_empty_bpobj = |
f1512ee6 | 47 | bpobj_alloc(os, SPA_OLD_MAXBLOCKSIZE, tx); |
753c3839 MA |
48 | VERIFY(zap_add(os, |
49 | DMU_POOL_DIRECTORY_OBJECT, | |
50 | DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1, | |
51 | &dp->dp_empty_bpobj, tx) == 0); | |
52 | } | |
fa86b5db | 53 | spa_feature_incr(spa, SPA_FEATURE_EMPTY_BPOBJ, tx); |
753c3839 MA |
54 | ASSERT(dp->dp_empty_bpobj != 0); |
55 | return (dp->dp_empty_bpobj); | |
56 | } else { | |
57 | return (bpobj_alloc(os, blocksize, tx)); | |
58 | } | |
59 | } | |
60 | ||
61 | void | |
62 | bpobj_decr_empty(objset_t *os, dmu_tx_t *tx) | |
63 | { | |
753c3839 MA |
64 | dsl_pool_t *dp = dmu_objset_pool(os); |
65 | ||
fa86b5db MA |
66 | spa_feature_decr(dmu_objset_spa(os), SPA_FEATURE_EMPTY_BPOBJ, tx); |
67 | if (!spa_feature_is_active(dmu_objset_spa(os), | |
68 | SPA_FEATURE_EMPTY_BPOBJ)) { | |
753c3839 MA |
69 | VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset, |
70 | DMU_POOL_DIRECTORY_OBJECT, | |
71 | DMU_POOL_EMPTY_BPOBJ, tx)); | |
72 | VERIFY3U(0, ==, dmu_object_free(os, dp->dp_empty_bpobj, tx)); | |
73 | dp->dp_empty_bpobj = 0; | |
74 | } | |
75 | } | |
428870ff BB |
76 | |
77 | uint64_t | |
78 | bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx) | |
79 | { | |
80 | int size; | |
81 | ||
82 | if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_BPOBJ_ACCOUNT) | |
83 | size = BPOBJ_SIZE_V0; | |
84 | else if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS) | |
85 | size = BPOBJ_SIZE_V1; | |
37f03da8 SH |
86 | else if (!spa_feature_is_active(dmu_objset_spa(os), |
87 | SPA_FEATURE_LIVELIST)) | |
88 | size = BPOBJ_SIZE_V2; | |
428870ff BB |
89 | else |
90 | size = sizeof (bpobj_phys_t); | |
91 | ||
92 | return (dmu_object_alloc(os, DMU_OT_BPOBJ, blocksize, | |
93 | DMU_OT_BPOBJ_HDR, size, tx)); | |
94 | } | |
95 | ||
96 | void | |
97 | bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx) | |
98 | { | |
99 | int64_t i; | |
100 | bpobj_t bpo; | |
101 | dmu_object_info_t doi; | |
102 | int epb; | |
103 | dmu_buf_t *dbuf = NULL; | |
104 | ||
753c3839 | 105 | ASSERT(obj != dmu_objset_pool(os)->dp_empty_bpobj); |
428870ff BB |
106 | VERIFY3U(0, ==, bpobj_open(&bpo, os, obj)); |
107 | ||
108 | mutex_enter(&bpo.bpo_lock); | |
109 | ||
110 | if (!bpo.bpo_havesubobj || bpo.bpo_phys->bpo_subobjs == 0) | |
111 | goto out; | |
112 | ||
113 | VERIFY3U(0, ==, dmu_object_info(os, bpo.bpo_phys->bpo_subobjs, &doi)); | |
114 | epb = doi.doi_data_block_size / sizeof (uint64_t); | |
115 | ||
116 | for (i = bpo.bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) { | |
117 | uint64_t *objarray; | |
118 | uint64_t offset, blkoff; | |
119 | ||
120 | offset = i * sizeof (uint64_t); | |
121 | blkoff = P2PHASE(i, epb); | |
122 | ||
123 | if (dbuf == NULL || dbuf->db_offset > offset) { | |
124 | if (dbuf) | |
125 | dmu_buf_rele(dbuf, FTAG); | |
126 | VERIFY3U(0, ==, dmu_buf_hold(os, | |
127 | bpo.bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0)); | |
128 | } | |
129 | ||
130 | ASSERT3U(offset, >=, dbuf->db_offset); | |
131 | ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); | |
132 | ||
133 | objarray = dbuf->db_data; | |
134 | bpobj_free(os, objarray[blkoff], tx); | |
135 | } | |
136 | if (dbuf) { | |
137 | dmu_buf_rele(dbuf, FTAG); | |
138 | dbuf = NULL; | |
139 | } | |
140 | VERIFY3U(0, ==, dmu_object_free(os, bpo.bpo_phys->bpo_subobjs, tx)); | |
141 | ||
142 | out: | |
143 | mutex_exit(&bpo.bpo_lock); | |
144 | bpobj_close(&bpo); | |
145 | ||
146 | VERIFY3U(0, ==, dmu_object_free(os, obj, tx)); | |
147 | } | |
148 | ||
149 | int | |
150 | bpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object) | |
151 | { | |
152 | dmu_object_info_t doi; | |
153 | int err; | |
154 | ||
155 | err = dmu_object_info(os, object, &doi); | |
156 | if (err) | |
157 | return (err); | |
158 | ||
861166b0 | 159 | memset(bpo, 0, sizeof (*bpo)); |
428870ff BB |
160 | mutex_init(&bpo->bpo_lock, NULL, MUTEX_DEFAULT, NULL); |
161 | ||
162 | ASSERT(bpo->bpo_dbuf == NULL); | |
163 | ASSERT(bpo->bpo_phys == NULL); | |
164 | ASSERT(object != 0); | |
165 | ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ); | |
166 | ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR); | |
167 | ||
572e2857 BB |
168 | err = dmu_bonus_hold(os, object, bpo, &bpo->bpo_dbuf); |
169 | if (err) | |
170 | return (err); | |
171 | ||
428870ff BB |
172 | bpo->bpo_os = os; |
173 | bpo->bpo_object = object; | |
174 | bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT; | |
175 | bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0); | |
176 | bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1); | |
37f03da8 | 177 | bpo->bpo_havefreed = (doi.doi_bonus_size > BPOBJ_SIZE_V2); |
428870ff BB |
178 | bpo->bpo_phys = bpo->bpo_dbuf->db_data; |
179 | return (0); | |
180 | } | |
181 | ||
a1d477c2 MA |
182 | boolean_t |
183 | bpobj_is_open(const bpobj_t *bpo) | |
184 | { | |
185 | return (bpo->bpo_object != 0); | |
186 | } | |
187 | ||
428870ff BB |
188 | void |
189 | bpobj_close(bpobj_t *bpo) | |
190 | { | |
191 | /* Lame workaround for closing a bpobj that was never opened. */ | |
192 | if (bpo->bpo_object == 0) | |
193 | return; | |
194 | ||
195 | dmu_buf_rele(bpo->bpo_dbuf, bpo); | |
196 | if (bpo->bpo_cached_dbuf != NULL) | |
197 | dmu_buf_rele(bpo->bpo_cached_dbuf, bpo); | |
198 | bpo->bpo_dbuf = NULL; | |
199 | bpo->bpo_phys = NULL; | |
200 | bpo->bpo_cached_dbuf = NULL; | |
572e2857 | 201 | bpo->bpo_object = 0; |
428870ff BB |
202 | |
203 | mutex_destroy(&bpo->bpo_lock); | |
204 | } | |
205 | ||
e60e158e JG |
206 | static boolean_t |
207 | bpobj_is_empty_impl(bpobj_t *bpo) | |
9b67f605 | 208 | { |
e60e158e | 209 | ASSERT(MUTEX_HELD(&bpo->bpo_lock)); |
a1d477c2 MA |
210 | return (bpo->bpo_phys->bpo_num_blkptrs == 0 && |
211 | (!bpo->bpo_havesubobj || bpo->bpo_phys->bpo_num_subobjs == 0)); | |
9b67f605 MA |
212 | } |
213 | ||
e60e158e JG |
214 | boolean_t |
215 | bpobj_is_empty(bpobj_t *bpo) | |
216 | { | |
217 | mutex_enter(&bpo->bpo_lock); | |
218 | boolean_t is_empty = bpobj_is_empty_impl(bpo); | |
219 | mutex_exit(&bpo->bpo_lock); | |
220 | return (is_empty); | |
221 | } | |
222 | ||
a73e8fdb PZ |
223 | /* |
224 | * A recursive iteration of the bpobjs would be nice here but we run the risk | |
225 | * of overflowing function stack space. Instead, find each subobj and add it | |
226 | * to the head of our list so it can be scanned for subjobjs. Like a | |
227 | * recursive implementation, the "deepest" subobjs will be freed first. | |
228 | * When a subobj is found to have no additional subojs, free it. | |
229 | */ | |
230 | typedef struct bpobj_info { | |
231 | bpobj_t *bpi_bpo; | |
232 | /* | |
233 | * This object is a subobj of bpi_parent, | |
234 | * at bpi_index in its subobj array. | |
235 | */ | |
236 | struct bpobj_info *bpi_parent; | |
237 | uint64_t bpi_index; | |
238 | /* How many of our subobj's are left to process. */ | |
239 | uint64_t bpi_unprocessed_subobjs; | |
240 | /* True after having visited this bpo's directly referenced BPs. */ | |
241 | boolean_t bpi_visited; | |
242 | list_node_t bpi_node; | |
243 | } bpobj_info_t; | |
244 | ||
245 | static bpobj_info_t * | |
246 | bpi_alloc(bpobj_t *bpo, bpobj_info_t *parent, uint64_t index) | |
428870ff | 247 | { |
a73e8fdb PZ |
248 | bpobj_info_t *bpi = kmem_zalloc(sizeof (bpobj_info_t), KM_SLEEP); |
249 | bpi->bpi_bpo = bpo; | |
250 | bpi->bpi_parent = parent; | |
251 | bpi->bpi_index = index; | |
252 | if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) { | |
253 | bpi->bpi_unprocessed_subobjs = bpo->bpo_phys->bpo_num_subobjs; | |
254 | } | |
255 | return (bpi); | |
256 | } | |
428870ff | 257 | |
a73e8fdb PZ |
258 | /* |
259 | * Update bpobj and all of its parents with new space accounting. | |
260 | */ | |
261 | static void | |
37f03da8 SH |
262 | propagate_space_reduction(bpobj_info_t *bpi, int64_t freed, |
263 | int64_t comp_freed, int64_t uncomp_freed, dmu_tx_t *tx) | |
a73e8fdb | 264 | { |
428870ff | 265 | |
a73e8fdb PZ |
266 | for (; bpi != NULL; bpi = bpi->bpi_parent) { |
267 | bpobj_t *p = bpi->bpi_bpo; | |
268 | ASSERT(dmu_buf_is_dirty(p->bpo_dbuf, tx)); | |
269 | p->bpo_phys->bpo_bytes -= freed; | |
270 | ASSERT3S(p->bpo_phys->bpo_bytes, >=, 0); | |
271 | if (p->bpo_havecomp) { | |
272 | p->bpo_phys->bpo_comp -= comp_freed; | |
273 | p->bpo_phys->bpo_uncomp -= uncomp_freed; | |
274 | } | |
275 | } | |
276 | } | |
428870ff | 277 | |
a73e8fdb PZ |
278 | static int |
279 | bpobj_iterate_blkptrs(bpobj_info_t *bpi, bpobj_itor_t func, void *arg, | |
37f03da8 | 280 | int64_t start, dmu_tx_t *tx, boolean_t free) |
a73e8fdb PZ |
281 | { |
282 | int err = 0; | |
37f03da8 | 283 | int64_t freed = 0, comp_freed = 0, uncomp_freed = 0; |
a73e8fdb PZ |
284 | dmu_buf_t *dbuf = NULL; |
285 | bpobj_t *bpo = bpi->bpi_bpo; | |
428870ff | 286 | |
28430b51 AM |
287 | int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1; |
288 | uint64_t pe = P2ALIGN_TYPED(i, bpo->bpo_epb, uint64_t) * | |
289 | sizeof (blkptr_t); | |
290 | uint64_t ps = start * sizeof (blkptr_t); | |
291 | uint64_t pb = MAX((pe > dmu_prefetch_max) ? pe - dmu_prefetch_max : 0, | |
292 | ps); | |
293 | if (pe > pb) { | |
294 | dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0, pb, pe - pb, | |
295 | ZIO_PRIORITY_ASYNC_READ); | |
296 | } | |
297 | for (; i >= start; i--) { | |
a73e8fdb PZ |
298 | uint64_t offset = i * sizeof (blkptr_t); |
299 | uint64_t blkoff = P2PHASE(i, bpo->bpo_epb); | |
428870ff BB |
300 | |
301 | if (dbuf == NULL || dbuf->db_offset > offset) { | |
302 | if (dbuf) | |
303 | dmu_buf_rele(dbuf, FTAG); | |
37f03da8 | 304 | err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, |
28430b51 | 305 | offset, FTAG, &dbuf, DMU_READ_NO_PREFETCH); |
428870ff BB |
306 | if (err) |
307 | break; | |
28430b51 AM |
308 | pe = pb; |
309 | pb = MAX((dbuf->db_offset > dmu_prefetch_max) ? | |
310 | dbuf->db_offset - dmu_prefetch_max : 0, ps); | |
311 | if (pe > pb) { | |
312 | dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0, | |
313 | pb, pe - pb, ZIO_PRIORITY_ASYNC_READ); | |
314 | } | |
428870ff BB |
315 | } |
316 | ||
317 | ASSERT3U(offset, >=, dbuf->db_offset); | |
318 | ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); | |
319 | ||
a73e8fdb PZ |
320 | blkptr_t *bparray = dbuf->db_data; |
321 | blkptr_t *bp = &bparray[blkoff]; | |
37f03da8 SH |
322 | |
323 | boolean_t bp_freed = BP_GET_FREE(bp); | |
324 | err = func(arg, bp, bp_freed, tx); | |
428870ff BB |
325 | if (err) |
326 | break; | |
a73e8fdb | 327 | |
428870ff | 328 | if (free) { |
37f03da8 | 329 | int sign = bp_freed ? -1 : +1; |
a73e8fdb | 330 | spa_t *spa = dmu_objset_spa(bpo->bpo_os); |
37f03da8 SH |
331 | freed += sign * bp_get_dsize_sync(spa, bp); |
332 | comp_freed += sign * BP_GET_PSIZE(bp); | |
333 | uncomp_freed += sign * BP_GET_UCSIZE(bp); | |
a73e8fdb | 334 | ASSERT(dmu_buf_is_dirty(bpo->bpo_dbuf, tx)); |
428870ff BB |
335 | bpo->bpo_phys->bpo_num_blkptrs--; |
336 | ASSERT3S(bpo->bpo_phys->bpo_num_blkptrs, >=, 0); | |
37f03da8 SH |
337 | if (bp_freed) { |
338 | ASSERT(bpo->bpo_havefreed); | |
339 | bpo->bpo_phys->bpo_num_freed--; | |
340 | ASSERT3S(bpo->bpo_phys->bpo_num_freed, >=, 0); | |
341 | } | |
428870ff BB |
342 | } |
343 | } | |
a73e8fdb PZ |
344 | if (free) { |
345 | propagate_space_reduction(bpi, freed, comp_freed, | |
346 | uncomp_freed, tx); | |
347 | VERIFY0(dmu_free_range(bpo->bpo_os, | |
348 | bpo->bpo_object, | |
349 | bpo->bpo_phys->bpo_num_blkptrs * sizeof (blkptr_t), | |
350 | DMU_OBJECT_END, tx)); | |
351 | } | |
428870ff BB |
352 | if (dbuf) { |
353 | dmu_buf_rele(dbuf, FTAG); | |
354 | dbuf = NULL; | |
355 | } | |
a73e8fdb PZ |
356 | return (err); |
357 | } | |
428870ff | 358 | |
a73e8fdb PZ |
359 | /* |
360 | * Given an initial bpo, start by freeing the BPs that are directly referenced | |
361 | * by that bpo. If the bpo has subobjs, read in its last subobj and push the | |
362 | * subobj to our stack. By popping items off our stack, eventually we will | |
363 | * encounter a bpo that has no subobjs. We can free its bpobj_info_t, and if | |
364 | * requested also free the now-empty bpo from disk and decrement | |
365 | * its parent's subobj count. We continue popping each subobj from our stack, | |
366 | * visiting its last subobj until they too have no more subobjs, and so on. | |
367 | */ | |
368 | static int | |
369 | bpobj_iterate_impl(bpobj_t *initial_bpo, bpobj_itor_t func, void *arg, | |
37f03da8 | 370 | dmu_tx_t *tx, boolean_t free, uint64_t *bpobj_size) |
a73e8fdb PZ |
371 | { |
372 | list_t stack; | |
373 | bpobj_info_t *bpi; | |
374 | int err = 0; | |
428870ff | 375 | |
a73e8fdb PZ |
376 | /* |
377 | * Create a "stack" for us to work with without worrying about | |
378 | * stack overflows. Initialize it with the initial_bpo. | |
379 | */ | |
380 | list_create(&stack, sizeof (bpobj_info_t), | |
381 | offsetof(bpobj_info_t, bpi_node)); | |
382 | mutex_enter(&initial_bpo->bpo_lock); | |
37f03da8 SH |
383 | |
384 | if (bpobj_size != NULL) | |
385 | *bpobj_size = initial_bpo->bpo_phys->bpo_num_blkptrs; | |
386 | ||
a73e8fdb PZ |
387 | list_insert_head(&stack, bpi_alloc(initial_bpo, NULL, 0)); |
388 | ||
389 | while ((bpi = list_head(&stack)) != NULL) { | |
390 | bpobj_t *bpo = bpi->bpi_bpo; | |
391 | ||
392 | ASSERT3P(bpo, !=, NULL); | |
393 | ASSERT(MUTEX_HELD(&bpo->bpo_lock)); | |
394 | ASSERT(bpobj_is_open(bpo)); | |
395 | ||
396 | if (free) | |
397 | dmu_buf_will_dirty(bpo->bpo_dbuf, tx); | |
398 | ||
399 | if (bpi->bpi_visited == B_FALSE) { | |
37f03da8 SH |
400 | err = bpobj_iterate_blkptrs(bpi, func, arg, 0, tx, |
401 | free); | |
a73e8fdb PZ |
402 | bpi->bpi_visited = B_TRUE; |
403 | if (err != 0) | |
428870ff BB |
404 | break; |
405 | } | |
a73e8fdb PZ |
406 | /* |
407 | * We've finished with this bpo's directly-referenced BP's and | |
408 | * it has no more unprocessed subobjs. We can free its | |
409 | * bpobj_info_t (unless it is the topmost, initial_bpo). | |
410 | * If we are freeing from disk, we can also do that. | |
411 | */ | |
412 | if (bpi->bpi_unprocessed_subobjs == 0) { | |
413 | /* | |
414 | * If there are no entries, there should | |
415 | * be no bytes. | |
416 | */ | |
e60e158e | 417 | if (bpobj_is_empty_impl(bpo)) { |
a73e8fdb PZ |
418 | ASSERT0(bpo->bpo_phys->bpo_bytes); |
419 | ASSERT0(bpo->bpo_phys->bpo_comp); | |
420 | ASSERT0(bpo->bpo_phys->bpo_uncomp); | |
421 | } | |
428870ff | 422 | |
a73e8fdb PZ |
423 | /* The initial_bpo has no parent and is not closed. */ |
424 | if (bpi->bpi_parent != NULL) { | |
425 | if (free) { | |
426 | bpobj_t *p = bpi->bpi_parent->bpi_bpo; | |
427 | ||
428 | ASSERT0(bpo->bpo_phys->bpo_num_blkptrs); | |
429 | ASSERT3U(p->bpo_phys->bpo_num_subobjs, | |
430 | >, 0); | |
431 | ASSERT3U(bpi->bpi_index, ==, | |
432 | p->bpo_phys->bpo_num_subobjs - 1); | |
433 | ASSERT(dmu_buf_is_dirty(bpo->bpo_dbuf, | |
434 | tx)); | |
435 | ||
436 | p->bpo_phys->bpo_num_subobjs--; | |
437 | ||
438 | VERIFY0(dmu_free_range(p->bpo_os, | |
439 | p->bpo_phys->bpo_subobjs, | |
440 | bpi->bpi_index * sizeof (uint64_t), | |
441 | sizeof (uint64_t), tx)); | |
442 | ||
443 | /* eliminate the empty subobj list */ | |
444 | if (bpo->bpo_havesubobj && | |
445 | bpo->bpo_phys->bpo_subobjs != 0) { | |
446 | ASSERT0(bpo->bpo_phys-> | |
447 | bpo_num_subobjs); | |
448 | err = dmu_object_free( | |
449 | bpo->bpo_os, | |
450 | bpo->bpo_phys->bpo_subobjs, | |
451 | tx); | |
452 | if (err) | |
453 | break; | |
454 | bpo->bpo_phys->bpo_subobjs = 0; | |
455 | } | |
456 | err = dmu_object_free(p->bpo_os, | |
457 | bpo->bpo_object, tx); | |
458 | if (err) | |
459 | break; | |
460 | } | |
461 | ||
462 | mutex_exit(&bpo->bpo_lock); | |
463 | bpobj_close(bpo); | |
464 | kmem_free(bpo, sizeof (bpobj_t)); | |
465 | } else { | |
466 | mutex_exit(&bpo->bpo_lock); | |
467 | } | |
428870ff | 468 | |
a73e8fdb PZ |
469 | /* |
470 | * Finished processing this bpo. Unlock, and free | |
471 | * our "stack" info. | |
472 | */ | |
473 | list_remove_head(&stack); | |
474 | kmem_free(bpi, sizeof (bpobj_info_t)); | |
475 | } else { | |
476 | /* | |
477 | * We have unprocessed subobjs. Process the next one. | |
478 | */ | |
479 | ASSERT(bpo->bpo_havecomp); | |
37f03da8 | 480 | ASSERT3P(bpobj_size, ==, NULL); |
a73e8fdb PZ |
481 | |
482 | /* Add the last subobj to stack. */ | |
483 | int64_t i = bpi->bpi_unprocessed_subobjs - 1; | |
484 | uint64_t offset = i * sizeof (uint64_t); | |
485 | ||
28430b51 | 486 | uint64_t subobj; |
a73e8fdb | 487 | err = dmu_read(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, |
28430b51 AM |
488 | offset, sizeof (uint64_t), &subobj, |
489 | DMU_READ_NO_PREFETCH); | |
a73e8fdb | 490 | if (err) |
428870ff | 491 | break; |
428870ff | 492 | |
28430b51 AM |
493 | bpobj_t *subbpo = kmem_alloc(sizeof (bpobj_t), |
494 | KM_SLEEP); | |
495 | err = bpobj_open(subbpo, bpo->bpo_os, subobj); | |
496 | if (err) { | |
497 | kmem_free(subbpo, sizeof (bpobj_t)); | |
428870ff | 498 | break; |
28430b51 AM |
499 | } |
500 | ||
501 | if (subbpo->bpo_havesubobj && | |
502 | subbpo->bpo_phys->bpo_subobjs != 0) { | |
503 | dmu_prefetch(subbpo->bpo_os, | |
504 | subbpo->bpo_phys->bpo_subobjs, 0, 0, 0, | |
505 | ZIO_PRIORITY_ASYNC_READ); | |
506 | } | |
a73e8fdb | 507 | |
28430b51 AM |
508 | list_insert_head(&stack, bpi_alloc(subbpo, bpi, i)); |
509 | mutex_enter(&subbpo->bpo_lock); | |
a73e8fdb | 510 | bpi->bpi_unprocessed_subobjs--; |
428870ff BB |
511 | } |
512 | } | |
a73e8fdb PZ |
513 | /* |
514 | * Cleanup anything left on the "stack" after we left the loop. | |
515 | * Every bpo on the stack is locked so we must remember to undo | |
516 | * that now (in LIFO order). | |
517 | */ | |
518 | while ((bpi = list_remove_head(&stack)) != NULL) { | |
519 | bpobj_t *bpo = bpi->bpi_bpo; | |
520 | ASSERT(err != 0); | |
521 | ASSERT3P(bpo, !=, NULL); | |
428870ff | 522 | |
a73e8fdb PZ |
523 | mutex_exit(&bpo->bpo_lock); |
524 | ||
525 | /* do not free the initial_bpo */ | |
526 | if (bpi->bpi_parent != NULL) { | |
527 | bpobj_close(bpi->bpi_bpo); | |
528 | kmem_free(bpi->bpi_bpo, sizeof (bpobj_t)); | |
529 | } | |
530 | kmem_free(bpi, sizeof (bpobj_info_t)); | |
9b67f605 | 531 | } |
428870ff | 532 | |
a73e8fdb PZ |
533 | list_destroy(&stack); |
534 | ||
428870ff BB |
535 | return (err); |
536 | } | |
537 | ||
538 | /* | |
539 | * Iterate and remove the entries. If func returns nonzero, iteration | |
540 | * will stop and that entry will not be removed. | |
541 | */ | |
542 | int | |
543 | bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx) | |
544 | { | |
37f03da8 | 545 | return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE, NULL)); |
428870ff BB |
546 | } |
547 | ||
548 | /* | |
549 | * Iterate the entries. If func returns nonzero, iteration will stop. | |
37f03da8 SH |
550 | * |
551 | * If there are no subobjs: | |
552 | * | |
553 | * *bpobj_size can be used to return the number of block pointers in the | |
554 | * bpobj. Note that this may be different from the number of block pointers | |
555 | * that are iterated over, if iteration is terminated early (e.g. by the func | |
556 | * returning nonzero). | |
557 | * | |
558 | * If there are concurrent (or subsequent) modifications to the bpobj then the | |
559 | * returned *bpobj_size can be passed as "start" to | |
560 | * livelist_bpobj_iterate_from_nofree() to iterate the newly added entries. | |
428870ff BB |
561 | */ |
562 | int | |
37f03da8 SH |
563 | bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, |
564 | uint64_t *bpobj_size) | |
428870ff | 565 | { |
37f03da8 SH |
566 | return (bpobj_iterate_impl(bpo, func, arg, NULL, B_FALSE, bpobj_size)); |
567 | } | |
568 | ||
569 | /* | |
570 | * Iterate over the blkptrs in the bpobj beginning at index start. If func | |
571 | * returns nonzero, iteration will stop. This is a livelist specific function | |
572 | * since it assumes that there are no subobjs present. | |
573 | */ | |
574 | int | |
575 | livelist_bpobj_iterate_from_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, | |
576 | int64_t start) | |
577 | { | |
578 | if (bpo->bpo_havesubobj) | |
579 | VERIFY0(bpo->bpo_phys->bpo_subobjs); | |
580 | bpobj_info_t *bpi = bpi_alloc(bpo, NULL, 0); | |
581 | int err = bpobj_iterate_blkptrs(bpi, func, arg, start, NULL, B_FALSE); | |
582 | kmem_free(bpi, sizeof (bpobj_info_t)); | |
583 | return (err); | |
428870ff BB |
584 | } |
585 | ||
9553c533 MA |
586 | /* |
587 | * Logically add subobj's contents to the parent bpobj. | |
588 | * | |
589 | * In the most general case, this is accomplished in constant time by adding | |
590 | * a reference to subobj. This case is used when enqueuing a large subobj: | |
591 | * +--------------+ +--------------+ | |
592 | * | bpobj |----------------------->| subobj list | | |
593 | * +----+----+----+----+----+ +-----+-----+--+--+ | |
594 | * | bp | bp | bp | bp | bp | | obj | obj | obj | | |
595 | * +----+----+----+----+----+ +-----+-----+-----+ | |
596 | * | |
597 | * +--------------+ +--------------+ | |
598 | * | sub-bpobj |----------------------> | subsubobj | | |
599 | * +----+----+----+----+---------+----+ +-----+-----+--+--------+-----+ | |
600 | * | bp | bp | bp | bp | ... | bp | | obj | obj | ... | obj | | |
601 | * +----+----+----+----+---------+----+ +-----+-----+-----------+-----+ | |
602 | * | |
603 | * Result: sub-bpobj added to parent's subobj list. | |
604 | * +--------------+ +--------------+ | |
605 | * | bpobj |----------------------->| subobj list | | |
606 | * +----+----+----+----+----+ +-----+-----+--+--+-----+ | |
607 | * | bp | bp | bp | bp | bp | | obj | obj | obj | OBJ | | |
608 | * +----+----+----+----+----+ +-----+-----+-----+--|--+ | |
609 | * | | |
610 | * /-----------------------------------------------------/ | |
611 | * v | |
612 | * +--------------+ +--------------+ | |
613 | * | sub-bpobj |----------------------> | subsubobj | | |
614 | * +----+----+----+----+---------+----+ +-----+-----+--+--------+-----+ | |
615 | * | bp | bp | bp | bp | ... | bp | | obj | obj | ... | obj | | |
616 | * +----+----+----+----+---------+----+ +-----+-----+-----------+-----+ | |
617 | * | |
618 | * | |
619 | * In a common case, the subobj is small: its bp's and its list of subobj's | |
620 | * are each stored in a single block. In this case we copy the subobj's | |
621 | * contents to the parent: | |
622 | * +--------------+ +--------------+ | |
623 | * | bpobj |----------------------->| subobj list | | |
624 | * +----+----+----+----+----+ +-----+-----+--+--+ | |
625 | * | bp | bp | bp | bp | bp | | obj | obj | obj | | |
626 | * +----+----+----+----+----+ +-----+-----+-----+ | |
627 | * ^ ^ | |
628 | * +--------------+ | +--------------+ | | |
629 | * | sub-bpobj |---------^------------> | subsubobj | ^ | |
630 | * +----+----+----+ | +-----+-----+--+ | | |
631 | * | BP | BP |-->-->-->-->-/ | OBJ | OBJ |-->-/ | |
632 | * +----+----+ +-----+-----+ | |
633 | * | |
634 | * Result: subobj destroyed, contents copied to parent: | |
635 | * +--------------+ +--------------+ | |
636 | * | bpobj |----------------------->| subobj list | | |
637 | * +----+----+----+----+----+----+----+ +-----+-----+--+--+-----+-----+ | |
638 | * | bp | bp | bp | bp | bp | BP | BP | | obj | obj | obj | OBJ | OBJ | | |
639 | * +----+----+----+----+----+----+----+ +-----+-----+-----+-----+-----+ | |
640 | * | |
641 | * | |
642 | * If the subobj has many BP's but few subobj's, we can copy the sub-subobj's | |
643 | * but retain the sub-bpobj: | |
644 | * +--------------+ +--------------+ | |
645 | * | bpobj |----------------------->| subobj list | | |
646 | * +----+----+----+----+----+ +-----+-----+--+--+ | |
647 | * | bp | bp | bp | bp | bp | | obj | obj | obj | | |
648 | * +----+----+----+----+----+ +-----+-----+-----+ | |
649 | * ^ | |
650 | * +--------------+ +--------------+ | | |
651 | * | sub-bpobj |----------------------> | subsubobj | ^ | |
652 | * +----+----+----+----+---------+----+ +-----+-----+--+ | | |
653 | * | bp | bp | bp | bp | ... | bp | | OBJ | OBJ |-->-/ | |
654 | * +----+----+----+----+---------+----+ +-----+-----+ | |
655 | * | |
656 | * Result: sub-sub-bpobjs and subobj added to parent's subobj list. | |
657 | * +--------------+ +--------------+ | |
658 | * | bpobj |-------------------->| subobj list | | |
659 | * +----+----+----+----+----+ +-----+-----+--+--+-----+-----+------+ | |
660 | * | bp | bp | bp | bp | bp | | obj | obj | obj | OBJ | OBJ | OBJ* | | |
661 | * +----+----+----+----+----+ +-----+-----+-----+-----+-----+--|---+ | |
662 | * | | |
663 | * /--------------------------------------------------------------/ | |
664 | * v | |
665 | * +--------------+ | |
666 | * | sub-bpobj | | |
667 | * +----+----+----+----+---------+----+ | |
668 | * | bp | bp | bp | bp | ... | bp | | |
669 | * +----+----+----+----+---------+----+ | |
670 | */ | |
428870ff BB |
671 | void |
672 | bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) | |
673 | { | |
674 | bpobj_t subbpo; | |
572e2857 | 675 | uint64_t used, comp, uncomp, subsubobjs; |
9553c533 MA |
676 | boolean_t copy_subsub = B_TRUE; |
677 | boolean_t copy_bps = B_TRUE; | |
428870ff | 678 | |
a1d477c2 MA |
679 | ASSERT(bpobj_is_open(bpo)); |
680 | ASSERT(subobj != 0); | |
428870ff BB |
681 | ASSERT(bpo->bpo_havesubobj); |
682 | ASSERT(bpo->bpo_havecomp); | |
753c3839 MA |
683 | ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj); |
684 | ||
685 | if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj) { | |
686 | bpobj_decr_empty(bpo->bpo_os, tx); | |
687 | return; | |
688 | } | |
428870ff BB |
689 | |
690 | VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj)); | |
a1d477c2 | 691 | if (bpobj_is_empty(&subbpo)) { |
428870ff | 692 | /* No point in having an empty subobj. */ |
572e2857 | 693 | bpobj_close(&subbpo); |
428870ff BB |
694 | bpobj_free(bpo->bpo_os, subobj, tx); |
695 | return; | |
696 | } | |
dc5c8006 | 697 | VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp)); |
428870ff | 698 | |
df7eeccc | 699 | mutex_enter(&bpo->bpo_lock); |
428870ff | 700 | dmu_buf_will_dirty(bpo->bpo_dbuf, tx); |
428870ff | 701 | |
9553c533 | 702 | dmu_object_info_t doi; |
13fe0198 | 703 | |
9553c533 MA |
704 | if (bpo->bpo_phys->bpo_subobjs != 0) { |
705 | ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, | |
706 | &doi)); | |
707 | ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ); | |
708 | } | |
572e2857 BB |
709 | |
710 | /* | |
711 | * If subobj has only one block of subobjs, then move subobj's | |
9553c533 MA |
712 | * subobjs to bpo's subobj list directly. This reduces recursion in |
713 | * bpobj_iterate due to nested subobjs. | |
572e2857 BB |
714 | */ |
715 | subsubobjs = subbpo.bpo_phys->bpo_subobjs; | |
716 | if (subsubobjs != 0) { | |
9553c533 MA |
717 | VERIFY0(dmu_object_info(bpo->bpo_os, subsubobjs, &doi)); |
718 | if (doi.doi_max_offset > doi.doi_data_block_size) { | |
719 | copy_subsub = B_FALSE; | |
720 | } | |
721 | } | |
722 | ||
723 | /* | |
724 | * If, in addition to having only one block of subobj's, subobj has | |
725 | * only one block of bp's, then move subobj's bp's to bpo's bp list | |
726 | * directly. This reduces recursion in bpobj_iterate due to nested | |
727 | * subobjs. | |
728 | */ | |
729 | VERIFY3U(0, ==, dmu_object_info(bpo->bpo_os, subobj, &doi)); | |
730 | if (doi.doi_max_offset > doi.doi_data_block_size || !copy_subsub) { | |
731 | copy_bps = B_FALSE; | |
732 | } | |
733 | ||
734 | if (copy_subsub && subsubobjs != 0) { | |
735 | dmu_buf_t *subdb; | |
736 | uint64_t numsubsub = subbpo.bpo_phys->bpo_num_subobjs; | |
737 | ||
738 | VERIFY0(dmu_buf_hold(bpo->bpo_os, subsubobjs, | |
739 | 0, FTAG, &subdb, 0)); | |
740 | /* | |
741 | * Make sure that we are not asking dmu_write() | |
742 | * to write more data than we have in our buffer. | |
743 | */ | |
744 | VERIFY3U(subdb->db_size, >=, | |
745 | numsubsub * sizeof (subobj)); | |
746 | if (bpo->bpo_phys->bpo_subobjs == 0) { | |
747 | bpo->bpo_phys->bpo_subobjs = | |
748 | dmu_object_alloc(bpo->bpo_os, | |
749 | DMU_OT_BPOBJ_SUBOBJ, SPA_OLD_MAXBLOCKSIZE, | |
750 | DMU_OT_NONE, 0, tx); | |
572e2857 | 751 | } |
9553c533 MA |
752 | dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, |
753 | bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), | |
754 | numsubsub * sizeof (subobj), subdb->db_data, tx); | |
755 | dmu_buf_rele(subdb, FTAG); | |
756 | bpo->bpo_phys->bpo_num_subobjs += numsubsub; | |
757 | ||
758 | dmu_buf_will_dirty(subbpo.bpo_dbuf, tx); | |
759 | subbpo.bpo_phys->bpo_subobjs = 0; | |
760 | VERIFY0(dmu_object_free(bpo->bpo_os, subsubobjs, tx)); | |
572e2857 | 761 | } |
9553c533 MA |
762 | |
763 | if (copy_bps) { | |
764 | dmu_buf_t *bps; | |
765 | uint64_t numbps = subbpo.bpo_phys->bpo_num_blkptrs; | |
766 | ||
767 | ASSERT(copy_subsub); | |
768 | VERIFY0(dmu_buf_hold(bpo->bpo_os, subobj, | |
769 | 0, FTAG, &bps, 0)); | |
770 | ||
771 | /* | |
772 | * Make sure that we are not asking dmu_write() | |
773 | * to write more data than we have in our buffer. | |
774 | */ | |
775 | VERIFY3U(bps->db_size, >=, numbps * sizeof (blkptr_t)); | |
776 | dmu_write(bpo->bpo_os, bpo->bpo_object, | |
777 | bpo->bpo_phys->bpo_num_blkptrs * sizeof (blkptr_t), | |
778 | numbps * sizeof (blkptr_t), | |
779 | bps->db_data, tx); | |
780 | dmu_buf_rele(bps, FTAG); | |
781 | bpo->bpo_phys->bpo_num_blkptrs += numbps; | |
782 | ||
783 | bpobj_close(&subbpo); | |
784 | VERIFY0(dmu_object_free(bpo->bpo_os, subobj, tx)); | |
785 | } else { | |
786 | bpobj_close(&subbpo); | |
787 | if (bpo->bpo_phys->bpo_subobjs == 0) { | |
788 | bpo->bpo_phys->bpo_subobjs = | |
789 | dmu_object_alloc(bpo->bpo_os, | |
790 | DMU_OT_BPOBJ_SUBOBJ, SPA_OLD_MAXBLOCKSIZE, | |
791 | DMU_OT_NONE, 0, tx); | |
792 | } | |
793 | ||
794 | dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, | |
795 | bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), | |
796 | sizeof (subobj), &subobj, tx); | |
797 | bpo->bpo_phys->bpo_num_subobjs++; | |
798 | } | |
799 | ||
428870ff BB |
800 | bpo->bpo_phys->bpo_bytes += used; |
801 | bpo->bpo_phys->bpo_comp += comp; | |
802 | bpo->bpo_phys->bpo_uncomp += uncomp; | |
803 | mutex_exit(&bpo->bpo_lock); | |
572e2857 | 804 | |
428870ff BB |
805 | } |
806 | ||
dc5c8006 AM |
807 | /* |
808 | * Prefetch metadata required for bpobj_enqueue_subobj(). | |
809 | */ | |
810 | void | |
811 | bpobj_prefetch_subobj(bpobj_t *bpo, uint64_t subobj) | |
812 | { | |
813 | dmu_object_info_t doi; | |
814 | bpobj_t subbpo; | |
815 | uint64_t subsubobjs; | |
816 | boolean_t copy_subsub = B_TRUE; | |
817 | boolean_t copy_bps = B_TRUE; | |
818 | ||
819 | ASSERT(bpobj_is_open(bpo)); | |
820 | ASSERT(subobj != 0); | |
821 | ||
822 | if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj) | |
823 | return; | |
824 | ||
825 | if (bpobj_open(&subbpo, bpo->bpo_os, subobj) != 0) | |
826 | return; | |
827 | if (bpobj_is_empty(&subbpo)) { | |
828 | bpobj_close(&subbpo); | |
829 | return; | |
830 | } | |
831 | subsubobjs = subbpo.bpo_phys->bpo_subobjs; | |
832 | bpobj_close(&subbpo); | |
833 | ||
834 | if (subsubobjs != 0) { | |
835 | if (dmu_object_info(bpo->bpo_os, subsubobjs, &doi) != 0) | |
836 | return; | |
837 | if (doi.doi_max_offset > doi.doi_data_block_size) | |
838 | copy_subsub = B_FALSE; | |
839 | } | |
840 | ||
841 | if (dmu_object_info(bpo->bpo_os, subobj, &doi) != 0) | |
842 | return; | |
843 | if (doi.doi_max_offset > doi.doi_data_block_size || !copy_subsub) | |
844 | copy_bps = B_FALSE; | |
845 | ||
846 | if (copy_subsub && subsubobjs != 0) { | |
847 | if (bpo->bpo_phys->bpo_subobjs) { | |
848 | dmu_prefetch(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 0, | |
849 | bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 1, | |
850 | ZIO_PRIORITY_ASYNC_READ); | |
851 | } | |
852 | dmu_prefetch(bpo->bpo_os, subsubobjs, 0, 0, 1, | |
853 | ZIO_PRIORITY_ASYNC_READ); | |
854 | } | |
855 | ||
856 | if (copy_bps) { | |
857 | dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0, | |
858 | bpo->bpo_phys->bpo_num_blkptrs * sizeof (blkptr_t), 1, | |
859 | ZIO_PRIORITY_ASYNC_READ); | |
860 | dmu_prefetch(bpo->bpo_os, subobj, 0, 0, 1, | |
861 | ZIO_PRIORITY_ASYNC_READ); | |
862 | } else if (bpo->bpo_phys->bpo_subobjs) { | |
863 | dmu_prefetch(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 0, | |
864 | bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 1, | |
865 | ZIO_PRIORITY_ASYNC_READ); | |
866 | } | |
867 | } | |
868 | ||
428870ff | 869 | void |
37f03da8 SH |
870 | bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, boolean_t bp_freed, |
871 | dmu_tx_t *tx) | |
428870ff BB |
872 | { |
873 | blkptr_t stored_bp = *bp; | |
874 | uint64_t offset; | |
875 | int blkoff; | |
876 | blkptr_t *bparray; | |
877 | ||
a1d477c2 | 878 | ASSERT(bpobj_is_open(bpo)); |
428870ff | 879 | ASSERT(!BP_IS_HOLE(bp)); |
753c3839 | 880 | ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj); |
428870ff | 881 | |
9b67f605 MA |
882 | if (BP_IS_EMBEDDED(bp)) { |
883 | /* | |
884 | * The bpobj will compress better without the payload. | |
885 | * | |
886 | * Note that we store EMBEDDED bp's because they have an | |
887 | * uncompressed size, which must be accounted for. An | |
888 | * alternative would be to add their size to bpo_uncomp | |
889 | * without storing the bp, but that would create additional | |
890 | * complications: bpo_uncomp would be inconsistent with the | |
891 | * set of BP's stored, and bpobj_iterate() wouldn't visit | |
892 | * all the space accounted for in the bpobj. | |
893 | */ | |
861166b0 | 894 | memset(&stored_bp, 0, sizeof (stored_bp)); |
9b67f605 | 895 | stored_bp.blk_prop = bp->blk_prop; |
493fcce9 | 896 | BP_SET_LOGICAL_BIRTH(&stored_bp, BP_GET_LOGICAL_BIRTH(bp)); |
9b67f605 MA |
897 | } else if (!BP_GET_DEDUP(bp)) { |
898 | /* The bpobj will compress better without the checksum */ | |
861166b0 | 899 | memset(&stored_bp.blk_cksum, 0, sizeof (stored_bp.blk_cksum)); |
9b67f605 MA |
900 | } |
901 | ||
428870ff | 902 | stored_bp.blk_fill = 0; |
37f03da8 | 903 | BP_SET_FREE(&stored_bp, bp_freed); |
428870ff | 904 | |
428870ff BB |
905 | mutex_enter(&bpo->bpo_lock); |
906 | ||
907 | offset = bpo->bpo_phys->bpo_num_blkptrs * sizeof (stored_bp); | |
908 | blkoff = P2PHASE(bpo->bpo_phys->bpo_num_blkptrs, bpo->bpo_epb); | |
909 | ||
910 | if (bpo->bpo_cached_dbuf == NULL || | |
911 | offset < bpo->bpo_cached_dbuf->db_offset || | |
912 | offset >= bpo->bpo_cached_dbuf->db_offset + | |
913 | bpo->bpo_cached_dbuf->db_size) { | |
914 | if (bpo->bpo_cached_dbuf) | |
915 | dmu_buf_rele(bpo->bpo_cached_dbuf, bpo); | |
916 | VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, | |
917 | offset, bpo, &bpo->bpo_cached_dbuf, 0)); | |
51f55742 | 918 | ASSERT3P(bpo->bpo_cached_dbuf, !=, NULL); |
428870ff BB |
919 | } |
920 | ||
921 | dmu_buf_will_dirty(bpo->bpo_cached_dbuf, tx); | |
922 | bparray = bpo->bpo_cached_dbuf->db_data; | |
923 | bparray[blkoff] = stored_bp; | |
924 | ||
925 | dmu_buf_will_dirty(bpo->bpo_dbuf, tx); | |
926 | bpo->bpo_phys->bpo_num_blkptrs++; | |
37f03da8 SH |
927 | int sign = bp_freed ? -1 : +1; |
928 | bpo->bpo_phys->bpo_bytes += sign * | |
428870ff BB |
929 | bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp); |
930 | if (bpo->bpo_havecomp) { | |
37f03da8 SH |
931 | bpo->bpo_phys->bpo_comp += sign * BP_GET_PSIZE(bp); |
932 | bpo->bpo_phys->bpo_uncomp += sign * BP_GET_UCSIZE(bp); | |
933 | } | |
934 | if (bp_freed) { | |
935 | ASSERT(bpo->bpo_havefreed); | |
936 | bpo->bpo_phys->bpo_num_freed++; | |
428870ff BB |
937 | } |
938 | mutex_exit(&bpo->bpo_lock); | |
939 | } | |
940 | ||
941 | struct space_range_arg { | |
942 | spa_t *spa; | |
943 | uint64_t mintxg; | |
944 | uint64_t maxtxg; | |
945 | uint64_t used; | |
946 | uint64_t comp; | |
947 | uint64_t uncomp; | |
948 | }; | |
949 | ||
428870ff | 950 | static int |
37f03da8 | 951 | space_range_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx) |
428870ff | 952 | { |
14e4e3cb | 953 | (void) bp_freed, (void) tx; |
428870ff BB |
954 | struct space_range_arg *sra = arg; |
955 | ||
493fcce9 GW |
956 | if (BP_GET_LOGICAL_BIRTH(bp) > sra->mintxg && |
957 | BP_GET_LOGICAL_BIRTH(bp) <= sra->maxtxg) { | |
330d06f9 MA |
958 | if (dsl_pool_sync_context(spa_get_dsl(sra->spa))) |
959 | sra->used += bp_get_dsize_sync(sra->spa, bp); | |
960 | else | |
961 | sra->used += bp_get_dsize(sra->spa, bp); | |
428870ff BB |
962 | sra->comp += BP_GET_PSIZE(bp); |
963 | sra->uncomp += BP_GET_UCSIZE(bp); | |
964 | } | |
965 | return (0); | |
966 | } | |
967 | ||
968 | int | |
969 | bpobj_space(bpobj_t *bpo, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) | |
970 | { | |
a1d477c2 | 971 | ASSERT(bpobj_is_open(bpo)); |
428870ff BB |
972 | mutex_enter(&bpo->bpo_lock); |
973 | ||
974 | *usedp = bpo->bpo_phys->bpo_bytes; | |
975 | if (bpo->bpo_havecomp) { | |
976 | *compp = bpo->bpo_phys->bpo_comp; | |
977 | *uncompp = bpo->bpo_phys->bpo_uncomp; | |
978 | mutex_exit(&bpo->bpo_lock); | |
979 | return (0); | |
980 | } else { | |
981 | mutex_exit(&bpo->bpo_lock); | |
982 | return (bpobj_space_range(bpo, 0, UINT64_MAX, | |
983 | usedp, compp, uncompp)); | |
984 | } | |
985 | } | |
986 | ||
987 | /* | |
988 | * Return the amount of space in the bpobj which is: | |
493fcce9 | 989 | * mintxg < logical birth <= maxtxg |
428870ff BB |
990 | */ |
991 | int | |
992 | bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg, | |
993 | uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) | |
994 | { | |
995 | struct space_range_arg sra = { 0 }; | |
996 | int err; | |
997 | ||
a1d477c2 MA |
998 | ASSERT(bpobj_is_open(bpo)); |
999 | ||
428870ff BB |
1000 | /* |
1001 | * As an optimization, if they want the whole txg range, just | |
1002 | * get bpo_bytes rather than iterating over the bps. | |
1003 | */ | |
1004 | if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX && bpo->bpo_havecomp) | |
1005 | return (bpobj_space(bpo, usedp, compp, uncompp)); | |
1006 | ||
1007 | sra.spa = dmu_objset_spa(bpo->bpo_os); | |
1008 | sra.mintxg = mintxg; | |
1009 | sra.maxtxg = maxtxg; | |
1010 | ||
1011 | err = bpobj_iterate_nofree(bpo, space_range_cb, &sra, NULL); | |
1012 | *usedp = sra.used; | |
1013 | *compp = sra.comp; | |
1014 | *uncompp = sra.uncomp; | |
1015 | return (err); | |
1016 | } | |
37f03da8 SH |
1017 | |
1018 | /* | |
1019 | * A bpobj_itor_t to append blkptrs to a bplist. Note that while blkptrs in a | |
1020 | * bpobj are designated as free or allocated that information is not preserved | |
1021 | * in bplists. | |
1022 | */ | |
37f03da8 SH |
1023 | int |
1024 | bplist_append_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, | |
1025 | dmu_tx_t *tx) | |
1026 | { | |
14e4e3cb | 1027 | (void) bp_freed, (void) tx; |
37f03da8 SH |
1028 | bplist_t *bpl = arg; |
1029 | bplist_append(bpl, bp); | |
1030 | return (0); | |
1031 | } |