]>
Commit | Line | Data |
---|---|---|
428870ff BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | |
df7eeccc | 23 | * Copyright (c) 2011, 2016 by Delphix. All rights reserved. |
9210e43a | 24 | * Copyright (c) 2017 Datto Inc. |
428870ff BB |
25 | */ |
26 | ||
27 | #include <sys/bpobj.h> | |
28 | #include <sys/zfs_context.h> | |
29 | #include <sys/refcount.h> | |
330d06f9 | 30 | #include <sys/dsl_pool.h> |
753c3839 MA |
31 | #include <sys/zfeature.h> |
32 | #include <sys/zap.h> | |
33 | ||
34 | /* | |
35 | * Return an empty bpobj, preferably the empty dummy one (dp_empty_bpobj). | |
36 | */ | |
37 | uint64_t | |
38 | bpobj_alloc_empty(objset_t *os, int blocksize, dmu_tx_t *tx) | |
39 | { | |
753c3839 MA |
40 | spa_t *spa = dmu_objset_spa(os); |
41 | dsl_pool_t *dp = dmu_objset_pool(os); | |
42 | ||
fa86b5db MA |
43 | if (spa_feature_is_enabled(spa, SPA_FEATURE_EMPTY_BPOBJ)) { |
44 | if (!spa_feature_is_active(spa, SPA_FEATURE_EMPTY_BPOBJ)) { | |
c99c9001 | 45 | ASSERT0(dp->dp_empty_bpobj); |
753c3839 | 46 | dp->dp_empty_bpobj = |
f1512ee6 | 47 | bpobj_alloc(os, SPA_OLD_MAXBLOCKSIZE, tx); |
753c3839 MA |
48 | VERIFY(zap_add(os, |
49 | DMU_POOL_DIRECTORY_OBJECT, | |
50 | DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1, | |
51 | &dp->dp_empty_bpobj, tx) == 0); | |
52 | } | |
fa86b5db | 53 | spa_feature_incr(spa, SPA_FEATURE_EMPTY_BPOBJ, tx); |
753c3839 MA |
54 | ASSERT(dp->dp_empty_bpobj != 0); |
55 | return (dp->dp_empty_bpobj); | |
56 | } else { | |
57 | return (bpobj_alloc(os, blocksize, tx)); | |
58 | } | |
59 | } | |
60 | ||
61 | void | |
62 | bpobj_decr_empty(objset_t *os, dmu_tx_t *tx) | |
63 | { | |
753c3839 MA |
64 | dsl_pool_t *dp = dmu_objset_pool(os); |
65 | ||
fa86b5db MA |
66 | spa_feature_decr(dmu_objset_spa(os), SPA_FEATURE_EMPTY_BPOBJ, tx); |
67 | if (!spa_feature_is_active(dmu_objset_spa(os), | |
68 | SPA_FEATURE_EMPTY_BPOBJ)) { | |
753c3839 MA |
69 | VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset, |
70 | DMU_POOL_DIRECTORY_OBJECT, | |
71 | DMU_POOL_EMPTY_BPOBJ, tx)); | |
72 | VERIFY3U(0, ==, dmu_object_free(os, dp->dp_empty_bpobj, tx)); | |
73 | dp->dp_empty_bpobj = 0; | |
74 | } | |
75 | } | |
428870ff BB |
76 | |
77 | uint64_t | |
78 | bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx) | |
79 | { | |
80 | int size; | |
81 | ||
82 | if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_BPOBJ_ACCOUNT) | |
83 | size = BPOBJ_SIZE_V0; | |
84 | else if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS) | |
85 | size = BPOBJ_SIZE_V1; | |
86 | else | |
87 | size = sizeof (bpobj_phys_t); | |
88 | ||
89 | return (dmu_object_alloc(os, DMU_OT_BPOBJ, blocksize, | |
90 | DMU_OT_BPOBJ_HDR, size, tx)); | |
91 | } | |
92 | ||
93 | void | |
94 | bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx) | |
95 | { | |
96 | int64_t i; | |
97 | bpobj_t bpo; | |
98 | dmu_object_info_t doi; | |
99 | int epb; | |
100 | dmu_buf_t *dbuf = NULL; | |
101 | ||
753c3839 | 102 | ASSERT(obj != dmu_objset_pool(os)->dp_empty_bpobj); |
428870ff BB |
103 | VERIFY3U(0, ==, bpobj_open(&bpo, os, obj)); |
104 | ||
105 | mutex_enter(&bpo.bpo_lock); | |
106 | ||
107 | if (!bpo.bpo_havesubobj || bpo.bpo_phys->bpo_subobjs == 0) | |
108 | goto out; | |
109 | ||
110 | VERIFY3U(0, ==, dmu_object_info(os, bpo.bpo_phys->bpo_subobjs, &doi)); | |
111 | epb = doi.doi_data_block_size / sizeof (uint64_t); | |
112 | ||
113 | for (i = bpo.bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) { | |
114 | uint64_t *objarray; | |
115 | uint64_t offset, blkoff; | |
116 | ||
117 | offset = i * sizeof (uint64_t); | |
118 | blkoff = P2PHASE(i, epb); | |
119 | ||
120 | if (dbuf == NULL || dbuf->db_offset > offset) { | |
121 | if (dbuf) | |
122 | dmu_buf_rele(dbuf, FTAG); | |
123 | VERIFY3U(0, ==, dmu_buf_hold(os, | |
124 | bpo.bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0)); | |
125 | } | |
126 | ||
127 | ASSERT3U(offset, >=, dbuf->db_offset); | |
128 | ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); | |
129 | ||
130 | objarray = dbuf->db_data; | |
131 | bpobj_free(os, objarray[blkoff], tx); | |
132 | } | |
133 | if (dbuf) { | |
134 | dmu_buf_rele(dbuf, FTAG); | |
135 | dbuf = NULL; | |
136 | } | |
137 | VERIFY3U(0, ==, dmu_object_free(os, bpo.bpo_phys->bpo_subobjs, tx)); | |
138 | ||
139 | out: | |
140 | mutex_exit(&bpo.bpo_lock); | |
141 | bpobj_close(&bpo); | |
142 | ||
143 | VERIFY3U(0, ==, dmu_object_free(os, obj, tx)); | |
144 | } | |
145 | ||
146 | int | |
147 | bpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object) | |
148 | { | |
149 | dmu_object_info_t doi; | |
150 | int err; | |
151 | ||
152 | err = dmu_object_info(os, object, &doi); | |
153 | if (err) | |
154 | return (err); | |
155 | ||
156 | bzero(bpo, sizeof (*bpo)); | |
157 | mutex_init(&bpo->bpo_lock, NULL, MUTEX_DEFAULT, NULL); | |
158 | ||
159 | ASSERT(bpo->bpo_dbuf == NULL); | |
160 | ASSERT(bpo->bpo_phys == NULL); | |
161 | ASSERT(object != 0); | |
162 | ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ); | |
163 | ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR); | |
164 | ||
572e2857 BB |
165 | err = dmu_bonus_hold(os, object, bpo, &bpo->bpo_dbuf); |
166 | if (err) | |
167 | return (err); | |
168 | ||
428870ff BB |
169 | bpo->bpo_os = os; |
170 | bpo->bpo_object = object; | |
171 | bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT; | |
172 | bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0); | |
173 | bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1); | |
428870ff BB |
174 | bpo->bpo_phys = bpo->bpo_dbuf->db_data; |
175 | return (0); | |
176 | } | |
177 | ||
178 | void | |
179 | bpobj_close(bpobj_t *bpo) | |
180 | { | |
181 | /* Lame workaround for closing a bpobj that was never opened. */ | |
182 | if (bpo->bpo_object == 0) | |
183 | return; | |
184 | ||
185 | dmu_buf_rele(bpo->bpo_dbuf, bpo); | |
186 | if (bpo->bpo_cached_dbuf != NULL) | |
187 | dmu_buf_rele(bpo->bpo_cached_dbuf, bpo); | |
188 | bpo->bpo_dbuf = NULL; | |
189 | bpo->bpo_phys = NULL; | |
190 | bpo->bpo_cached_dbuf = NULL; | |
572e2857 | 191 | bpo->bpo_object = 0; |
428870ff BB |
192 | |
193 | mutex_destroy(&bpo->bpo_lock); | |
194 | } | |
195 | ||
9b67f605 MA |
196 | static boolean_t |
197 | bpobj_hasentries(bpobj_t *bpo) | |
198 | { | |
199 | return (bpo->bpo_phys->bpo_num_blkptrs != 0 || | |
200 | (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_num_subobjs != 0)); | |
201 | } | |
202 | ||
428870ff BB |
203 | static int |
204 | bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx, | |
205 | boolean_t free) | |
206 | { | |
207 | dmu_object_info_t doi; | |
208 | int epb; | |
209 | int64_t i; | |
210 | int err = 0; | |
211 | dmu_buf_t *dbuf = NULL; | |
212 | ||
213 | mutex_enter(&bpo->bpo_lock); | |
214 | ||
9210e43a AP |
215 | if (!bpobj_hasentries(bpo)) |
216 | goto out; | |
217 | ||
428870ff BB |
218 | if (free) |
219 | dmu_buf_will_dirty(bpo->bpo_dbuf, tx); | |
220 | ||
221 | for (i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= 0; i--) { | |
222 | blkptr_t *bparray; | |
223 | blkptr_t *bp; | |
224 | uint64_t offset, blkoff; | |
225 | ||
226 | offset = i * sizeof (blkptr_t); | |
227 | blkoff = P2PHASE(i, bpo->bpo_epb); | |
228 | ||
229 | if (dbuf == NULL || dbuf->db_offset > offset) { | |
230 | if (dbuf) | |
231 | dmu_buf_rele(dbuf, FTAG); | |
232 | err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, offset, | |
233 | FTAG, &dbuf, 0); | |
234 | if (err) | |
235 | break; | |
236 | } | |
237 | ||
238 | ASSERT3U(offset, >=, dbuf->db_offset); | |
239 | ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); | |
240 | ||
241 | bparray = dbuf->db_data; | |
242 | bp = &bparray[blkoff]; | |
243 | err = func(arg, bp, tx); | |
244 | if (err) | |
245 | break; | |
246 | if (free) { | |
247 | bpo->bpo_phys->bpo_bytes -= | |
248 | bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp); | |
249 | ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0); | |
250 | if (bpo->bpo_havecomp) { | |
251 | bpo->bpo_phys->bpo_comp -= BP_GET_PSIZE(bp); | |
252 | bpo->bpo_phys->bpo_uncomp -= BP_GET_UCSIZE(bp); | |
253 | } | |
254 | bpo->bpo_phys->bpo_num_blkptrs--; | |
255 | ASSERT3S(bpo->bpo_phys->bpo_num_blkptrs, >=, 0); | |
256 | } | |
257 | } | |
258 | if (dbuf) { | |
259 | dmu_buf_rele(dbuf, FTAG); | |
260 | dbuf = NULL; | |
261 | } | |
262 | if (free) { | |
428870ff | 263 | VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object, |
ee45fbd8 | 264 | (i + 1) * sizeof (blkptr_t), DMU_OBJECT_END, tx)); |
428870ff BB |
265 | } |
266 | if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0) | |
267 | goto out; | |
268 | ||
269 | ASSERT(bpo->bpo_havecomp); | |
270 | err = dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi); | |
572e2857 BB |
271 | if (err) { |
272 | mutex_exit(&bpo->bpo_lock); | |
428870ff | 273 | return (err); |
572e2857 | 274 | } |
fa86b5db | 275 | ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ); |
428870ff BB |
276 | epb = doi.doi_data_block_size / sizeof (uint64_t); |
277 | ||
278 | for (i = bpo->bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) { | |
279 | uint64_t *objarray; | |
280 | uint64_t offset, blkoff; | |
281 | bpobj_t sublist; | |
282 | uint64_t used_before, comp_before, uncomp_before; | |
283 | uint64_t used_after, comp_after, uncomp_after; | |
284 | ||
285 | offset = i * sizeof (uint64_t); | |
286 | blkoff = P2PHASE(i, epb); | |
287 | ||
288 | if (dbuf == NULL || dbuf->db_offset > offset) { | |
289 | if (dbuf) | |
290 | dmu_buf_rele(dbuf, FTAG); | |
291 | err = dmu_buf_hold(bpo->bpo_os, | |
292 | bpo->bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0); | |
293 | if (err) | |
294 | break; | |
295 | } | |
296 | ||
297 | ASSERT3U(offset, >=, dbuf->db_offset); | |
298 | ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); | |
299 | ||
300 | objarray = dbuf->db_data; | |
301 | err = bpobj_open(&sublist, bpo->bpo_os, objarray[blkoff]); | |
302 | if (err) | |
303 | break; | |
304 | if (free) { | |
305 | err = bpobj_space(&sublist, | |
306 | &used_before, &comp_before, &uncomp_before); | |
50f9ea01 WA |
307 | if (err != 0) { |
308 | bpobj_close(&sublist); | |
428870ff | 309 | break; |
50f9ea01 | 310 | } |
428870ff BB |
311 | } |
312 | err = bpobj_iterate_impl(&sublist, func, arg, tx, free); | |
313 | if (free) { | |
314 | VERIFY3U(0, ==, bpobj_space(&sublist, | |
315 | &used_after, &comp_after, &uncomp_after)); | |
316 | bpo->bpo_phys->bpo_bytes -= used_before - used_after; | |
317 | ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0); | |
572e2857 | 318 | bpo->bpo_phys->bpo_comp -= comp_before - comp_after; |
428870ff BB |
319 | bpo->bpo_phys->bpo_uncomp -= |
320 | uncomp_before - uncomp_after; | |
321 | } | |
322 | ||
323 | bpobj_close(&sublist); | |
324 | if (err) | |
325 | break; | |
326 | if (free) { | |
327 | err = dmu_object_free(bpo->bpo_os, | |
328 | objarray[blkoff], tx); | |
329 | if (err) | |
330 | break; | |
331 | bpo->bpo_phys->bpo_num_subobjs--; | |
332 | ASSERT3S(bpo->bpo_phys->bpo_num_subobjs, >=, 0); | |
333 | } | |
334 | } | |
335 | if (dbuf) { | |
336 | dmu_buf_rele(dbuf, FTAG); | |
337 | dbuf = NULL; | |
338 | } | |
339 | if (free) { | |
340 | VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, | |
341 | bpo->bpo_phys->bpo_subobjs, | |
ee45fbd8 | 342 | (i + 1) * sizeof (uint64_t), DMU_OBJECT_END, tx)); |
428870ff BB |
343 | } |
344 | ||
345 | out: | |
346 | /* If there are no entries, there should be no bytes. */ | |
9b67f605 MA |
347 | if (!bpobj_hasentries(bpo)) { |
348 | ASSERT0(bpo->bpo_phys->bpo_bytes); | |
349 | ASSERT0(bpo->bpo_phys->bpo_comp); | |
350 | ASSERT0(bpo->bpo_phys->bpo_uncomp); | |
351 | } | |
428870ff BB |
352 | |
353 | mutex_exit(&bpo->bpo_lock); | |
354 | return (err); | |
355 | } | |
356 | ||
357 | /* | |
358 | * Iterate and remove the entries. If func returns nonzero, iteration | |
359 | * will stop and that entry will not be removed. | |
360 | */ | |
361 | int | |
362 | bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx) | |
363 | { | |
364 | return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE)); | |
365 | } | |
366 | ||
367 | /* | |
368 | * Iterate the entries. If func returns nonzero, iteration will stop. | |
369 | */ | |
370 | int | |
371 | bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx) | |
372 | { | |
373 | return (bpobj_iterate_impl(bpo, func, arg, tx, B_FALSE)); | |
374 | } | |
375 | ||
376 | void | |
377 | bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) | |
378 | { | |
379 | bpobj_t subbpo; | |
572e2857 | 380 | uint64_t used, comp, uncomp, subsubobjs; |
428870ff BB |
381 | |
382 | ASSERT(bpo->bpo_havesubobj); | |
383 | ASSERT(bpo->bpo_havecomp); | |
753c3839 MA |
384 | ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj); |
385 | ||
386 | if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj) { | |
387 | bpobj_decr_empty(bpo->bpo_os, tx); | |
388 | return; | |
389 | } | |
428870ff BB |
390 | |
391 | VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj)); | |
392 | VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp)); | |
428870ff | 393 | |
9b67f605 | 394 | if (!bpobj_hasentries(&subbpo)) { |
428870ff | 395 | /* No point in having an empty subobj. */ |
572e2857 | 396 | bpobj_close(&subbpo); |
428870ff BB |
397 | bpobj_free(bpo->bpo_os, subobj, tx); |
398 | return; | |
399 | } | |
400 | ||
df7eeccc | 401 | mutex_enter(&bpo->bpo_lock); |
428870ff BB |
402 | dmu_buf_will_dirty(bpo->bpo_dbuf, tx); |
403 | if (bpo->bpo_phys->bpo_subobjs == 0) { | |
404 | bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os, | |
f1512ee6 MA |
405 | DMU_OT_BPOBJ_SUBOBJ, SPA_OLD_MAXBLOCKSIZE, |
406 | DMU_OT_NONE, 0, tx); | |
428870ff BB |
407 | } |
408 | ||
1c27024e | 409 | ASSERTV(dmu_object_info_t doi); |
13fe0198 MA |
410 | ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi)); |
411 | ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ); | |
412 | ||
428870ff BB |
413 | dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, |
414 | bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), | |
415 | sizeof (subobj), &subobj, tx); | |
416 | bpo->bpo_phys->bpo_num_subobjs++; | |
572e2857 BB |
417 | |
418 | /* | |
419 | * If subobj has only one block of subobjs, then move subobj's | |
420 | * subobjs to bpo's subobj list directly. This reduces | |
421 | * recursion in bpobj_iterate due to nested subobjs. | |
422 | */ | |
423 | subsubobjs = subbpo.bpo_phys->bpo_subobjs; | |
424 | if (subsubobjs != 0) { | |
425 | dmu_object_info_t doi; | |
426 | ||
427 | VERIFY3U(0, ==, dmu_object_info(bpo->bpo_os, subsubobjs, &doi)); | |
428 | if (doi.doi_max_offset == doi.doi_data_block_size) { | |
429 | dmu_buf_t *subdb; | |
430 | uint64_t numsubsub = subbpo.bpo_phys->bpo_num_subobjs; | |
431 | ||
432 | VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, subsubobjs, | |
433 | 0, FTAG, &subdb, 0)); | |
d1fada1e MA |
434 | /* |
435 | * Make sure that we are not asking dmu_write() | |
436 | * to write more data than we have in our buffer. | |
437 | */ | |
438 | VERIFY3U(subdb->db_size, >=, | |
439 | numsubsub * sizeof (subobj)); | |
572e2857 BB |
440 | dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, |
441 | bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), | |
442 | numsubsub * sizeof (subobj), subdb->db_data, tx); | |
443 | dmu_buf_rele(subdb, FTAG); | |
444 | bpo->bpo_phys->bpo_num_subobjs += numsubsub; | |
445 | ||
446 | dmu_buf_will_dirty(subbpo.bpo_dbuf, tx); | |
447 | subbpo.bpo_phys->bpo_subobjs = 0; | |
448 | VERIFY3U(0, ==, dmu_object_free(bpo->bpo_os, | |
449 | subsubobjs, tx)); | |
450 | } | |
451 | } | |
428870ff BB |
452 | bpo->bpo_phys->bpo_bytes += used; |
453 | bpo->bpo_phys->bpo_comp += comp; | |
454 | bpo->bpo_phys->bpo_uncomp += uncomp; | |
455 | mutex_exit(&bpo->bpo_lock); | |
572e2857 BB |
456 | |
457 | bpobj_close(&subbpo); | |
428870ff BB |
458 | } |
459 | ||
460 | void | |
461 | bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx) | |
462 | { | |
463 | blkptr_t stored_bp = *bp; | |
464 | uint64_t offset; | |
465 | int blkoff; | |
466 | blkptr_t *bparray; | |
467 | ||
468 | ASSERT(!BP_IS_HOLE(bp)); | |
753c3839 | 469 | ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj); |
428870ff | 470 | |
9b67f605 MA |
471 | if (BP_IS_EMBEDDED(bp)) { |
472 | /* | |
473 | * The bpobj will compress better without the payload. | |
474 | * | |
475 | * Note that we store EMBEDDED bp's because they have an | |
476 | * uncompressed size, which must be accounted for. An | |
477 | * alternative would be to add their size to bpo_uncomp | |
478 | * without storing the bp, but that would create additional | |
479 | * complications: bpo_uncomp would be inconsistent with the | |
480 | * set of BP's stored, and bpobj_iterate() wouldn't visit | |
481 | * all the space accounted for in the bpobj. | |
482 | */ | |
483 | bzero(&stored_bp, sizeof (stored_bp)); | |
484 | stored_bp.blk_prop = bp->blk_prop; | |
485 | stored_bp.blk_birth = bp->blk_birth; | |
486 | } else if (!BP_GET_DEDUP(bp)) { | |
487 | /* The bpobj will compress better without the checksum */ | |
488 | bzero(&stored_bp.blk_cksum, sizeof (stored_bp.blk_cksum)); | |
489 | } | |
490 | ||
428870ff BB |
491 | /* We never need the fill count. */ |
492 | stored_bp.blk_fill = 0; | |
493 | ||
428870ff BB |
494 | mutex_enter(&bpo->bpo_lock); |
495 | ||
496 | offset = bpo->bpo_phys->bpo_num_blkptrs * sizeof (stored_bp); | |
497 | blkoff = P2PHASE(bpo->bpo_phys->bpo_num_blkptrs, bpo->bpo_epb); | |
498 | ||
499 | if (bpo->bpo_cached_dbuf == NULL || | |
500 | offset < bpo->bpo_cached_dbuf->db_offset || | |
501 | offset >= bpo->bpo_cached_dbuf->db_offset + | |
502 | bpo->bpo_cached_dbuf->db_size) { | |
503 | if (bpo->bpo_cached_dbuf) | |
504 | dmu_buf_rele(bpo->bpo_cached_dbuf, bpo); | |
505 | VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, | |
506 | offset, bpo, &bpo->bpo_cached_dbuf, 0)); | |
507 | } | |
508 | ||
509 | dmu_buf_will_dirty(bpo->bpo_cached_dbuf, tx); | |
510 | bparray = bpo->bpo_cached_dbuf->db_data; | |
511 | bparray[blkoff] = stored_bp; | |
512 | ||
513 | dmu_buf_will_dirty(bpo->bpo_dbuf, tx); | |
514 | bpo->bpo_phys->bpo_num_blkptrs++; | |
515 | bpo->bpo_phys->bpo_bytes += | |
516 | bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp); | |
517 | if (bpo->bpo_havecomp) { | |
518 | bpo->bpo_phys->bpo_comp += BP_GET_PSIZE(bp); | |
519 | bpo->bpo_phys->bpo_uncomp += BP_GET_UCSIZE(bp); | |
520 | } | |
521 | mutex_exit(&bpo->bpo_lock); | |
522 | } | |
523 | ||
524 | struct space_range_arg { | |
525 | spa_t *spa; | |
526 | uint64_t mintxg; | |
527 | uint64_t maxtxg; | |
528 | uint64_t used; | |
529 | uint64_t comp; | |
530 | uint64_t uncomp; | |
531 | }; | |
532 | ||
533 | /* ARGSUSED */ | |
534 | static int | |
535 | space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) | |
536 | { | |
537 | struct space_range_arg *sra = arg; | |
538 | ||
539 | if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) { | |
330d06f9 MA |
540 | if (dsl_pool_sync_context(spa_get_dsl(sra->spa))) |
541 | sra->used += bp_get_dsize_sync(sra->spa, bp); | |
542 | else | |
543 | sra->used += bp_get_dsize(sra->spa, bp); | |
428870ff BB |
544 | sra->comp += BP_GET_PSIZE(bp); |
545 | sra->uncomp += BP_GET_UCSIZE(bp); | |
546 | } | |
547 | return (0); | |
548 | } | |
549 | ||
550 | int | |
551 | bpobj_space(bpobj_t *bpo, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) | |
552 | { | |
553 | mutex_enter(&bpo->bpo_lock); | |
554 | ||
555 | *usedp = bpo->bpo_phys->bpo_bytes; | |
556 | if (bpo->bpo_havecomp) { | |
557 | *compp = bpo->bpo_phys->bpo_comp; | |
558 | *uncompp = bpo->bpo_phys->bpo_uncomp; | |
559 | mutex_exit(&bpo->bpo_lock); | |
560 | return (0); | |
561 | } else { | |
562 | mutex_exit(&bpo->bpo_lock); | |
563 | return (bpobj_space_range(bpo, 0, UINT64_MAX, | |
564 | usedp, compp, uncompp)); | |
565 | } | |
566 | } | |
567 | ||
568 | /* | |
569 | * Return the amount of space in the bpobj which is: | |
570 | * mintxg < blk_birth <= maxtxg | |
571 | */ | |
572 | int | |
573 | bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg, | |
574 | uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) | |
575 | { | |
576 | struct space_range_arg sra = { 0 }; | |
577 | int err; | |
578 | ||
579 | /* | |
580 | * As an optimization, if they want the whole txg range, just | |
581 | * get bpo_bytes rather than iterating over the bps. | |
582 | */ | |
583 | if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX && bpo->bpo_havecomp) | |
584 | return (bpobj_space(bpo, usedp, compp, uncompp)); | |
585 | ||
586 | sra.spa = dmu_objset_spa(bpo->bpo_os); | |
587 | sra.mintxg = mintxg; | |
588 | sra.maxtxg = maxtxg; | |
589 | ||
590 | err = bpobj_iterate_nofree(bpo, space_range_cb, &sra, NULL); | |
591 | *usedp = sra.used; | |
592 | *compp = sra.comp; | |
593 | *uncompp = sra.uncomp; | |
594 | return (err); | |
595 | } |