]> git.proxmox.com Git - mirror_zfs-debian.git/blob - module/zfs/bpobj.c
Fix gcc array subscript above bounds warning
[mirror_zfs-debian.git] / module / zfs / bpobj.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2011 by Delphix. All rights reserved.
24 */
25
26 #include <sys/bpobj.h>
27 #include <sys/zfs_context.h>
28 #include <sys/refcount.h>
29 #include <sys/dsl_pool.h>
30
31 uint64_t
32 bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
33 {
34 int size;
35
36 if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_BPOBJ_ACCOUNT)
37 size = BPOBJ_SIZE_V0;
38 else if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS)
39 size = BPOBJ_SIZE_V1;
40 else
41 size = sizeof (bpobj_phys_t);
42
43 return (dmu_object_alloc(os, DMU_OT_BPOBJ, blocksize,
44 DMU_OT_BPOBJ_HDR, size, tx));
45 }
46
47 void
48 bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx)
49 {
50 int64_t i;
51 bpobj_t bpo;
52 dmu_object_info_t doi;
53 int epb;
54 dmu_buf_t *dbuf = NULL;
55
56 VERIFY3U(0, ==, bpobj_open(&bpo, os, obj));
57
58 mutex_enter(&bpo.bpo_lock);
59
60 if (!bpo.bpo_havesubobj || bpo.bpo_phys->bpo_subobjs == 0)
61 goto out;
62
63 VERIFY3U(0, ==, dmu_object_info(os, bpo.bpo_phys->bpo_subobjs, &doi));
64 epb = doi.doi_data_block_size / sizeof (uint64_t);
65
66 for (i = bpo.bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) {
67 uint64_t *objarray;
68 uint64_t offset, blkoff;
69
70 offset = i * sizeof (uint64_t);
71 blkoff = P2PHASE(i, epb);
72
73 if (dbuf == NULL || dbuf->db_offset > offset) {
74 if (dbuf)
75 dmu_buf_rele(dbuf, FTAG);
76 VERIFY3U(0, ==, dmu_buf_hold(os,
77 bpo.bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0));
78 }
79
80 ASSERT3U(offset, >=, dbuf->db_offset);
81 ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
82
83 objarray = dbuf->db_data;
84 bpobj_free(os, objarray[blkoff], tx);
85 }
86 if (dbuf) {
87 dmu_buf_rele(dbuf, FTAG);
88 dbuf = NULL;
89 }
90 VERIFY3U(0, ==, dmu_object_free(os, bpo.bpo_phys->bpo_subobjs, tx));
91
92 out:
93 mutex_exit(&bpo.bpo_lock);
94 bpobj_close(&bpo);
95
96 VERIFY3U(0, ==, dmu_object_free(os, obj, tx));
97 }
98
99 int
100 bpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object)
101 {
102 dmu_object_info_t doi;
103 int err;
104
105 err = dmu_object_info(os, object, &doi);
106 if (err)
107 return (err);
108
109 bzero(bpo, sizeof (*bpo));
110 mutex_init(&bpo->bpo_lock, NULL, MUTEX_DEFAULT, NULL);
111
112 ASSERT(bpo->bpo_dbuf == NULL);
113 ASSERT(bpo->bpo_phys == NULL);
114 ASSERT(object != 0);
115 ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ);
116 ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR);
117
118 err = dmu_bonus_hold(os, object, bpo, &bpo->bpo_dbuf);
119 if (err)
120 return (err);
121
122 bpo->bpo_os = os;
123 bpo->bpo_object = object;
124 bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT;
125 bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0);
126 bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1);
127 bpo->bpo_phys = bpo->bpo_dbuf->db_data;
128 return (0);
129 }
130
131 void
132 bpobj_close(bpobj_t *bpo)
133 {
134 /* Lame workaround for closing a bpobj that was never opened. */
135 if (bpo->bpo_object == 0)
136 return;
137
138 dmu_buf_rele(bpo->bpo_dbuf, bpo);
139 if (bpo->bpo_cached_dbuf != NULL)
140 dmu_buf_rele(bpo->bpo_cached_dbuf, bpo);
141 bpo->bpo_dbuf = NULL;
142 bpo->bpo_phys = NULL;
143 bpo->bpo_cached_dbuf = NULL;
144 bpo->bpo_object = 0;
145
146 mutex_destroy(&bpo->bpo_lock);
147 }
148
149 static int
150 bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
151 boolean_t free)
152 {
153 dmu_object_info_t doi;
154 int epb;
155 int64_t i;
156 int err = 0;
157 dmu_buf_t *dbuf = NULL;
158
159 mutex_enter(&bpo->bpo_lock);
160
161 if (free)
162 dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
163
164 for (i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= 0; i--) {
165 blkptr_t *bparray;
166 blkptr_t *bp;
167 uint64_t offset, blkoff;
168
169 offset = i * sizeof (blkptr_t);
170 blkoff = P2PHASE(i, bpo->bpo_epb);
171
172 if (dbuf == NULL || dbuf->db_offset > offset) {
173 if (dbuf)
174 dmu_buf_rele(dbuf, FTAG);
175 err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, offset,
176 FTAG, &dbuf, 0);
177 if (err)
178 break;
179 }
180
181 ASSERT3U(offset, >=, dbuf->db_offset);
182 ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
183
184 bparray = dbuf->db_data;
185 bp = &bparray[blkoff];
186 err = func(arg, bp, tx);
187 if (err)
188 break;
189 if (free) {
190 bpo->bpo_phys->bpo_bytes -=
191 bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp);
192 ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0);
193 if (bpo->bpo_havecomp) {
194 bpo->bpo_phys->bpo_comp -= BP_GET_PSIZE(bp);
195 bpo->bpo_phys->bpo_uncomp -= BP_GET_UCSIZE(bp);
196 }
197 bpo->bpo_phys->bpo_num_blkptrs--;
198 ASSERT3S(bpo->bpo_phys->bpo_num_blkptrs, >=, 0);
199 }
200 }
201 if (dbuf) {
202 dmu_buf_rele(dbuf, FTAG);
203 dbuf = NULL;
204 }
205 if (free) {
206 i++;
207 VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object,
208 i * sizeof (blkptr_t), -1ULL, tx));
209 }
210 if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0)
211 goto out;
212
213 ASSERT(bpo->bpo_havecomp);
214 err = dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi);
215 if (err) {
216 mutex_exit(&bpo->bpo_lock);
217 return (err);
218 }
219 epb = doi.doi_data_block_size / sizeof (uint64_t);
220
221 for (i = bpo->bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) {
222 uint64_t *objarray;
223 uint64_t offset, blkoff;
224 bpobj_t sublist;
225 uint64_t used_before, comp_before, uncomp_before;
226 uint64_t used_after, comp_after, uncomp_after;
227
228 offset = i * sizeof (uint64_t);
229 blkoff = P2PHASE(i, epb);
230
231 if (dbuf == NULL || dbuf->db_offset > offset) {
232 if (dbuf)
233 dmu_buf_rele(dbuf, FTAG);
234 err = dmu_buf_hold(bpo->bpo_os,
235 bpo->bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0);
236 if (err)
237 break;
238 }
239
240 ASSERT3U(offset, >=, dbuf->db_offset);
241 ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
242
243 objarray = dbuf->db_data;
244 err = bpobj_open(&sublist, bpo->bpo_os, objarray[blkoff]);
245 if (err)
246 break;
247 if (free) {
248 err = bpobj_space(&sublist,
249 &used_before, &comp_before, &uncomp_before);
250 if (err)
251 break;
252 }
253 err = bpobj_iterate_impl(&sublist, func, arg, tx, free);
254 if (free) {
255 VERIFY3U(0, ==, bpobj_space(&sublist,
256 &used_after, &comp_after, &uncomp_after));
257 bpo->bpo_phys->bpo_bytes -= used_before - used_after;
258 ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0);
259 bpo->bpo_phys->bpo_comp -= comp_before - comp_after;
260 bpo->bpo_phys->bpo_uncomp -=
261 uncomp_before - uncomp_after;
262 }
263
264 bpobj_close(&sublist);
265 if (err)
266 break;
267 if (free) {
268 err = dmu_object_free(bpo->bpo_os,
269 objarray[blkoff], tx);
270 if (err)
271 break;
272 bpo->bpo_phys->bpo_num_subobjs--;
273 ASSERT3S(bpo->bpo_phys->bpo_num_subobjs, >=, 0);
274 }
275 }
276 if (dbuf) {
277 dmu_buf_rele(dbuf, FTAG);
278 dbuf = NULL;
279 }
280 if (free) {
281 VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os,
282 bpo->bpo_phys->bpo_subobjs,
283 (i + 1) * sizeof (uint64_t), -1ULL, tx));
284 }
285
286 out:
287 /* If there are no entries, there should be no bytes. */
288 ASSERT(bpo->bpo_phys->bpo_num_blkptrs > 0 ||
289 (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_num_subobjs > 0) ||
290 bpo->bpo_phys->bpo_bytes == 0);
291
292 mutex_exit(&bpo->bpo_lock);
293 return (err);
294 }
295
296 /*
297 * Iterate and remove the entries. If func returns nonzero, iteration
298 * will stop and that entry will not be removed.
299 */
300 int
301 bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
302 {
303 return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE));
304 }
305
306 /*
307 * Iterate the entries. If func returns nonzero, iteration will stop.
308 */
309 int
310 bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
311 {
312 return (bpobj_iterate_impl(bpo, func, arg, tx, B_FALSE));
313 }
314
315 void
316 bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
317 {
318 bpobj_t subbpo;
319 uint64_t used, comp, uncomp, subsubobjs;
320
321 ASSERT(bpo->bpo_havesubobj);
322 ASSERT(bpo->bpo_havecomp);
323
324 VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
325 VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
326
327 if (used == 0) {
328 /* No point in having an empty subobj. */
329 bpobj_close(&subbpo);
330 bpobj_free(bpo->bpo_os, subobj, tx);
331 return;
332 }
333
334 dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
335 if (bpo->bpo_phys->bpo_subobjs == 0) {
336 bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os,
337 DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx);
338 }
339
340 mutex_enter(&bpo->bpo_lock);
341 dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
342 bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
343 sizeof (subobj), &subobj, tx);
344 bpo->bpo_phys->bpo_num_subobjs++;
345
346 /*
347 * If subobj has only one block of subobjs, then move subobj's
348 * subobjs to bpo's subobj list directly. This reduces
349 * recursion in bpobj_iterate due to nested subobjs.
350 */
351 subsubobjs = subbpo.bpo_phys->bpo_subobjs;
352 if (subsubobjs != 0) {
353 dmu_object_info_t doi;
354
355 VERIFY3U(0, ==, dmu_object_info(bpo->bpo_os, subsubobjs, &doi));
356 if (doi.doi_max_offset == doi.doi_data_block_size) {
357 dmu_buf_t *subdb;
358 uint64_t numsubsub = subbpo.bpo_phys->bpo_num_subobjs;
359
360 VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, subsubobjs,
361 0, FTAG, &subdb, 0));
362 dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
363 bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
364 numsubsub * sizeof (subobj), subdb->db_data, tx);
365 dmu_buf_rele(subdb, FTAG);
366 bpo->bpo_phys->bpo_num_subobjs += numsubsub;
367
368 dmu_buf_will_dirty(subbpo.bpo_dbuf, tx);
369 subbpo.bpo_phys->bpo_subobjs = 0;
370 VERIFY3U(0, ==, dmu_object_free(bpo->bpo_os,
371 subsubobjs, tx));
372 }
373 }
374 bpo->bpo_phys->bpo_bytes += used;
375 bpo->bpo_phys->bpo_comp += comp;
376 bpo->bpo_phys->bpo_uncomp += uncomp;
377 mutex_exit(&bpo->bpo_lock);
378
379 bpobj_close(&subbpo);
380 }
381
382 void
383 bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx)
384 {
385 blkptr_t stored_bp = *bp;
386 uint64_t offset;
387 int blkoff;
388 blkptr_t *bparray;
389
390 ASSERT(!BP_IS_HOLE(bp));
391
392 /* We never need the fill count. */
393 stored_bp.blk_fill = 0;
394
395 /* The bpobj will compress better if we can leave off the checksum */
396 if (!BP_GET_DEDUP(bp))
397 bzero(&stored_bp.blk_cksum, sizeof (stored_bp.blk_cksum));
398
399 mutex_enter(&bpo->bpo_lock);
400
401 offset = bpo->bpo_phys->bpo_num_blkptrs * sizeof (stored_bp);
402 blkoff = P2PHASE(bpo->bpo_phys->bpo_num_blkptrs, bpo->bpo_epb);
403
404 if (bpo->bpo_cached_dbuf == NULL ||
405 offset < bpo->bpo_cached_dbuf->db_offset ||
406 offset >= bpo->bpo_cached_dbuf->db_offset +
407 bpo->bpo_cached_dbuf->db_size) {
408 if (bpo->bpo_cached_dbuf)
409 dmu_buf_rele(bpo->bpo_cached_dbuf, bpo);
410 VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, bpo->bpo_object,
411 offset, bpo, &bpo->bpo_cached_dbuf, 0));
412 }
413
414 dmu_buf_will_dirty(bpo->bpo_cached_dbuf, tx);
415 bparray = bpo->bpo_cached_dbuf->db_data;
416 bparray[blkoff] = stored_bp;
417
418 dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
419 bpo->bpo_phys->bpo_num_blkptrs++;
420 bpo->bpo_phys->bpo_bytes +=
421 bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp);
422 if (bpo->bpo_havecomp) {
423 bpo->bpo_phys->bpo_comp += BP_GET_PSIZE(bp);
424 bpo->bpo_phys->bpo_uncomp += BP_GET_UCSIZE(bp);
425 }
426 mutex_exit(&bpo->bpo_lock);
427 }
428
429 struct space_range_arg {
430 spa_t *spa;
431 uint64_t mintxg;
432 uint64_t maxtxg;
433 uint64_t used;
434 uint64_t comp;
435 uint64_t uncomp;
436 };
437
438 /* ARGSUSED */
439 static int
440 space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
441 {
442 struct space_range_arg *sra = arg;
443
444 if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) {
445 if (dsl_pool_sync_context(spa_get_dsl(sra->spa)))
446 sra->used += bp_get_dsize_sync(sra->spa, bp);
447 else
448 sra->used += bp_get_dsize(sra->spa, bp);
449 sra->comp += BP_GET_PSIZE(bp);
450 sra->uncomp += BP_GET_UCSIZE(bp);
451 }
452 return (0);
453 }
454
455 int
456 bpobj_space(bpobj_t *bpo, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
457 {
458 mutex_enter(&bpo->bpo_lock);
459
460 *usedp = bpo->bpo_phys->bpo_bytes;
461 if (bpo->bpo_havecomp) {
462 *compp = bpo->bpo_phys->bpo_comp;
463 *uncompp = bpo->bpo_phys->bpo_uncomp;
464 mutex_exit(&bpo->bpo_lock);
465 return (0);
466 } else {
467 mutex_exit(&bpo->bpo_lock);
468 return (bpobj_space_range(bpo, 0, UINT64_MAX,
469 usedp, compp, uncompp));
470 }
471 }
472
473 /*
474 * Return the amount of space in the bpobj which is:
475 * mintxg < blk_birth <= maxtxg
476 */
477 int
478 bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg,
479 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
480 {
481 struct space_range_arg sra = { 0 };
482 int err;
483
484 /*
485 * As an optimization, if they want the whole txg range, just
486 * get bpo_bytes rather than iterating over the bps.
487 */
488 if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX && bpo->bpo_havecomp)
489 return (bpobj_space(bpo, usedp, compp, uncompp));
490
491 sra.spa = dmu_objset_spa(bpo->bpo_os);
492 sra.mintxg = mintxg;
493 sra.maxtxg = maxtxg;
494
495 err = bpobj_iterate_nofree(bpo, space_range_cb, &sra, NULL);
496 *usedp = sra.used;
497 *compp = sra.comp;
498 *uncompp = sra.uncomp;
499 return (err);
500 }