]> git.proxmox.com Git - mirror_zfs-debian.git/blob - module/zfs/bplist.c
93b7741d77be22927045d2345d5b37693db55c32
[mirror_zfs-debian.git] / module / zfs / bplist.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/bplist.h>
27 #include <sys/zfs_context.h>
28
29 static int
30 bplist_hold(bplist_t *bpl)
31 {
32 ASSERT(MUTEX_HELD(&bpl->bpl_lock));
33 if (bpl->bpl_dbuf == NULL) {
34 int err = dmu_bonus_hold(bpl->bpl_mos,
35 bpl->bpl_object, bpl, &bpl->bpl_dbuf);
36 if (err)
37 return (err);
38 bpl->bpl_phys = bpl->bpl_dbuf->db_data;
39 }
40 return (0);
41 }
42
43 uint64_t
44 bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
45 {
46 int size;
47
48 size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ?
49 BPLIST_SIZE_V0 : sizeof (bplist_phys_t);
50
51 return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
52 DMU_OT_BPLIST_HDR, size, tx));
53 }
54
55 void
56 bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
57 {
58 VERIFY(dmu_object_free(mos, object, tx) == 0);
59 }
60
61 int
62 bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
63 {
64 dmu_object_info_t doi;
65 int err;
66
67 err = dmu_object_info(mos, object, &doi);
68 if (err)
69 return (err);
70
71 mutex_enter(&bpl->bpl_lock);
72
73 ASSERT(bpl->bpl_dbuf == NULL);
74 ASSERT(bpl->bpl_phys == NULL);
75 ASSERT(bpl->bpl_cached_dbuf == NULL);
76 ASSERT(bpl->bpl_queue == NULL);
77 ASSERT(object != 0);
78 ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
79 ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);
80
81 bpl->bpl_mos = mos;
82 bpl->bpl_object = object;
83 bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
84 bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
85 bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));
86
87 mutex_exit(&bpl->bpl_lock);
88 return (0);
89 }
90
91 void
92 bplist_close(bplist_t *bpl)
93 {
94 mutex_enter(&bpl->bpl_lock);
95
96 ASSERT(bpl->bpl_queue == NULL);
97
98 if (bpl->bpl_cached_dbuf) {
99 dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
100 bpl->bpl_cached_dbuf = NULL;
101 }
102 if (bpl->bpl_dbuf) {
103 dmu_buf_rele(bpl->bpl_dbuf, bpl);
104 bpl->bpl_dbuf = NULL;
105 bpl->bpl_phys = NULL;
106 }
107
108 mutex_exit(&bpl->bpl_lock);
109 }
110
111 boolean_t
112 bplist_empty(bplist_t *bpl)
113 {
114 boolean_t rv;
115
116 if (bpl->bpl_object == 0)
117 return (B_TRUE);
118
119 mutex_enter(&bpl->bpl_lock);
120 VERIFY(0 == bplist_hold(bpl)); /* XXX */
121 rv = (bpl->bpl_phys->bpl_entries == 0);
122 mutex_exit(&bpl->bpl_lock);
123
124 return (rv);
125 }
126
127 static int
128 bplist_cache(bplist_t *bpl, uint64_t blkid)
129 {
130 int err = 0;
131
132 if (bpl->bpl_cached_dbuf == NULL ||
133 bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
134 if (bpl->bpl_cached_dbuf != NULL)
135 dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
136 err = dmu_buf_hold(bpl->bpl_mos,
137 bpl->bpl_object, blkid << bpl->bpl_blockshift,
138 bpl, &bpl->bpl_cached_dbuf);
139 ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
140 1ULL << bpl->bpl_blockshift);
141 }
142 return (err);
143 }
144
145 int
146 bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
147 {
148 uint64_t blk, off;
149 blkptr_t *bparray;
150 int err;
151
152 mutex_enter(&bpl->bpl_lock);
153
154 err = bplist_hold(bpl);
155 if (err) {
156 mutex_exit(&bpl->bpl_lock);
157 return (err);
158 }
159
160 if (*itorp >= bpl->bpl_phys->bpl_entries) {
161 mutex_exit(&bpl->bpl_lock);
162 return (ENOENT);
163 }
164
165 blk = *itorp >> bpl->bpl_bpshift;
166 off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);
167
168 err = bplist_cache(bpl, blk);
169 if (err) {
170 mutex_exit(&bpl->bpl_lock);
171 return (err);
172 }
173
174 bparray = bpl->bpl_cached_dbuf->db_data;
175 *bp = bparray[off];
176 (*itorp)++;
177 mutex_exit(&bpl->bpl_lock);
178 return (0);
179 }
180
181 int
182 bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx)
183 {
184 uint64_t blk, off;
185 blkptr_t *bparray;
186 int err;
187
188 ASSERT(!BP_IS_HOLE(bp));
189 mutex_enter(&bpl->bpl_lock);
190 err = bplist_hold(bpl);
191 if (err)
192 return (err);
193
194 blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
195 off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);
196
197 err = bplist_cache(bpl, blk);
198 if (err) {
199 mutex_exit(&bpl->bpl_lock);
200 return (err);
201 }
202
203 dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
204 bparray = bpl->bpl_cached_dbuf->db_data;
205 bparray[off] = *bp;
206
207 /* We never need the fill count. */
208 bparray[off].blk_fill = 0;
209
210 /* The bplist will compress better if we can leave off the checksum */
211 bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));
212
213 dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
214 bpl->bpl_phys->bpl_entries++;
215 bpl->bpl_phys->bpl_bytes +=
216 bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), bp);
217 if (bpl->bpl_havecomp) {
218 bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
219 bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
220 }
221 mutex_exit(&bpl->bpl_lock);
222
223 return (0);
224 }
225
226 /*
227 * Deferred entry; will be written later by bplist_sync().
228 */
229 void
230 bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp)
231 {
232 bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);
233
234 ASSERT(!BP_IS_HOLE(bp));
235 mutex_enter(&bpl->bpl_lock);
236 bpq->bpq_blk = *bp;
237 bpq->bpq_next = bpl->bpl_queue;
238 bpl->bpl_queue = bpq;
239 mutex_exit(&bpl->bpl_lock);
240 }
241
242 void
243 bplist_sync(bplist_t *bpl, dmu_tx_t *tx)
244 {
245 bplist_q_t *bpq;
246
247 mutex_enter(&bpl->bpl_lock);
248 while ((bpq = bpl->bpl_queue) != NULL) {
249 bpl->bpl_queue = bpq->bpq_next;
250 mutex_exit(&bpl->bpl_lock);
251 VERIFY(0 == bplist_enqueue(bpl, &bpq->bpq_blk, tx));
252 kmem_free(bpq, sizeof (*bpq));
253 mutex_enter(&bpl->bpl_lock);
254 }
255 mutex_exit(&bpl->bpl_lock);
256 }
257
258 void
259 bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
260 {
261 mutex_enter(&bpl->bpl_lock);
262 ASSERT3P(bpl->bpl_queue, ==, NULL);
263 VERIFY(0 == bplist_hold(bpl));
264 dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
265 VERIFY(0 == dmu_free_range(bpl->bpl_mos,
266 bpl->bpl_object, 0, -1ULL, tx));
267 bpl->bpl_phys->bpl_entries = 0;
268 bpl->bpl_phys->bpl_bytes = 0;
269 if (bpl->bpl_havecomp) {
270 bpl->bpl_phys->bpl_comp = 0;
271 bpl->bpl_phys->bpl_uncomp = 0;
272 }
273 mutex_exit(&bpl->bpl_lock);
274 }
275
276 int
277 bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
278 {
279 int err;
280
281 mutex_enter(&bpl->bpl_lock);
282
283 err = bplist_hold(bpl);
284 if (err) {
285 mutex_exit(&bpl->bpl_lock);
286 return (err);
287 }
288
289 *usedp = bpl->bpl_phys->bpl_bytes;
290 if (bpl->bpl_havecomp) {
291 *compp = bpl->bpl_phys->bpl_comp;
292 *uncompp = bpl->bpl_phys->bpl_uncomp;
293 }
294 mutex_exit(&bpl->bpl_lock);
295
296 if (!bpl->bpl_havecomp) {
297 uint64_t itor = 0, comp = 0, uncomp = 0;
298 blkptr_t bp;
299
300 while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
301 comp += BP_GET_PSIZE(&bp);
302 uncomp += BP_GET_UCSIZE(&bp);
303 }
304 if (err == ENOENT)
305 err = 0;
306 *compp = comp;
307 *uncompp = uncomp;
308 }
309
310 return (err);
311 }
312
313 /*
314 * Return (in *dasizep) the amount of space on the deadlist which is:
315 * mintxg < blk_birth <= maxtxg
316 */
317 int
318 bplist_space_birthrange(bplist_t *bpl, uint64_t mintxg, uint64_t maxtxg,
319 uint64_t *dasizep)
320 {
321 uint64_t size = 0;
322 uint64_t itor = 0;
323 blkptr_t bp;
324 int err;
325
326 /*
327 * As an optimization, if they want the whole txg range, just
328 * get bpl_bytes rather than iterating over the bps.
329 */
330 if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX) {
331 mutex_enter(&bpl->bpl_lock);
332 err = bplist_hold(bpl);
333 if (err == 0)
334 *dasizep = bpl->bpl_phys->bpl_bytes;
335 mutex_exit(&bpl->bpl_lock);
336 return (err);
337 }
338
339 while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
340 if (bp.blk_birth > mintxg && bp.blk_birth <= maxtxg) {
341 size +=
342 bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), &bp);
343 }
344 }
345 if (err == ENOENT)
346 err = 0;
347 *dasizep = size;
348 return (err);
349 }