]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
b128c09f | 22 | * Copyright 2008 Sun Microsystems, Inc. All rights reserved. |
34dc7c2f BB |
23 | * Use is subject to license terms. |
24 | */ | |
25 | ||
34dc7c2f BB |
26 | #include <sys/bplist.h> |
27 | #include <sys/zfs_context.h> | |
28 | ||
29 | static int | |
30 | bplist_hold(bplist_t *bpl) | |
31 | { | |
32 | ASSERT(MUTEX_HELD(&bpl->bpl_lock)); | |
33 | if (bpl->bpl_dbuf == NULL) { | |
34 | int err = dmu_bonus_hold(bpl->bpl_mos, | |
35 | bpl->bpl_object, bpl, &bpl->bpl_dbuf); | |
36 | if (err) | |
37 | return (err); | |
38 | bpl->bpl_phys = bpl->bpl_dbuf->db_data; | |
39 | } | |
40 | return (0); | |
41 | } | |
42 | ||
43 | uint64_t | |
44 | bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx) | |
45 | { | |
46 | int size; | |
47 | ||
48 | size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ? | |
49 | BPLIST_SIZE_V0 : sizeof (bplist_phys_t); | |
50 | ||
51 | return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize, | |
52 | DMU_OT_BPLIST_HDR, size, tx)); | |
53 | } | |
54 | ||
55 | void | |
56 | bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx) | |
57 | { | |
58 | VERIFY(dmu_object_free(mos, object, tx) == 0); | |
59 | } | |
60 | ||
61 | int | |
62 | bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object) | |
63 | { | |
64 | dmu_object_info_t doi; | |
65 | int err; | |
66 | ||
67 | err = dmu_object_info(mos, object, &doi); | |
68 | if (err) | |
69 | return (err); | |
70 | ||
71 | mutex_enter(&bpl->bpl_lock); | |
72 | ||
73 | ASSERT(bpl->bpl_dbuf == NULL); | |
74 | ASSERT(bpl->bpl_phys == NULL); | |
75 | ASSERT(bpl->bpl_cached_dbuf == NULL); | |
76 | ASSERT(bpl->bpl_queue == NULL); | |
77 | ASSERT(object != 0); | |
78 | ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST); | |
79 | ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR); | |
80 | ||
81 | bpl->bpl_mos = mos; | |
82 | bpl->bpl_object = object; | |
83 | bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1); | |
84 | bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT; | |
85 | bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t)); | |
86 | ||
87 | mutex_exit(&bpl->bpl_lock); | |
88 | return (0); | |
89 | } | |
90 | ||
91 | void | |
92 | bplist_close(bplist_t *bpl) | |
93 | { | |
94 | mutex_enter(&bpl->bpl_lock); | |
95 | ||
96 | ASSERT(bpl->bpl_queue == NULL); | |
97 | ||
98 | if (bpl->bpl_cached_dbuf) { | |
99 | dmu_buf_rele(bpl->bpl_cached_dbuf, bpl); | |
100 | bpl->bpl_cached_dbuf = NULL; | |
101 | } | |
102 | if (bpl->bpl_dbuf) { | |
103 | dmu_buf_rele(bpl->bpl_dbuf, bpl); | |
104 | bpl->bpl_dbuf = NULL; | |
105 | bpl->bpl_phys = NULL; | |
106 | } | |
107 | ||
108 | mutex_exit(&bpl->bpl_lock); | |
109 | } | |
110 | ||
111 | boolean_t | |
112 | bplist_empty(bplist_t *bpl) | |
113 | { | |
114 | boolean_t rv; | |
115 | ||
116 | if (bpl->bpl_object == 0) | |
117 | return (B_TRUE); | |
118 | ||
119 | mutex_enter(&bpl->bpl_lock); | |
120 | VERIFY(0 == bplist_hold(bpl)); /* XXX */ | |
121 | rv = (bpl->bpl_phys->bpl_entries == 0); | |
122 | mutex_exit(&bpl->bpl_lock); | |
123 | ||
124 | return (rv); | |
125 | } | |
126 | ||
127 | static int | |
128 | bplist_cache(bplist_t *bpl, uint64_t blkid) | |
129 | { | |
130 | int err = 0; | |
131 | ||
132 | if (bpl->bpl_cached_dbuf == NULL || | |
133 | bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) { | |
134 | if (bpl->bpl_cached_dbuf != NULL) | |
135 | dmu_buf_rele(bpl->bpl_cached_dbuf, bpl); | |
136 | err = dmu_buf_hold(bpl->bpl_mos, | |
137 | bpl->bpl_object, blkid << bpl->bpl_blockshift, | |
138 | bpl, &bpl->bpl_cached_dbuf); | |
139 | ASSERT(err || bpl->bpl_cached_dbuf->db_size == | |
140 | 1ULL << bpl->bpl_blockshift); | |
141 | } | |
142 | return (err); | |
143 | } | |
144 | ||
145 | int | |
146 | bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp) | |
147 | { | |
148 | uint64_t blk, off; | |
149 | blkptr_t *bparray; | |
150 | int err; | |
151 | ||
152 | mutex_enter(&bpl->bpl_lock); | |
153 | ||
154 | err = bplist_hold(bpl); | |
155 | if (err) { | |
156 | mutex_exit(&bpl->bpl_lock); | |
157 | return (err); | |
158 | } | |
159 | ||
160 | if (*itorp >= bpl->bpl_phys->bpl_entries) { | |
161 | mutex_exit(&bpl->bpl_lock); | |
162 | return (ENOENT); | |
163 | } | |
164 | ||
165 | blk = *itorp >> bpl->bpl_bpshift; | |
166 | off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift); | |
167 | ||
168 | err = bplist_cache(bpl, blk); | |
169 | if (err) { | |
170 | mutex_exit(&bpl->bpl_lock); | |
171 | return (err); | |
172 | } | |
173 | ||
174 | bparray = bpl->bpl_cached_dbuf->db_data; | |
175 | *bp = bparray[off]; | |
176 | (*itorp)++; | |
177 | mutex_exit(&bpl->bpl_lock); | |
178 | return (0); | |
179 | } | |
180 | ||
181 | int | |
b128c09f | 182 | bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx) |
34dc7c2f BB |
183 | { |
184 | uint64_t blk, off; | |
185 | blkptr_t *bparray; | |
186 | int err; | |
187 | ||
188 | ASSERT(!BP_IS_HOLE(bp)); | |
189 | mutex_enter(&bpl->bpl_lock); | |
190 | err = bplist_hold(bpl); | |
191 | if (err) | |
192 | return (err); | |
193 | ||
194 | blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift; | |
195 | off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift); | |
196 | ||
197 | err = bplist_cache(bpl, blk); | |
198 | if (err) { | |
199 | mutex_exit(&bpl->bpl_lock); | |
200 | return (err); | |
201 | } | |
202 | ||
203 | dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx); | |
204 | bparray = bpl->bpl_cached_dbuf->db_data; | |
205 | bparray[off] = *bp; | |
206 | ||
207 | /* We never need the fill count. */ | |
208 | bparray[off].blk_fill = 0; | |
209 | ||
210 | /* The bplist will compress better if we can leave off the checksum */ | |
211 | bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum)); | |
212 | ||
213 | dmu_buf_will_dirty(bpl->bpl_dbuf, tx); | |
214 | bpl->bpl_phys->bpl_entries++; | |
215 | bpl->bpl_phys->bpl_bytes += | |
216 | bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), bp); | |
217 | if (bpl->bpl_havecomp) { | |
218 | bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp); | |
219 | bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp); | |
220 | } | |
221 | mutex_exit(&bpl->bpl_lock); | |
222 | ||
223 | return (0); | |
224 | } | |
225 | ||
226 | /* | |
227 | * Deferred entry; will be written later by bplist_sync(). | |
228 | */ | |
229 | void | |
b128c09f | 230 | bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp) |
34dc7c2f BB |
231 | { |
232 | bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP); | |
233 | ||
234 | ASSERT(!BP_IS_HOLE(bp)); | |
235 | mutex_enter(&bpl->bpl_lock); | |
236 | bpq->bpq_blk = *bp; | |
237 | bpq->bpq_next = bpl->bpl_queue; | |
238 | bpl->bpl_queue = bpq; | |
239 | mutex_exit(&bpl->bpl_lock); | |
240 | } | |
241 | ||
242 | void | |
243 | bplist_sync(bplist_t *bpl, dmu_tx_t *tx) | |
244 | { | |
245 | bplist_q_t *bpq; | |
246 | ||
247 | mutex_enter(&bpl->bpl_lock); | |
248 | while ((bpq = bpl->bpl_queue) != NULL) { | |
249 | bpl->bpl_queue = bpq->bpq_next; | |
250 | mutex_exit(&bpl->bpl_lock); | |
251 | VERIFY(0 == bplist_enqueue(bpl, &bpq->bpq_blk, tx)); | |
252 | kmem_free(bpq, sizeof (*bpq)); | |
253 | mutex_enter(&bpl->bpl_lock); | |
254 | } | |
255 | mutex_exit(&bpl->bpl_lock); | |
256 | } | |
257 | ||
258 | void | |
259 | bplist_vacate(bplist_t *bpl, dmu_tx_t *tx) | |
260 | { | |
261 | mutex_enter(&bpl->bpl_lock); | |
262 | ASSERT3P(bpl->bpl_queue, ==, NULL); | |
263 | VERIFY(0 == bplist_hold(bpl)); | |
264 | dmu_buf_will_dirty(bpl->bpl_dbuf, tx); | |
265 | VERIFY(0 == dmu_free_range(bpl->bpl_mos, | |
266 | bpl->bpl_object, 0, -1ULL, tx)); | |
267 | bpl->bpl_phys->bpl_entries = 0; | |
268 | bpl->bpl_phys->bpl_bytes = 0; | |
269 | if (bpl->bpl_havecomp) { | |
270 | bpl->bpl_phys->bpl_comp = 0; | |
271 | bpl->bpl_phys->bpl_uncomp = 0; | |
272 | } | |
273 | mutex_exit(&bpl->bpl_lock); | |
274 | } | |
275 | ||
276 | int | |
277 | bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) | |
278 | { | |
279 | int err; | |
280 | ||
281 | mutex_enter(&bpl->bpl_lock); | |
282 | ||
283 | err = bplist_hold(bpl); | |
284 | if (err) { | |
285 | mutex_exit(&bpl->bpl_lock); | |
286 | return (err); | |
287 | } | |
288 | ||
289 | *usedp = bpl->bpl_phys->bpl_bytes; | |
290 | if (bpl->bpl_havecomp) { | |
291 | *compp = bpl->bpl_phys->bpl_comp; | |
292 | *uncompp = bpl->bpl_phys->bpl_uncomp; | |
293 | } | |
294 | mutex_exit(&bpl->bpl_lock); | |
295 | ||
296 | if (!bpl->bpl_havecomp) { | |
297 | uint64_t itor = 0, comp = 0, uncomp = 0; | |
298 | blkptr_t bp; | |
299 | ||
300 | while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) { | |
301 | comp += BP_GET_PSIZE(&bp); | |
302 | uncomp += BP_GET_UCSIZE(&bp); | |
303 | } | |
304 | if (err == ENOENT) | |
305 | err = 0; | |
306 | *compp = comp; | |
307 | *uncompp = uncomp; | |
308 | } | |
309 | ||
310 | return (err); | |
311 | } | |
b128c09f BB |
312 | |
313 | /* | |
314 | * Return (in *dasizep) the amount of space on the deadlist which is: | |
315 | * mintxg < blk_birth <= maxtxg | |
316 | */ | |
317 | int | |
318 | bplist_space_birthrange(bplist_t *bpl, uint64_t mintxg, uint64_t maxtxg, | |
319 | uint64_t *dasizep) | |
320 | { | |
321 | uint64_t size = 0; | |
322 | uint64_t itor = 0; | |
323 | blkptr_t bp; | |
324 | int err; | |
325 | ||
326 | /* | |
327 | * As an optimization, if they want the whole txg range, just | |
328 | * get bpl_bytes rather than iterating over the bps. | |
329 | */ | |
330 | if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX) { | |
331 | mutex_enter(&bpl->bpl_lock); | |
332 | err = bplist_hold(bpl); | |
333 | if (err == 0) | |
334 | *dasizep = bpl->bpl_phys->bpl_bytes; | |
335 | mutex_exit(&bpl->bpl_lock); | |
336 | return (err); | |
337 | } | |
338 | ||
339 | while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) { | |
340 | if (bp.blk_birth > mintxg && bp.blk_birth <= maxtxg) { | |
341 | size += | |
342 | bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), &bp); | |
343 | } | |
344 | } | |
345 | if (err == ENOENT) | |
346 | err = 0; | |
347 | *dasizep = size; | |
348 | return (err); | |
349 | } |