]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
9babb374 | 22 | * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
34dc7c2f BB |
23 | * Use is subject to license terms. |
24 | */ | |
c99c9001 | 25 | /* |
9bd274dd | 26 | * Copyright (c) 2012, 2014 by Delphix. All rights reserved. |
c99c9001 | 27 | */ |
34dc7c2f | 28 | |
34dc7c2f BB |
29 | #include <sys/zfs_context.h> |
30 | #include <sys/spa.h> | |
31 | #include <sys/dmu.h> | |
93cf2076 GW |
32 | #include <sys/dmu_tx.h> |
33 | #include <sys/dnode.h> | |
34 | #include <sys/dsl_pool.h> | |
34dc7c2f BB |
35 | #include <sys/zio.h> |
36 | #include <sys/space_map.h> | |
93cf2076 GW |
37 | #include <sys/refcount.h> |
38 | #include <sys/zfeature.h> | |
34dc7c2f BB |
39 | |
40 | /* | |
93cf2076 GW |
41 | * This value controls how the space map's block size is allowed to grow. |
42 | * If the value is set to the same size as SPACE_MAP_INITIAL_BLOCKSIZE then | |
43 | * the space map block size will remain fixed. Setting this value to something | |
44 | * greater than SPACE_MAP_INITIAL_BLOCKSIZE will allow the space map to | |
45 | * increase its block size as needed. To maintain backwards compatibility the | |
46 | * space map's block size must be a power of 2 and SPACE_MAP_INITIAL_BLOCKSIZE | |
47 | * or larger. | |
34dc7c2f | 48 | */ |
93cf2076 | 49 | int space_map_max_blksz = (1 << 12); |
34dc7c2f BB |
50 | |
51 | /* | |
93cf2076 GW |
52 | * Load the space map disk into the specified range tree. Segments of maptype |
53 | * are added to the range tree, other segment types are removed. | |
54 | * | |
34dc7c2f BB |
55 | * Note: space_map_load() will drop sm_lock across dmu_read() calls. |
56 | * The caller must be OK with this. | |
57 | */ | |
58 | int | |
93cf2076 | 59 | space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype) |
34dc7c2f BB |
60 | { |
61 | uint64_t *entry, *entry_map, *entry_map_end; | |
62 | uint64_t bufsize, size, offset, end, space; | |
34dc7c2f BB |
63 | int error = 0; |
64 | ||
65 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
34dc7c2f | 66 | |
93cf2076 GW |
67 | end = space_map_length(sm); |
68 | space = space_map_allocated(sm); | |
34dc7c2f | 69 | |
93cf2076 | 70 | VERIFY0(range_tree_space(rt)); |
34dc7c2f BB |
71 | |
72 | if (maptype == SM_FREE) { | |
93cf2076 | 73 | range_tree_add(rt, sm->sm_start, sm->sm_size); |
34dc7c2f BB |
74 | space = sm->sm_size - space; |
75 | } | |
76 | ||
93cf2076 | 77 | bufsize = MAX(sm->sm_blksz, SPA_MINBLOCKSIZE); |
34dc7c2f BB |
78 | entry_map = zio_buf_alloc(bufsize); |
79 | ||
80 | mutex_exit(sm->sm_lock); | |
93cf2076 GW |
81 | if (end > bufsize) { |
82 | dmu_prefetch(sm->sm_os, space_map_object(sm), bufsize, | |
83 | end - bufsize); | |
84 | } | |
34dc7c2f BB |
85 | mutex_enter(sm->sm_lock); |
86 | ||
87 | for (offset = 0; offset < end; offset += bufsize) { | |
88 | size = MIN(end - offset, bufsize); | |
89 | VERIFY(P2PHASE(size, sizeof (uint64_t)) == 0); | |
90 | VERIFY(size != 0); | |
93cf2076 | 91 | ASSERT3U(sm->sm_blksz, !=, 0); |
34dc7c2f BB |
92 | |
93 | dprintf("object=%llu offset=%llx size=%llx\n", | |
93cf2076 | 94 | space_map_object(sm), offset, size); |
34dc7c2f BB |
95 | |
96 | mutex_exit(sm->sm_lock); | |
93cf2076 GW |
97 | error = dmu_read(sm->sm_os, space_map_object(sm), offset, size, |
98 | entry_map, DMU_READ_PREFETCH); | |
34dc7c2f BB |
99 | mutex_enter(sm->sm_lock); |
100 | if (error != 0) | |
101 | break; | |
102 | ||
103 | entry_map_end = entry_map + (size / sizeof (uint64_t)); | |
104 | for (entry = entry_map; entry < entry_map_end; entry++) { | |
105 | uint64_t e = *entry; | |
93cf2076 | 106 | uint64_t offset, size; |
34dc7c2f BB |
107 | |
108 | if (SM_DEBUG_DECODE(e)) /* Skip debug entries */ | |
109 | continue; | |
110 | ||
93cf2076 GW |
111 | offset = (SM_OFFSET_DECODE(e) << sm->sm_shift) + |
112 | sm->sm_start; | |
113 | size = SM_RUN_DECODE(e) << sm->sm_shift; | |
114 | ||
115 | VERIFY0(P2PHASE(offset, 1ULL << sm->sm_shift)); | |
116 | VERIFY0(P2PHASE(size, 1ULL << sm->sm_shift)); | |
117 | VERIFY3U(offset, >=, sm->sm_start); | |
118 | VERIFY3U(offset + size, <=, sm->sm_start + sm->sm_size); | |
119 | if (SM_TYPE_DECODE(e) == maptype) { | |
120 | VERIFY3U(range_tree_space(rt) + size, <=, | |
121 | sm->sm_size); | |
122 | range_tree_add(rt, offset, size); | |
123 | } else { | |
124 | range_tree_remove(rt, offset, size); | |
125 | } | |
34dc7c2f BB |
126 | } |
127 | } | |
128 | ||
93cf2076 GW |
129 | if (error == 0) |
130 | VERIFY3U(range_tree_space(rt), ==, space); | |
131 | else | |
132 | range_tree_vacate(rt, NULL, NULL); | |
34dc7c2f BB |
133 | |
134 | zio_buf_free(entry_map, bufsize); | |
34dc7c2f BB |
135 | return (error); |
136 | } | |
137 | ||
138 | void | |
93cf2076 | 139 | space_map_histogram_clear(space_map_t *sm) |
34dc7c2f | 140 | { |
93cf2076 GW |
141 | if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t)) |
142 | return; | |
34dc7c2f | 143 | |
93cf2076 GW |
144 | bzero(sm->sm_phys->smp_histogram, sizeof (sm->sm_phys->smp_histogram)); |
145 | } | |
34dc7c2f | 146 | |
93cf2076 GW |
147 | boolean_t |
148 | space_map_histogram_verify(space_map_t *sm, range_tree_t *rt) | |
149 | { | |
150 | int i; | |
34dc7c2f | 151 | |
93cf2076 GW |
152 | /* |
153 | * Verify that the in-core range tree does not have any | |
154 | * ranges smaller than our sm_shift size. | |
155 | */ | |
156 | for (i = 0; i < sm->sm_shift; i++) { | |
157 | if (rt->rt_histogram[i] != 0) | |
158 | return (B_FALSE); | |
159 | } | |
160 | return (B_TRUE); | |
34dc7c2f BB |
161 | } |
162 | ||
93cf2076 GW |
163 | void |
164 | space_map_histogram_add(space_map_t *sm, range_tree_t *rt, dmu_tx_t *tx) | |
9babb374 | 165 | { |
93cf2076 GW |
166 | int idx = 0; |
167 | int i; | |
168 | ||
169 | ASSERT(MUTEX_HELD(rt->rt_lock)); | |
170 | ASSERT(dmu_tx_is_syncing(tx)); | |
171 | VERIFY3U(space_map_object(sm), !=, 0); | |
172 | ||
173 | if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t)) | |
174 | return; | |
175 | ||
176 | dmu_buf_will_dirty(sm->sm_dbuf, tx); | |
177 | ||
178 | ASSERT(space_map_histogram_verify(sm, rt)); | |
179 | ||
180 | /* | |
181 | * Transfer the content of the range tree histogram to the space | |
182 | * map histogram. The space map histogram contains 32 buckets ranging | |
183 | * between 2^sm_shift to 2^(32+sm_shift-1). The range tree, | |
184 | * however, can represent ranges from 2^0 to 2^63. Since the space | |
185 | * map only cares about allocatable blocks (minimum of sm_shift) we | |
186 | * can safely ignore all ranges in the range tree smaller than sm_shift. | |
187 | */ | |
188 | for (i = sm->sm_shift; i < RANGE_TREE_HISTOGRAM_SIZE; i++) { | |
189 | ||
190 | /* | |
191 | * Since the largest histogram bucket in the space map is | |
192 | * 2^(32+sm_shift-1), we need to normalize the values in | |
193 | * the range tree for any bucket larger than that size. For | |
194 | * example given an sm_shift of 9, ranges larger than 2^40 | |
195 | * would get normalized as if they were 1TB ranges. Assume | |
196 | * the range tree had a count of 5 in the 2^44 (16TB) bucket, | |
197 | * the calculation below would normalize this to 5 * 2^4 (16). | |
198 | */ | |
199 | ASSERT3U(i, >=, idx + sm->sm_shift); | |
200 | sm->sm_phys->smp_histogram[idx] += | |
201 | rt->rt_histogram[i] << (i - idx - sm->sm_shift); | |
202 | ||
203 | /* | |
204 | * Increment the space map's index as long as we haven't | |
205 | * reached the maximum bucket size. Accumulate all ranges | |
206 | * larger than the max bucket size into the last bucket. | |
207 | */ | |
208 | if (idx < SPACE_MAP_HISTOGRAM_SIZE(sm) - 1) { | |
209 | ASSERT3U(idx + sm->sm_shift, ==, i); | |
210 | idx++; | |
211 | ASSERT3U(idx, <, SPACE_MAP_HISTOGRAM_SIZE(sm)); | |
212 | } | |
213 | } | |
9babb374 BB |
214 | } |
215 | ||
34dc7c2f | 216 | uint64_t |
93cf2076 | 217 | space_map_entries(space_map_t *sm, range_tree_t *rt) |
34dc7c2f | 218 | { |
93cf2076 GW |
219 | avl_tree_t *t = &rt->rt_root; |
220 | range_seg_t *rs; | |
221 | uint64_t size, entries; | |
34dc7c2f | 222 | |
93cf2076 GW |
223 | /* |
224 | * All space_maps always have a debug entry so account for it here. | |
225 | */ | |
226 | entries = 1; | |
34dc7c2f | 227 | |
93cf2076 GW |
228 | /* |
229 | * Traverse the range tree and calculate the number of space map | |
230 | * entries that would be required to write out the range tree. | |
231 | */ | |
232 | for (rs = avl_first(t); rs != NULL; rs = AVL_NEXT(t, rs)) { | |
233 | size = (rs->rs_end - rs->rs_start) >> sm->sm_shift; | |
234 | entries += howmany(size, SM_RUN_MAX); | |
235 | } | |
236 | return (entries); | |
34dc7c2f BB |
237 | } |
238 | ||
239 | void | |
93cf2076 | 240 | space_map_set_blocksize(space_map_t *sm, uint64_t size, dmu_tx_t *tx) |
34dc7c2f | 241 | { |
93cf2076 GW |
242 | uint32_t blksz; |
243 | u_longlong_t blocks; | |
244 | ||
245 | ASSERT3U(sm->sm_blksz, !=, 0); | |
246 | ASSERT3U(space_map_object(sm), !=, 0); | |
247 | ASSERT(sm->sm_dbuf != NULL); | |
248 | VERIFY(ISP2(space_map_max_blksz)); | |
249 | ||
250 | if (sm->sm_blksz >= space_map_max_blksz) | |
251 | return; | |
252 | ||
253 | /* | |
254 | * The object contains more than one block so we can't adjust | |
255 | * its size. | |
256 | */ | |
257 | if (sm->sm_phys->smp_objsize > sm->sm_blksz) | |
258 | return; | |
259 | ||
260 | if (size > sm->sm_blksz) { | |
261 | uint64_t newsz; | |
262 | ||
263 | /* | |
264 | * Older software versions treat space map blocks as fixed | |
265 | * entities. The DMU is capable of handling different block | |
266 | * sizes making it possible for us to increase the | |
267 | * block size and maintain backwards compatibility. The | |
268 | * caveat is that the new block sizes must be a | |
269 | * power of 2 so that old software can append to the file, | |
270 | * adding more blocks. The block size can grow until it | |
271 | * reaches space_map_max_blksz. | |
272 | */ | |
9bd274dd | 273 | newsz = ISP2(size) ? size : 1ULL << highbit64(size); |
93cf2076 GW |
274 | if (newsz > space_map_max_blksz) |
275 | newsz = space_map_max_blksz; | |
276 | ||
277 | VERIFY0(dmu_object_set_blocksize(sm->sm_os, | |
278 | space_map_object(sm), newsz, 0, tx)); | |
279 | dmu_object_size_from_db(sm->sm_dbuf, &blksz, &blocks); | |
280 | ||
281 | zfs_dbgmsg("txg %llu, spa %s, increasing blksz from %d to %d", | |
282 | dmu_tx_get_txg(tx), spa_name(dmu_objset_spa(sm->sm_os)), | |
283 | sm->sm_blksz, blksz); | |
284 | ||
285 | VERIFY3U(newsz, ==, blksz); | |
286 | VERIFY3U(sm->sm_blksz, <, blksz); | |
287 | sm->sm_blksz = blksz; | |
288 | } | |
34dc7c2f BB |
289 | } |
290 | ||
291 | /* | |
93cf2076 | 292 | * Note: space_map_write() will drop sm_lock across dmu_write() calls. |
34dc7c2f BB |
293 | */ |
294 | void | |
93cf2076 GW |
295 | space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype, |
296 | dmu_tx_t *tx) | |
34dc7c2f | 297 | { |
93cf2076 | 298 | objset_t *os = sm->sm_os; |
34dc7c2f | 299 | spa_t *spa = dmu_objset_spa(os); |
93cf2076 GW |
300 | avl_tree_t *t = &rt->rt_root; |
301 | range_seg_t *rs; | |
302 | uint64_t size, total, rt_space, nodes; | |
34dc7c2f | 303 | uint64_t *entry, *entry_map, *entry_map_end; |
93cf2076 | 304 | uint64_t newsz, expected_entries, actual_entries = 1; |
34dc7c2f | 305 | |
93cf2076 GW |
306 | ASSERT(MUTEX_HELD(rt->rt_lock)); |
307 | ASSERT(dsl_pool_sync_context(dmu_objset_pool(os))); | |
308 | VERIFY3U(space_map_object(sm), !=, 0); | |
309 | dmu_buf_will_dirty(sm->sm_dbuf, tx); | |
34dc7c2f | 310 | |
93cf2076 GW |
311 | /* |
312 | * This field is no longer necessary since the in-core space map | |
313 | * now contains the object number but is maintained for backwards | |
314 | * compatibility. | |
315 | */ | |
316 | sm->sm_phys->smp_object = sm->sm_object; | |
34dc7c2f | 317 | |
93cf2076 GW |
318 | if (range_tree_space(rt) == 0) { |
319 | VERIFY3U(sm->sm_object, ==, sm->sm_phys->smp_object); | |
320 | return; | |
321 | } | |
34dc7c2f BB |
322 | |
323 | if (maptype == SM_ALLOC) | |
93cf2076 | 324 | sm->sm_phys->smp_alloc += range_tree_space(rt); |
34dc7c2f | 325 | else |
93cf2076 | 326 | sm->sm_phys->smp_alloc -= range_tree_space(rt); |
34dc7c2f | 327 | |
93cf2076 GW |
328 | expected_entries = space_map_entries(sm, rt); |
329 | ||
330 | /* | |
331 | * Calculate the new size for the space map on-disk and see if | |
332 | * we can grow the block size to accommodate the new size. | |
333 | */ | |
334 | newsz = sm->sm_phys->smp_objsize + expected_entries * sizeof (uint64_t); | |
335 | space_map_set_blocksize(sm, newsz, tx); | |
336 | ||
337 | entry_map = zio_buf_alloc(sm->sm_blksz); | |
338 | entry_map_end = entry_map + (sm->sm_blksz / sizeof (uint64_t)); | |
34dc7c2f BB |
339 | entry = entry_map; |
340 | ||
341 | *entry++ = SM_DEBUG_ENCODE(1) | | |
342 | SM_DEBUG_ACTION_ENCODE(maptype) | | |
343 | SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) | | |
344 | SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx)); | |
345 | ||
e51be066 | 346 | total = 0; |
93cf2076 GW |
347 | nodes = avl_numnodes(&rt->rt_root); |
348 | rt_space = range_tree_space(rt); | |
349 | for (rs = avl_first(t); rs != NULL; rs = AVL_NEXT(t, rs)) { | |
350 | uint64_t start; | |
351 | ||
352 | size = (rs->rs_end - rs->rs_start) >> sm->sm_shift; | |
353 | start = (rs->rs_start - sm->sm_start) >> sm->sm_shift; | |
34dc7c2f | 354 | |
93cf2076 GW |
355 | total += size << sm->sm_shift; |
356 | ||
357 | while (size != 0) { | |
358 | uint64_t run_len; | |
34dc7c2f | 359 | |
34dc7c2f BB |
360 | run_len = MIN(size, SM_RUN_MAX); |
361 | ||
362 | if (entry == entry_map_end) { | |
93cf2076 GW |
363 | mutex_exit(rt->rt_lock); |
364 | dmu_write(os, space_map_object(sm), | |
365 | sm->sm_phys->smp_objsize, sm->sm_blksz, | |
366 | entry_map, tx); | |
367 | mutex_enter(rt->rt_lock); | |
368 | sm->sm_phys->smp_objsize += sm->sm_blksz; | |
34dc7c2f BB |
369 | entry = entry_map; |
370 | } | |
371 | ||
372 | *entry++ = SM_OFFSET_ENCODE(start) | | |
373 | SM_TYPE_ENCODE(maptype) | | |
374 | SM_RUN_ENCODE(run_len); | |
375 | ||
376 | start += run_len; | |
377 | size -= run_len; | |
93cf2076 | 378 | actual_entries++; |
34dc7c2f | 379 | } |
34dc7c2f BB |
380 | } |
381 | ||
382 | if (entry != entry_map) { | |
383 | size = (entry - entry_map) * sizeof (uint64_t); | |
93cf2076 GW |
384 | mutex_exit(rt->rt_lock); |
385 | dmu_write(os, space_map_object(sm), sm->sm_phys->smp_objsize, | |
34dc7c2f | 386 | size, entry_map, tx); |
93cf2076 GW |
387 | mutex_enter(rt->rt_lock); |
388 | sm->sm_phys->smp_objsize += size; | |
34dc7c2f | 389 | } |
93cf2076 | 390 | ASSERT3U(expected_entries, ==, actual_entries); |
34dc7c2f | 391 | |
55d85d5a GW |
392 | /* |
393 | * Ensure that the space_map's accounting wasn't changed | |
394 | * while we were in the middle of writing it out. | |
395 | */ | |
93cf2076 GW |
396 | VERIFY3U(nodes, ==, avl_numnodes(&rt->rt_root)); |
397 | VERIFY3U(range_tree_space(rt), ==, rt_space); | |
398 | VERIFY3U(range_tree_space(rt), ==, total); | |
55d85d5a | 399 | |
93cf2076 | 400 | zio_buf_free(entry_map, sm->sm_blksz); |
34dc7c2f BB |
401 | } |
402 | ||
93cf2076 GW |
403 | static int |
404 | space_map_open_impl(space_map_t *sm) | |
34dc7c2f | 405 | { |
93cf2076 GW |
406 | int error; |
407 | u_longlong_t blocks; | |
408 | ||
409 | error = dmu_bonus_hold(sm->sm_os, sm->sm_object, sm, &sm->sm_dbuf); | |
410 | if (error) | |
411 | return (error); | |
34dc7c2f | 412 | |
93cf2076 GW |
413 | dmu_object_size_from_db(sm->sm_dbuf, &sm->sm_blksz, &blocks); |
414 | sm->sm_phys = sm->sm_dbuf->db_data; | |
415 | return (0); | |
34dc7c2f | 416 | } |
fb5f0bc8 | 417 | |
93cf2076 GW |
418 | int |
419 | space_map_open(space_map_t **smp, objset_t *os, uint64_t object, | |
420 | uint64_t start, uint64_t size, uint8_t shift, kmutex_t *lp) | |
fb5f0bc8 | 421 | { |
93cf2076 GW |
422 | space_map_t *sm; |
423 | int error; | |
fb5f0bc8 | 424 | |
93cf2076 GW |
425 | ASSERT(*smp == NULL); |
426 | ASSERT(os != NULL); | |
427 | ASSERT(object != 0); | |
fb5f0bc8 | 428 | |
93cf2076 | 429 | sm = kmem_alloc(sizeof (space_map_t), KM_PUSHPAGE); |
fb5f0bc8 | 430 | |
93cf2076 GW |
431 | sm->sm_start = start; |
432 | sm->sm_size = size; | |
433 | sm->sm_shift = shift; | |
434 | sm->sm_lock = lp; | |
435 | sm->sm_os = os; | |
436 | sm->sm_object = object; | |
437 | sm->sm_length = 0; | |
438 | sm->sm_alloc = 0; | |
439 | sm->sm_blksz = 0; | |
440 | sm->sm_dbuf = NULL; | |
441 | sm->sm_phys = NULL; | |
442 | ||
443 | error = space_map_open_impl(sm); | |
444 | if (error != 0) { | |
445 | space_map_close(sm); | |
446 | return (error); | |
447 | } | |
fb5f0bc8 | 448 | |
93cf2076 GW |
449 | *smp = sm; |
450 | ||
451 | return (0); | |
fb5f0bc8 BB |
452 | } |
453 | ||
454 | void | |
93cf2076 | 455 | space_map_close(space_map_t *sm) |
fb5f0bc8 | 456 | { |
93cf2076 GW |
457 | if (sm == NULL) |
458 | return; | |
fb5f0bc8 | 459 | |
93cf2076 GW |
460 | if (sm->sm_dbuf != NULL) |
461 | dmu_buf_rele(sm->sm_dbuf, sm); | |
462 | sm->sm_dbuf = NULL; | |
463 | sm->sm_phys = NULL; | |
fb5f0bc8 | 464 | |
93cf2076 | 465 | kmem_free(sm, sizeof (*sm)); |
fb5f0bc8 BB |
466 | } |
467 | ||
468 | static void | |
93cf2076 | 469 | space_map_reallocate(space_map_t *sm, dmu_tx_t *tx) |
fb5f0bc8 | 470 | { |
93cf2076 | 471 | ASSERT(dmu_tx_is_syncing(tx)); |
fb5f0bc8 | 472 | |
93cf2076 GW |
473 | space_map_free(sm, tx); |
474 | dmu_buf_rele(sm->sm_dbuf, sm); | |
fb5f0bc8 | 475 | |
93cf2076 GW |
476 | sm->sm_object = space_map_alloc(sm->sm_os, tx); |
477 | VERIFY0(space_map_open_impl(sm)); | |
fb5f0bc8 BB |
478 | } |
479 | ||
480 | void | |
93cf2076 | 481 | space_map_truncate(space_map_t *sm, dmu_tx_t *tx) |
fb5f0bc8 | 482 | { |
93cf2076 GW |
483 | objset_t *os = sm->sm_os; |
484 | spa_t *spa = dmu_objset_spa(os); | |
93cf2076 GW |
485 | dmu_object_info_t doi; |
486 | int bonuslen; | |
487 | ||
488 | ASSERT(dsl_pool_sync_context(dmu_objset_pool(os))); | |
489 | ASSERT(dmu_tx_is_syncing(tx)); | |
490 | ||
491 | VERIFY0(dmu_free_range(os, space_map_object(sm), 0, -1ULL, tx)); | |
492 | dmu_object_info_from_db(sm->sm_dbuf, &doi); | |
493 | ||
fa86b5db | 494 | if (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) { |
93cf2076 GW |
495 | bonuslen = sizeof (space_map_phys_t); |
496 | ASSERT3U(bonuslen, <=, dmu_bonus_max()); | |
497 | } else { | |
498 | bonuslen = SPACE_MAP_SIZE_V0; | |
499 | } | |
500 | ||
501 | if (bonuslen != doi.doi_bonus_size || | |
502 | doi.doi_data_block_size != SPACE_MAP_INITIAL_BLOCKSIZE) { | |
503 | zfs_dbgmsg("txg %llu, spa %s, reallocating: " | |
504 | "old bonus %u, old blocksz %u", dmu_tx_get_txg(tx), | |
505 | spa_name(spa), doi.doi_bonus_size, doi.doi_data_block_size); | |
506 | space_map_reallocate(sm, tx); | |
507 | VERIFY3U(sm->sm_blksz, ==, SPACE_MAP_INITIAL_BLOCKSIZE); | |
508 | } | |
509 | ||
510 | dmu_buf_will_dirty(sm->sm_dbuf, tx); | |
511 | sm->sm_phys->smp_objsize = 0; | |
512 | sm->sm_phys->smp_alloc = 0; | |
fb5f0bc8 BB |
513 | } |
514 | ||
515 | /* | |
93cf2076 | 516 | * Update the in-core space_map allocation and length values. |
fb5f0bc8 BB |
517 | */ |
518 | void | |
93cf2076 | 519 | space_map_update(space_map_t *sm) |
fb5f0bc8 | 520 | { |
93cf2076 GW |
521 | if (sm == NULL) |
522 | return; | |
fb5f0bc8 BB |
523 | |
524 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
525 | ||
93cf2076 GW |
526 | sm->sm_alloc = sm->sm_phys->smp_alloc; |
527 | sm->sm_length = sm->sm_phys->smp_objsize; | |
528 | } | |
529 | ||
530 | uint64_t | |
531 | space_map_alloc(objset_t *os, dmu_tx_t *tx) | |
532 | { | |
533 | spa_t *spa = dmu_objset_spa(os); | |
93cf2076 GW |
534 | uint64_t object; |
535 | int bonuslen; | |
536 | ||
fa86b5db MA |
537 | if (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) { |
538 | spa_feature_incr(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM, tx); | |
93cf2076 GW |
539 | bonuslen = sizeof (space_map_phys_t); |
540 | ASSERT3U(bonuslen, <=, dmu_bonus_max()); | |
541 | } else { | |
542 | bonuslen = SPACE_MAP_SIZE_V0; | |
543 | } | |
544 | ||
545 | object = dmu_object_alloc(os, | |
546 | DMU_OT_SPACE_MAP, SPACE_MAP_INITIAL_BLOCKSIZE, | |
547 | DMU_OT_SPACE_MAP_HEADER, bonuslen, tx); | |
548 | ||
549 | return (object); | |
fb5f0bc8 BB |
550 | } |
551 | ||
fb5f0bc8 | 552 | void |
93cf2076 | 553 | space_map_free(space_map_t *sm, dmu_tx_t *tx) |
fb5f0bc8 | 554 | { |
93cf2076 | 555 | spa_t *spa; |
fb5f0bc8 | 556 | |
93cf2076 GW |
557 | if (sm == NULL) |
558 | return; | |
fb5f0bc8 | 559 | |
93cf2076 | 560 | spa = dmu_objset_spa(sm->sm_os); |
fa86b5db | 561 | if (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) { |
93cf2076 | 562 | dmu_object_info_t doi; |
fb5f0bc8 | 563 | |
93cf2076 GW |
564 | dmu_object_info_from_db(sm->sm_dbuf, &doi); |
565 | if (doi.doi_bonus_size != SPACE_MAP_SIZE_V0) { | |
fa86b5db MA |
566 | VERIFY(spa_feature_is_active(spa, |
567 | SPA_FEATURE_SPACEMAP_HISTOGRAM)); | |
568 | spa_feature_decr(spa, | |
569 | SPA_FEATURE_SPACEMAP_HISTOGRAM, tx); | |
fb5f0bc8 BB |
570 | } |
571 | } | |
93cf2076 GW |
572 | |
573 | VERIFY3U(dmu_object_free(sm->sm_os, space_map_object(sm), tx), ==, 0); | |
574 | sm->sm_object = 0; | |
575 | } | |
576 | ||
577 | uint64_t | |
578 | space_map_object(space_map_t *sm) | |
579 | { | |
580 | return (sm != NULL ? sm->sm_object : 0); | |
581 | } | |
582 | ||
583 | /* | |
584 | * Returns the already synced, on-disk allocated space. | |
585 | */ | |
586 | uint64_t | |
587 | space_map_allocated(space_map_t *sm) | |
588 | { | |
589 | return (sm != NULL ? sm->sm_alloc : 0); | |
590 | } | |
591 | ||
592 | /* | |
593 | * Returns the already synced, on-disk length; | |
594 | */ | |
595 | uint64_t | |
596 | space_map_length(space_map_t *sm) | |
597 | { | |
598 | return (sm != NULL ? sm->sm_length : 0); | |
599 | } | |
600 | ||
601 | /* | |
602 | * Returns the allocated space that is currently syncing. | |
603 | */ | |
604 | int64_t | |
605 | space_map_alloc_delta(space_map_t *sm) | |
606 | { | |
607 | if (sm == NULL) | |
608 | return (0); | |
609 | ASSERT(sm->sm_dbuf != NULL); | |
610 | return (sm->sm_phys->smp_alloc - space_map_allocated(sm)); | |
fb5f0bc8 | 611 | } |