]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
9babb374 | 22 | * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
34dc7c2f BB |
23 | * Use is subject to license terms. |
24 | */ | |
c06d4368 AX |
25 | /* |
26 | * Copyright (c) 2012 by Delphix. All rights reserved. | |
27 | */ | |
34dc7c2f | 28 | |
34dc7c2f BB |
29 | #include <sys/zfs_context.h> |
30 | #include <sys/spa.h> | |
31 | #include <sys/dmu.h> | |
32 | #include <sys/zio.h> | |
33 | #include <sys/space_map.h> | |
34 | ||
c06d4368 AX |
35 | static kmem_cache_t *space_seg_cache; |
36 | ||
37 | void | |
38 | space_map_init(void) | |
39 | { | |
40 | ASSERT(space_seg_cache == NULL); | |
41 | space_seg_cache = kmem_cache_create("space_seg_cache", | |
42 | sizeof (space_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0); | |
43 | } | |
44 | ||
45 | void | |
46 | space_map_fini(void) | |
47 | { | |
48 | kmem_cache_destroy(space_seg_cache); | |
49 | space_seg_cache = NULL; | |
50 | } | |
51 | ||
34dc7c2f BB |
52 | /* |
53 | * Space map routines. | |
54 | * NOTE: caller is responsible for all locking. | |
55 | */ | |
56 | static int | |
57 | space_map_seg_compare(const void *x1, const void *x2) | |
58 | { | |
59 | const space_seg_t *s1 = x1; | |
60 | const space_seg_t *s2 = x2; | |
61 | ||
62 | if (s1->ss_start < s2->ss_start) { | |
63 | if (s1->ss_end > s2->ss_start) | |
64 | return (0); | |
65 | return (-1); | |
66 | } | |
67 | if (s1->ss_start > s2->ss_start) { | |
68 | if (s1->ss_start < s2->ss_end) | |
69 | return (0); | |
70 | return (1); | |
71 | } | |
72 | return (0); | |
73 | } | |
74 | ||
75 | void | |
76 | space_map_create(space_map_t *sm, uint64_t start, uint64_t size, uint8_t shift, | |
77 | kmutex_t *lp) | |
78 | { | |
79 | bzero(sm, sizeof (*sm)); | |
80 | ||
fb5f0bc8 BB |
81 | cv_init(&sm->sm_load_cv, NULL, CV_DEFAULT, NULL); |
82 | ||
34dc7c2f BB |
83 | avl_create(&sm->sm_root, space_map_seg_compare, |
84 | sizeof (space_seg_t), offsetof(struct space_seg, ss_node)); | |
85 | ||
86 | sm->sm_start = start; | |
87 | sm->sm_size = size; | |
88 | sm->sm_shift = shift; | |
89 | sm->sm_lock = lp; | |
90 | } | |
91 | ||
92 | void | |
93 | space_map_destroy(space_map_t *sm) | |
94 | { | |
95 | ASSERT(!sm->sm_loaded && !sm->sm_loading); | |
c06d4368 | 96 | VERIFY0(sm->sm_space); |
34dc7c2f | 97 | avl_destroy(&sm->sm_root); |
fb5f0bc8 | 98 | cv_destroy(&sm->sm_load_cv); |
34dc7c2f BB |
99 | } |
100 | ||
101 | void | |
102 | space_map_add(space_map_t *sm, uint64_t start, uint64_t size) | |
103 | { | |
104 | avl_index_t where; | |
105 | space_seg_t ssearch, *ss_before, *ss_after, *ss; | |
106 | uint64_t end = start + size; | |
107 | int merge_before, merge_after; | |
108 | ||
109 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
c06d4368 | 110 | VERIFY(!sm->sm_condensing); |
34dc7c2f BB |
111 | VERIFY(size != 0); |
112 | VERIFY3U(start, >=, sm->sm_start); | |
113 | VERIFY3U(end, <=, sm->sm_start + sm->sm_size); | |
114 | VERIFY(sm->sm_space + size <= sm->sm_size); | |
115 | VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); | |
116 | VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); | |
117 | ||
118 | ssearch.ss_start = start; | |
119 | ssearch.ss_end = end; | |
120 | ss = avl_find(&sm->sm_root, &ssearch, &where); | |
121 | ||
122 | if (ss != NULL && ss->ss_start <= start && ss->ss_end >= end) { | |
123 | zfs_panic_recover("zfs: allocating allocated segment" | |
124 | "(offset=%llu size=%llu)\n", | |
125 | (longlong_t)start, (longlong_t)size); | |
126 | return; | |
127 | } | |
128 | ||
129 | /* Make sure we don't overlap with either of our neighbors */ | |
130 | VERIFY(ss == NULL); | |
131 | ||
132 | ss_before = avl_nearest(&sm->sm_root, where, AVL_BEFORE); | |
133 | ss_after = avl_nearest(&sm->sm_root, where, AVL_AFTER); | |
134 | ||
135 | merge_before = (ss_before != NULL && ss_before->ss_end == start); | |
136 | merge_after = (ss_after != NULL && ss_after->ss_start == end); | |
137 | ||
138 | if (merge_before && merge_after) { | |
139 | avl_remove(&sm->sm_root, ss_before); | |
9babb374 BB |
140 | if (sm->sm_pp_root) { |
141 | avl_remove(sm->sm_pp_root, ss_before); | |
142 | avl_remove(sm->sm_pp_root, ss_after); | |
143 | } | |
34dc7c2f | 144 | ss_after->ss_start = ss_before->ss_start; |
c06d4368 | 145 | kmem_cache_free(space_seg_cache, ss_before); |
9babb374 | 146 | ss = ss_after; |
34dc7c2f BB |
147 | } else if (merge_before) { |
148 | ss_before->ss_end = end; | |
9babb374 BB |
149 | if (sm->sm_pp_root) |
150 | avl_remove(sm->sm_pp_root, ss_before); | |
151 | ss = ss_before; | |
34dc7c2f BB |
152 | } else if (merge_after) { |
153 | ss_after->ss_start = start; | |
9babb374 BB |
154 | if (sm->sm_pp_root) |
155 | avl_remove(sm->sm_pp_root, ss_after); | |
156 | ss = ss_after; | |
34dc7c2f | 157 | } else { |
c06d4368 | 158 | ss = kmem_cache_alloc(space_seg_cache, KM_PUSHPAGE); |
34dc7c2f BB |
159 | ss->ss_start = start; |
160 | ss->ss_end = end; | |
161 | avl_insert(&sm->sm_root, ss, where); | |
162 | } | |
163 | ||
9babb374 BB |
164 | if (sm->sm_pp_root) |
165 | avl_add(sm->sm_pp_root, ss); | |
166 | ||
34dc7c2f BB |
167 | sm->sm_space += size; |
168 | } | |
169 | ||
170 | void | |
171 | space_map_remove(space_map_t *sm, uint64_t start, uint64_t size) | |
172 | { | |
173 | avl_index_t where; | |
174 | space_seg_t ssearch, *ss, *newseg; | |
175 | uint64_t end = start + size; | |
176 | int left_over, right_over; | |
177 | ||
178 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
c06d4368 | 179 | VERIFY(!sm->sm_condensing); |
34dc7c2f BB |
180 | VERIFY(size != 0); |
181 | VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); | |
182 | VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); | |
183 | ||
184 | ssearch.ss_start = start; | |
185 | ssearch.ss_end = end; | |
186 | ss = avl_find(&sm->sm_root, &ssearch, &where); | |
187 | ||
188 | /* Make sure we completely overlap with someone */ | |
189 | if (ss == NULL) { | |
190 | zfs_panic_recover("zfs: freeing free segment " | |
191 | "(offset=%llu size=%llu)", | |
192 | (longlong_t)start, (longlong_t)size); | |
193 | return; | |
194 | } | |
195 | VERIFY3U(ss->ss_start, <=, start); | |
196 | VERIFY3U(ss->ss_end, >=, end); | |
197 | VERIFY(sm->sm_space - size <= sm->sm_size); | |
198 | ||
199 | left_over = (ss->ss_start != start); | |
200 | right_over = (ss->ss_end != end); | |
201 | ||
9babb374 BB |
202 | if (sm->sm_pp_root) |
203 | avl_remove(sm->sm_pp_root, ss); | |
204 | ||
34dc7c2f | 205 | if (left_over && right_over) { |
c06d4368 | 206 | newseg = kmem_cache_alloc(space_seg_cache, KM_PUSHPAGE); |
34dc7c2f BB |
207 | newseg->ss_start = end; |
208 | newseg->ss_end = ss->ss_end; | |
209 | ss->ss_end = start; | |
210 | avl_insert_here(&sm->sm_root, newseg, ss, AVL_AFTER); | |
9babb374 BB |
211 | if (sm->sm_pp_root) |
212 | avl_add(sm->sm_pp_root, newseg); | |
34dc7c2f BB |
213 | } else if (left_over) { |
214 | ss->ss_end = start; | |
215 | } else if (right_over) { | |
216 | ss->ss_start = end; | |
217 | } else { | |
218 | avl_remove(&sm->sm_root, ss); | |
c06d4368 | 219 | kmem_cache_free(space_seg_cache, ss); |
9babb374 | 220 | ss = NULL; |
34dc7c2f BB |
221 | } |
222 | ||
9babb374 BB |
223 | if (sm->sm_pp_root && ss != NULL) |
224 | avl_add(sm->sm_pp_root, ss); | |
225 | ||
34dc7c2f BB |
226 | sm->sm_space -= size; |
227 | } | |
228 | ||
fb5f0bc8 | 229 | boolean_t |
34dc7c2f BB |
230 | space_map_contains(space_map_t *sm, uint64_t start, uint64_t size) |
231 | { | |
232 | avl_index_t where; | |
233 | space_seg_t ssearch, *ss; | |
234 | uint64_t end = start + size; | |
235 | ||
236 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
237 | VERIFY(size != 0); | |
238 | VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); | |
239 | VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); | |
240 | ||
241 | ssearch.ss_start = start; | |
242 | ssearch.ss_end = end; | |
243 | ss = avl_find(&sm->sm_root, &ssearch, &where); | |
244 | ||
245 | return (ss != NULL && ss->ss_start <= start && ss->ss_end >= end); | |
246 | } | |
247 | ||
c06d4368 AX |
248 | void |
249 | space_map_swap(space_map_t **msrc, space_map_t **mdst) | |
250 | { | |
251 | space_map_t *sm; | |
252 | ||
253 | ASSERT(MUTEX_HELD((*msrc)->sm_lock)); | |
254 | ASSERT0((*mdst)->sm_space); | |
255 | ASSERT0(avl_numnodes(&(*mdst)->sm_root)); | |
256 | ||
257 | sm = *msrc; | |
258 | *msrc = *mdst; | |
259 | *mdst = sm; | |
260 | } | |
261 | ||
34dc7c2f BB |
262 | void |
263 | space_map_vacate(space_map_t *sm, space_map_func_t *func, space_map_t *mdest) | |
264 | { | |
265 | space_seg_t *ss; | |
266 | void *cookie = NULL; | |
267 | ||
268 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
269 | ||
270 | while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) { | |
271 | if (func != NULL) | |
272 | func(mdest, ss->ss_start, ss->ss_end - ss->ss_start); | |
c06d4368 | 273 | kmem_cache_free(space_seg_cache, ss); |
34dc7c2f BB |
274 | } |
275 | sm->sm_space = 0; | |
276 | } | |
277 | ||
278 | void | |
279 | space_map_walk(space_map_t *sm, space_map_func_t *func, space_map_t *mdest) | |
280 | { | |
281 | space_seg_t *ss; | |
282 | ||
34dc7c2f BB |
283 | ASSERT(MUTEX_HELD(sm->sm_lock)); |
284 | ||
fb5f0bc8 BB |
285 | for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss)) |
286 | func(mdest, ss->ss_start, ss->ss_end - ss->ss_start); | |
34dc7c2f BB |
287 | } |
288 | ||
289 | /* | |
290 | * Wait for any in-progress space_map_load() to complete. | |
291 | */ | |
292 | void | |
293 | space_map_load_wait(space_map_t *sm) | |
294 | { | |
295 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
296 | ||
428870ff BB |
297 | while (sm->sm_loading) { |
298 | ASSERT(!sm->sm_loaded); | |
34dc7c2f | 299 | cv_wait(&sm->sm_load_cv, sm->sm_lock); |
428870ff | 300 | } |
34dc7c2f BB |
301 | } |
302 | ||
303 | /* | |
304 | * Note: space_map_load() will drop sm_lock across dmu_read() calls. | |
305 | * The caller must be OK with this. | |
306 | */ | |
307 | int | |
308 | space_map_load(space_map_t *sm, space_map_ops_t *ops, uint8_t maptype, | |
309 | space_map_obj_t *smo, objset_t *os) | |
310 | { | |
311 | uint64_t *entry, *entry_map, *entry_map_end; | |
312 | uint64_t bufsize, size, offset, end, space; | |
313 | uint64_t mapstart = sm->sm_start; | |
314 | int error = 0; | |
315 | ||
316 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
428870ff BB |
317 | ASSERT(!sm->sm_loaded); |
318 | ASSERT(!sm->sm_loading); | |
34dc7c2f BB |
319 | |
320 | sm->sm_loading = B_TRUE; | |
321 | end = smo->smo_objsize; | |
322 | space = smo->smo_alloc; | |
323 | ||
324 | ASSERT(sm->sm_ops == NULL); | |
c06d4368 | 325 | VERIFY0(sm->sm_space); |
34dc7c2f BB |
326 | |
327 | if (maptype == SM_FREE) { | |
328 | space_map_add(sm, sm->sm_start, sm->sm_size); | |
329 | space = sm->sm_size - space; | |
330 | } | |
331 | ||
332 | bufsize = 1ULL << SPACE_MAP_BLOCKSHIFT; | |
333 | entry_map = zio_buf_alloc(bufsize); | |
334 | ||
335 | mutex_exit(sm->sm_lock); | |
336 | if (end > bufsize) | |
337 | dmu_prefetch(os, smo->smo_object, bufsize, end - bufsize); | |
338 | mutex_enter(sm->sm_lock); | |
339 | ||
340 | for (offset = 0; offset < end; offset += bufsize) { | |
341 | size = MIN(end - offset, bufsize); | |
342 | VERIFY(P2PHASE(size, sizeof (uint64_t)) == 0); | |
343 | VERIFY(size != 0); | |
344 | ||
345 | dprintf("object=%llu offset=%llx size=%llx\n", | |
346 | smo->smo_object, offset, size); | |
347 | ||
348 | mutex_exit(sm->sm_lock); | |
9babb374 BB |
349 | error = dmu_read(os, smo->smo_object, offset, size, entry_map, |
350 | DMU_READ_PREFETCH); | |
34dc7c2f BB |
351 | mutex_enter(sm->sm_lock); |
352 | if (error != 0) | |
353 | break; | |
354 | ||
355 | entry_map_end = entry_map + (size / sizeof (uint64_t)); | |
356 | for (entry = entry_map; entry < entry_map_end; entry++) { | |
357 | uint64_t e = *entry; | |
358 | ||
359 | if (SM_DEBUG_DECODE(e)) /* Skip debug entries */ | |
360 | continue; | |
361 | ||
362 | (SM_TYPE_DECODE(e) == maptype ? | |
363 | space_map_add : space_map_remove)(sm, | |
364 | (SM_OFFSET_DECODE(e) << sm->sm_shift) + mapstart, | |
365 | SM_RUN_DECODE(e) << sm->sm_shift); | |
366 | } | |
367 | } | |
368 | ||
369 | if (error == 0) { | |
370 | VERIFY3U(sm->sm_space, ==, space); | |
371 | ||
372 | sm->sm_loaded = B_TRUE; | |
373 | sm->sm_ops = ops; | |
374 | if (ops != NULL) | |
375 | ops->smop_load(sm); | |
376 | } else { | |
377 | space_map_vacate(sm, NULL, NULL); | |
378 | } | |
379 | ||
380 | zio_buf_free(entry_map, bufsize); | |
381 | ||
382 | sm->sm_loading = B_FALSE; | |
383 | ||
384 | cv_broadcast(&sm->sm_load_cv); | |
385 | ||
386 | return (error); | |
387 | } | |
388 | ||
389 | void | |
390 | space_map_unload(space_map_t *sm) | |
391 | { | |
392 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
393 | ||
394 | if (sm->sm_loaded && sm->sm_ops != NULL) | |
395 | sm->sm_ops->smop_unload(sm); | |
396 | ||
397 | sm->sm_loaded = B_FALSE; | |
398 | sm->sm_ops = NULL; | |
399 | ||
400 | space_map_vacate(sm, NULL, NULL); | |
401 | } | |
402 | ||
9babb374 BB |
403 | uint64_t |
404 | space_map_maxsize(space_map_t *sm) | |
405 | { | |
428870ff BB |
406 | ASSERT(sm->sm_ops != NULL); |
407 | return (sm->sm_ops->smop_max(sm)); | |
9babb374 BB |
408 | } |
409 | ||
34dc7c2f BB |
410 | uint64_t |
411 | space_map_alloc(space_map_t *sm, uint64_t size) | |
412 | { | |
413 | uint64_t start; | |
414 | ||
415 | start = sm->sm_ops->smop_alloc(sm, size); | |
416 | if (start != -1ULL) | |
417 | space_map_remove(sm, start, size); | |
418 | return (start); | |
419 | } | |
420 | ||
421 | void | |
422 | space_map_claim(space_map_t *sm, uint64_t start, uint64_t size) | |
423 | { | |
424 | sm->sm_ops->smop_claim(sm, start, size); | |
425 | space_map_remove(sm, start, size); | |
426 | } | |
427 | ||
428 | void | |
429 | space_map_free(space_map_t *sm, uint64_t start, uint64_t size) | |
430 | { | |
431 | space_map_add(sm, start, size); | |
432 | sm->sm_ops->smop_free(sm, start, size); | |
433 | } | |
434 | ||
435 | /* | |
436 | * Note: space_map_sync() will drop sm_lock across dmu_write() calls. | |
437 | */ | |
438 | void | |
439 | space_map_sync(space_map_t *sm, uint8_t maptype, | |
440 | space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx) | |
441 | { | |
442 | spa_t *spa = dmu_objset_spa(os); | |
c06d4368 | 443 | avl_tree_t *t = &sm->sm_root; |
34dc7c2f | 444 | space_seg_t *ss; |
c06d4368 | 445 | uint64_t bufsize, start, size, run_len, total, sm_space, nodes; |
34dc7c2f BB |
446 | uint64_t *entry, *entry_map, *entry_map_end; |
447 | ||
448 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
449 | ||
450 | if (sm->sm_space == 0) | |
451 | return; | |
452 | ||
453 | dprintf("object %4llu, txg %llu, pass %d, %c, count %lu, space %llx\n", | |
454 | smo->smo_object, dmu_tx_get_txg(tx), spa_sync_pass(spa), | |
455 | maptype == SM_ALLOC ? 'A' : 'F', avl_numnodes(&sm->sm_root), | |
456 | sm->sm_space); | |
457 | ||
458 | if (maptype == SM_ALLOC) | |
459 | smo->smo_alloc += sm->sm_space; | |
460 | else | |
461 | smo->smo_alloc -= sm->sm_space; | |
462 | ||
463 | bufsize = (8 + avl_numnodes(&sm->sm_root)) * sizeof (uint64_t); | |
464 | bufsize = MIN(bufsize, 1ULL << SPACE_MAP_BLOCKSHIFT); | |
465 | entry_map = zio_buf_alloc(bufsize); | |
466 | entry_map_end = entry_map + (bufsize / sizeof (uint64_t)); | |
467 | entry = entry_map; | |
468 | ||
469 | *entry++ = SM_DEBUG_ENCODE(1) | | |
470 | SM_DEBUG_ACTION_ENCODE(maptype) | | |
471 | SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) | | |
472 | SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx)); | |
473 | ||
c06d4368 AX |
474 | total = 0; |
475 | nodes = avl_numnodes(&sm->sm_root); | |
476 | sm_space = sm->sm_space; | |
477 | for (ss = avl_first(t); ss != NULL; ss = AVL_NEXT(t, ss)) { | |
34dc7c2f BB |
478 | size = ss->ss_end - ss->ss_start; |
479 | start = (ss->ss_start - sm->sm_start) >> sm->sm_shift; | |
480 | ||
c06d4368 | 481 | total += size; |
34dc7c2f BB |
482 | size >>= sm->sm_shift; |
483 | ||
484 | while (size) { | |
485 | run_len = MIN(size, SM_RUN_MAX); | |
486 | ||
487 | if (entry == entry_map_end) { | |
488 | mutex_exit(sm->sm_lock); | |
489 | dmu_write(os, smo->smo_object, smo->smo_objsize, | |
490 | bufsize, entry_map, tx); | |
491 | mutex_enter(sm->sm_lock); | |
492 | smo->smo_objsize += bufsize; | |
493 | entry = entry_map; | |
494 | } | |
495 | ||
496 | *entry++ = SM_OFFSET_ENCODE(start) | | |
497 | SM_TYPE_ENCODE(maptype) | | |
498 | SM_RUN_ENCODE(run_len); | |
499 | ||
500 | start += run_len; | |
501 | size -= run_len; | |
502 | } | |
34dc7c2f BB |
503 | } |
504 | ||
505 | if (entry != entry_map) { | |
506 | size = (entry - entry_map) * sizeof (uint64_t); | |
507 | mutex_exit(sm->sm_lock); | |
508 | dmu_write(os, smo->smo_object, smo->smo_objsize, | |
509 | size, entry_map, tx); | |
510 | mutex_enter(sm->sm_lock); | |
511 | smo->smo_objsize += size; | |
512 | } | |
513 | ||
c06d4368 AX |
514 | /* |
515 | * Ensure that the space_map's accounting wasn't changed | |
516 | * while we were in the middle of writing it out. | |
517 | */ | |
518 | VERIFY3U(nodes, ==, avl_numnodes(&sm->sm_root)); | |
519 | VERIFY3U(sm->sm_space, ==, sm_space); | |
520 | VERIFY3U(sm->sm_space, ==, total); | |
34dc7c2f | 521 | |
c06d4368 | 522 | zio_buf_free(entry_map, bufsize); |
34dc7c2f BB |
523 | } |
524 | ||
525 | void | |
526 | space_map_truncate(space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx) | |
527 | { | |
528 | VERIFY(dmu_free_range(os, smo->smo_object, 0, -1ULL, tx) == 0); | |
529 | ||
530 | smo->smo_objsize = 0; | |
531 | smo->smo_alloc = 0; | |
532 | } | |
fb5f0bc8 BB |
533 | |
534 | /* | |
535 | * Space map reference trees. | |
536 | * | |
537 | * A space map is a collection of integers. Every integer is either | |
538 | * in the map, or it's not. A space map reference tree generalizes | |
539 | * the idea: it allows its members to have arbitrary reference counts, | |
540 | * as opposed to the implicit reference count of 0 or 1 in a space map. | |
541 | * This representation comes in handy when computing the union or | |
542 | * intersection of multiple space maps. For example, the union of | |
543 | * N space maps is the subset of the reference tree with refcnt >= 1. | |
544 | * The intersection of N space maps is the subset with refcnt >= N. | |
545 | * | |
546 | * [It's very much like a Fourier transform. Unions and intersections | |
547 | * are hard to perform in the 'space map domain', so we convert the maps | |
548 | * into the 'reference count domain', where it's trivial, then invert.] | |
549 | * | |
550 | * vdev_dtl_reassess() uses computations of this form to determine | |
551 | * DTL_MISSING and DTL_OUTAGE for interior vdevs -- e.g. a RAID-Z vdev | |
552 | * has an outage wherever refcnt >= vdev_nparity + 1, and a mirror vdev | |
553 | * has an outage wherever refcnt >= vdev_children. | |
554 | */ | |
555 | static int | |
556 | space_map_ref_compare(const void *x1, const void *x2) | |
557 | { | |
558 | const space_ref_t *sr1 = x1; | |
559 | const space_ref_t *sr2 = x2; | |
560 | ||
561 | if (sr1->sr_offset < sr2->sr_offset) | |
562 | return (-1); | |
563 | if (sr1->sr_offset > sr2->sr_offset) | |
564 | return (1); | |
565 | ||
566 | if (sr1 < sr2) | |
567 | return (-1); | |
568 | if (sr1 > sr2) | |
569 | return (1); | |
570 | ||
571 | return (0); | |
572 | } | |
573 | ||
574 | void | |
575 | space_map_ref_create(avl_tree_t *t) | |
576 | { | |
577 | avl_create(t, space_map_ref_compare, | |
578 | sizeof (space_ref_t), offsetof(space_ref_t, sr_node)); | |
579 | } | |
580 | ||
581 | void | |
582 | space_map_ref_destroy(avl_tree_t *t) | |
583 | { | |
584 | space_ref_t *sr; | |
585 | void *cookie = NULL; | |
586 | ||
587 | while ((sr = avl_destroy_nodes(t, &cookie)) != NULL) | |
588 | kmem_free(sr, sizeof (*sr)); | |
589 | ||
590 | avl_destroy(t); | |
591 | } | |
592 | ||
593 | static void | |
594 | space_map_ref_add_node(avl_tree_t *t, uint64_t offset, int64_t refcnt) | |
595 | { | |
596 | space_ref_t *sr; | |
597 | ||
b8d06fca | 598 | sr = kmem_alloc(sizeof (*sr), KM_PUSHPAGE); |
fb5f0bc8 BB |
599 | sr->sr_offset = offset; |
600 | sr->sr_refcnt = refcnt; | |
601 | ||
602 | avl_add(t, sr); | |
603 | } | |
604 | ||
605 | void | |
606 | space_map_ref_add_seg(avl_tree_t *t, uint64_t start, uint64_t end, | |
607 | int64_t refcnt) | |
608 | { | |
609 | space_map_ref_add_node(t, start, refcnt); | |
610 | space_map_ref_add_node(t, end, -refcnt); | |
611 | } | |
612 | ||
613 | /* | |
614 | * Convert (or add) a space map into a reference tree. | |
615 | */ | |
616 | void | |
617 | space_map_ref_add_map(avl_tree_t *t, space_map_t *sm, int64_t refcnt) | |
618 | { | |
619 | space_seg_t *ss; | |
620 | ||
621 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
622 | ||
623 | for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss)) | |
624 | space_map_ref_add_seg(t, ss->ss_start, ss->ss_end, refcnt); | |
625 | } | |
626 | ||
627 | /* | |
628 | * Convert a reference tree into a space map. The space map will contain | |
629 | * all members of the reference tree for which refcnt >= minref. | |
630 | */ | |
631 | void | |
632 | space_map_ref_generate_map(avl_tree_t *t, space_map_t *sm, int64_t minref) | |
633 | { | |
634 | uint64_t start = -1ULL; | |
635 | int64_t refcnt = 0; | |
636 | space_ref_t *sr; | |
637 | ||
638 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
639 | ||
640 | space_map_vacate(sm, NULL, NULL); | |
641 | ||
642 | for (sr = avl_first(t); sr != NULL; sr = AVL_NEXT(t, sr)) { | |
643 | refcnt += sr->sr_refcnt; | |
644 | if (refcnt >= minref) { | |
645 | if (start == -1ULL) { | |
646 | start = sr->sr_offset; | |
647 | } | |
648 | } else { | |
649 | if (start != -1ULL) { | |
650 | uint64_t end = sr->sr_offset; | |
651 | ASSERT(start <= end); | |
652 | if (end > start) | |
653 | space_map_add(sm, start, end - start); | |
654 | start = -1ULL; | |
655 | } | |
656 | } | |
657 | } | |
658 | ASSERT(refcnt == 0); | |
659 | ASSERT(start == -1ULL); | |
660 | } |