4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
29 #include <sys/zfs_context.h>
32 #include <sys/dnode.h>
34 #include <sys/range_tree.h>
36 kmem_cache_t
*range_seg_cache
;
41 ASSERT(range_seg_cache
== NULL
);
42 range_seg_cache
= kmem_cache_create("range_seg_cache",
43 sizeof (range_seg_t
), 0, NULL
, NULL
, NULL
, NULL
, NULL
, 0);
49 kmem_cache_destroy(range_seg_cache
);
50 range_seg_cache
= NULL
;
54 range_tree_stat_verify(range_tree_t
*rt
)
57 uint64_t hist
[RANGE_TREE_HISTOGRAM_SIZE
] = { 0 };
60 for (rs
= avl_first(&rt
->rt_root
); rs
!= NULL
;
61 rs
= AVL_NEXT(&rt
->rt_root
, rs
)) {
62 uint64_t size
= rs
->rs_end
- rs
->rs_start
;
63 int idx
= highbit64(size
) - 1;
66 ASSERT3U(hist
[idx
], !=, 0);
69 for (i
= 0; i
< RANGE_TREE_HISTOGRAM_SIZE
; i
++) {
70 if (hist
[i
] != rt
->rt_histogram
[i
]) {
71 zfs_dbgmsg("i=%d, hist=%p, hist=%llu, rt_hist=%llu",
72 i
, hist
, hist
[i
], rt
->rt_histogram
[i
]);
74 VERIFY3U(hist
[i
], ==, rt
->rt_histogram
[i
]);
79 range_tree_stat_incr(range_tree_t
*rt
, range_seg_t
*rs
)
81 uint64_t size
= rs
->rs_end
- rs
->rs_start
;
82 int idx
= highbit64(size
) - 1;
86 sizeof (rt
->rt_histogram
) / sizeof (*rt
->rt_histogram
));
88 ASSERT(MUTEX_HELD(rt
->rt_lock
));
89 rt
->rt_histogram
[idx
]++;
90 ASSERT3U(rt
->rt_histogram
[idx
], !=, 0);
94 range_tree_stat_decr(range_tree_t
*rt
, range_seg_t
*rs
)
96 uint64_t size
= rs
->rs_end
- rs
->rs_start
;
97 int idx
= highbit64(size
) - 1;
101 sizeof (rt
->rt_histogram
) / sizeof (*rt
->rt_histogram
));
103 ASSERT(MUTEX_HELD(rt
->rt_lock
));
104 ASSERT3U(rt
->rt_histogram
[idx
], !=, 0);
105 rt
->rt_histogram
[idx
]--;
109 * NOTE: caller is responsible for all locking.
112 range_tree_seg_compare(const void *x1
, const void *x2
)
114 const range_seg_t
*r1
= (const range_seg_t
*)x1
;
115 const range_seg_t
*r2
= (const range_seg_t
*)x2
;
117 ASSERT3U(r1
->rs_start
, <=, r1
->rs_end
);
118 ASSERT3U(r2
->rs_start
, <=, r2
->rs_end
);
120 return ((r1
->rs_start
>= r2
->rs_end
) - (r1
->rs_end
<= r2
->rs_start
));
124 range_tree_create(range_tree_ops_t
*ops
, void *arg
, kmutex_t
*lp
)
128 rt
= kmem_zalloc(sizeof (range_tree_t
), KM_SLEEP
);
130 avl_create(&rt
->rt_root
, range_tree_seg_compare
,
131 sizeof (range_seg_t
), offsetof(range_seg_t
, rs_node
));
137 if (rt
->rt_ops
!= NULL
)
138 rt
->rt_ops
->rtop_create(rt
, rt
->rt_arg
);
144 range_tree_destroy(range_tree_t
*rt
)
146 VERIFY0(rt
->rt_space
);
148 if (rt
->rt_ops
!= NULL
)
149 rt
->rt_ops
->rtop_destroy(rt
, rt
->rt_arg
);
151 avl_destroy(&rt
->rt_root
);
152 kmem_free(rt
, sizeof (*rt
));
156 range_tree_add(void *arg
, uint64_t start
, uint64_t size
)
158 range_tree_t
*rt
= arg
;
160 range_seg_t rsearch
, *rs_before
, *rs_after
, *rs
;
161 uint64_t end
= start
+ size
;
162 boolean_t merge_before
, merge_after
;
164 ASSERT(MUTEX_HELD(rt
->rt_lock
));
167 rsearch
.rs_start
= start
;
168 rsearch
.rs_end
= end
;
169 rs
= avl_find(&rt
->rt_root
, &rsearch
, &where
);
171 if (rs
!= NULL
&& rs
->rs_start
<= start
&& rs
->rs_end
>= end
) {
172 zfs_panic_recover("zfs: allocating allocated segment"
173 "(offset=%llu size=%llu)\n",
174 (longlong_t
)start
, (longlong_t
)size
);
178 /* Make sure we don't overlap with either of our neighbors */
181 rs_before
= avl_nearest(&rt
->rt_root
, where
, AVL_BEFORE
);
182 rs_after
= avl_nearest(&rt
->rt_root
, where
, AVL_AFTER
);
184 merge_before
= (rs_before
!= NULL
&& rs_before
->rs_end
== start
);
185 merge_after
= (rs_after
!= NULL
&& rs_after
->rs_start
== end
);
187 if (merge_before
&& merge_after
) {
188 avl_remove(&rt
->rt_root
, rs_before
);
189 if (rt
->rt_ops
!= NULL
) {
190 rt
->rt_ops
->rtop_remove(rt
, rs_before
, rt
->rt_arg
);
191 rt
->rt_ops
->rtop_remove(rt
, rs_after
, rt
->rt_arg
);
194 range_tree_stat_decr(rt
, rs_before
);
195 range_tree_stat_decr(rt
, rs_after
);
197 rs_after
->rs_start
= rs_before
->rs_start
;
198 kmem_cache_free(range_seg_cache
, rs_before
);
200 } else if (merge_before
) {
201 if (rt
->rt_ops
!= NULL
)
202 rt
->rt_ops
->rtop_remove(rt
, rs_before
, rt
->rt_arg
);
204 range_tree_stat_decr(rt
, rs_before
);
206 rs_before
->rs_end
= end
;
208 } else if (merge_after
) {
209 if (rt
->rt_ops
!= NULL
)
210 rt
->rt_ops
->rtop_remove(rt
, rs_after
, rt
->rt_arg
);
212 range_tree_stat_decr(rt
, rs_after
);
214 rs_after
->rs_start
= start
;
217 rs
= kmem_cache_alloc(range_seg_cache
, KM_SLEEP
);
218 rs
->rs_start
= start
;
220 avl_insert(&rt
->rt_root
, rs
, where
);
223 if (rt
->rt_ops
!= NULL
)
224 rt
->rt_ops
->rtop_add(rt
, rs
, rt
->rt_arg
);
226 range_tree_stat_incr(rt
, rs
);
227 rt
->rt_space
+= size
;
231 range_tree_remove(void *arg
, uint64_t start
, uint64_t size
)
233 range_tree_t
*rt
= arg
;
235 range_seg_t rsearch
, *rs
, *newseg
;
236 uint64_t end
= start
+ size
;
237 boolean_t left_over
, right_over
;
239 ASSERT(MUTEX_HELD(rt
->rt_lock
));
240 VERIFY3U(size
, !=, 0);
241 VERIFY3U(size
, <=, rt
->rt_space
);
243 rsearch
.rs_start
= start
;
244 rsearch
.rs_end
= end
;
245 rs
= avl_find(&rt
->rt_root
, &rsearch
, &where
);
247 /* Make sure we completely overlap with someone */
249 zfs_panic_recover("zfs: freeing free segment "
250 "(offset=%llu size=%llu)",
251 (longlong_t
)start
, (longlong_t
)size
);
254 VERIFY3U(rs
->rs_start
, <=, start
);
255 VERIFY3U(rs
->rs_end
, >=, end
);
257 left_over
= (rs
->rs_start
!= start
);
258 right_over
= (rs
->rs_end
!= end
);
260 range_tree_stat_decr(rt
, rs
);
262 if (rt
->rt_ops
!= NULL
)
263 rt
->rt_ops
->rtop_remove(rt
, rs
, rt
->rt_arg
);
265 if (left_over
&& right_over
) {
266 newseg
= kmem_cache_alloc(range_seg_cache
, KM_SLEEP
);
267 newseg
->rs_start
= end
;
268 newseg
->rs_end
= rs
->rs_end
;
269 range_tree_stat_incr(rt
, newseg
);
273 avl_insert_here(&rt
->rt_root
, newseg
, rs
, AVL_AFTER
);
274 if (rt
->rt_ops
!= NULL
)
275 rt
->rt_ops
->rtop_add(rt
, newseg
, rt
->rt_arg
);
276 } else if (left_over
) {
278 } else if (right_over
) {
281 avl_remove(&rt
->rt_root
, rs
);
282 kmem_cache_free(range_seg_cache
, rs
);
287 range_tree_stat_incr(rt
, rs
);
289 if (rt
->rt_ops
!= NULL
)
290 rt
->rt_ops
->rtop_add(rt
, rs
, rt
->rt_arg
);
293 rt
->rt_space
-= size
;
297 range_tree_find_impl(range_tree_t
*rt
, uint64_t start
, uint64_t size
)
301 uint64_t end
= start
+ size
;
303 ASSERT(MUTEX_HELD(rt
->rt_lock
));
306 rsearch
.rs_start
= start
;
307 rsearch
.rs_end
= end
;
308 return (avl_find(&rt
->rt_root
, &rsearch
, &where
));
312 range_tree_find(range_tree_t
*rt
, uint64_t start
, uint64_t size
)
314 range_seg_t
*rs
= range_tree_find_impl(rt
, start
, size
);
315 if (rs
!= NULL
&& rs
->rs_start
<= start
&& rs
->rs_end
>= start
+ size
)
321 range_tree_verify(range_tree_t
*rt
, uint64_t off
, uint64_t size
)
325 mutex_enter(rt
->rt_lock
);
326 rs
= range_tree_find(rt
, off
, size
);
328 panic("freeing free block; rs=%p", (void *)rs
);
329 mutex_exit(rt
->rt_lock
);
333 range_tree_contains(range_tree_t
*rt
, uint64_t start
, uint64_t size
)
335 return (range_tree_find(rt
, start
, size
) != NULL
);
339 * Ensure that this range is not in the tree, regardless of whether
340 * it is currently in the tree.
343 range_tree_clear(range_tree_t
*rt
, uint64_t start
, uint64_t size
)
347 while ((rs
= range_tree_find_impl(rt
, start
, size
)) != NULL
) {
348 uint64_t free_start
= MAX(rs
->rs_start
, start
);
349 uint64_t free_end
= MIN(rs
->rs_end
, start
+ size
);
350 range_tree_remove(rt
, free_start
, free_end
- free_start
);
355 range_tree_swap(range_tree_t
**rtsrc
, range_tree_t
**rtdst
)
359 ASSERT(MUTEX_HELD((*rtsrc
)->rt_lock
));
360 ASSERT0(range_tree_space(*rtdst
));
361 ASSERT0(avl_numnodes(&(*rtdst
)->rt_root
));
369 range_tree_vacate(range_tree_t
*rt
, range_tree_func_t
*func
, void *arg
)
374 ASSERT(MUTEX_HELD(rt
->rt_lock
));
376 if (rt
->rt_ops
!= NULL
)
377 rt
->rt_ops
->rtop_vacate(rt
, rt
->rt_arg
);
379 while ((rs
= avl_destroy_nodes(&rt
->rt_root
, &cookie
)) != NULL
) {
381 func(arg
, rs
->rs_start
, rs
->rs_end
- rs
->rs_start
);
382 kmem_cache_free(range_seg_cache
, rs
);
385 bzero(rt
->rt_histogram
, sizeof (rt
->rt_histogram
));
390 range_tree_walk(range_tree_t
*rt
, range_tree_func_t
*func
, void *arg
)
394 ASSERT(MUTEX_HELD(rt
->rt_lock
));
396 for (rs
= avl_first(&rt
->rt_root
); rs
; rs
= AVL_NEXT(&rt
->rt_root
, rs
))
397 func(arg
, rs
->rs_start
, rs
->rs_end
- rs
->rs_start
);
401 range_tree_space(range_tree_t
*rt
)
403 return (rt
->rt_space
);