]> git.proxmox.com Git - mirror_zfs-debian.git/blame - module/zfs/range_tree.c
New upstream version 0.7.11
[mirror_zfs-debian.git] / module / zfs / range_tree.c
CommitLineData
ea04106b
AX
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25/*
26 * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
27 */
28
29#include <sys/zfs_context.h>
30#include <sys/spa.h>
31#include <sys/dmu.h>
32#include <sys/dnode.h>
33#include <sys/zio.h>
34#include <sys/range_tree.h>
35
e10b0808 36kmem_cache_t *range_seg_cache;
ea04106b
AX
37
38void
39range_tree_init(void)
40{
41 ASSERT(range_seg_cache == NULL);
42 range_seg_cache = kmem_cache_create("range_seg_cache",
43 sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
44}
45
46void
47range_tree_fini(void)
48{
49 kmem_cache_destroy(range_seg_cache);
50 range_seg_cache = NULL;
51}
52
53void
54range_tree_stat_verify(range_tree_t *rt)
55{
56 range_seg_t *rs;
57 uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
58 int i;
59
60 for (rs = avl_first(&rt->rt_root); rs != NULL;
61 rs = AVL_NEXT(&rt->rt_root, rs)) {
62 uint64_t size = rs->rs_end - rs->rs_start;
63 int idx = highbit64(size) - 1;
64
65 hist[idx]++;
66 ASSERT3U(hist[idx], !=, 0);
67 }
68
69 for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
70 if (hist[i] != rt->rt_histogram[i]) {
71 zfs_dbgmsg("i=%d, hist=%p, hist=%llu, rt_hist=%llu",
72 i, hist, hist[i], rt->rt_histogram[i]);
73 }
74 VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
75 }
76}
77
78static void
79range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
80{
81 uint64_t size = rs->rs_end - rs->rs_start;
82 int idx = highbit64(size) - 1;
83
84 ASSERT(size != 0);
85 ASSERT3U(idx, <,
86 sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));
87
88 ASSERT(MUTEX_HELD(rt->rt_lock));
89 rt->rt_histogram[idx]++;
90 ASSERT3U(rt->rt_histogram[idx], !=, 0);
91}
92
93static void
94range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
95{
96 uint64_t size = rs->rs_end - rs->rs_start;
97 int idx = highbit64(size) - 1;
98
99 ASSERT(size != 0);
100 ASSERT3U(idx, <,
101 sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));
102
103 ASSERT(MUTEX_HELD(rt->rt_lock));
104 ASSERT3U(rt->rt_histogram[idx], !=, 0);
105 rt->rt_histogram[idx]--;
106}
107
108/*
109 * NOTE: caller is responsible for all locking.
110 */
111static int
112range_tree_seg_compare(const void *x1, const void *x2)
113{
cae5b340
AX
114 const range_seg_t *r1 = (const range_seg_t *)x1;
115 const range_seg_t *r2 = (const range_seg_t *)x2;
ea04106b 116
cae5b340
AX
117 ASSERT3U(r1->rs_start, <=, r1->rs_end);
118 ASSERT3U(r2->rs_start, <=, r2->rs_end);
119
120 return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
ea04106b
AX
121}
122
123range_tree_t *
124range_tree_create(range_tree_ops_t *ops, void *arg, kmutex_t *lp)
125{
126 range_tree_t *rt;
127
128 rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);
129
130 avl_create(&rt->rt_root, range_tree_seg_compare,
131 sizeof (range_seg_t), offsetof(range_seg_t, rs_node));
132
133 rt->rt_lock = lp;
134 rt->rt_ops = ops;
135 rt->rt_arg = arg;
136
137 if (rt->rt_ops != NULL)
138 rt->rt_ops->rtop_create(rt, rt->rt_arg);
139
140 return (rt);
141}
142
143void
144range_tree_destroy(range_tree_t *rt)
145{
146 VERIFY0(rt->rt_space);
147
148 if (rt->rt_ops != NULL)
149 rt->rt_ops->rtop_destroy(rt, rt->rt_arg);
150
151 avl_destroy(&rt->rt_root);
152 kmem_free(rt, sizeof (*rt));
153}
154
155void
156range_tree_add(void *arg, uint64_t start, uint64_t size)
157{
158 range_tree_t *rt = arg;
159 avl_index_t where;
160 range_seg_t rsearch, *rs_before, *rs_after, *rs;
161 uint64_t end = start + size;
162 boolean_t merge_before, merge_after;
163
164 ASSERT(MUTEX_HELD(rt->rt_lock));
165 VERIFY(size != 0);
166
167 rsearch.rs_start = start;
168 rsearch.rs_end = end;
169 rs = avl_find(&rt->rt_root, &rsearch, &where);
170
171 if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) {
172 zfs_panic_recover("zfs: allocating allocated segment"
173 "(offset=%llu size=%llu)\n",
174 (longlong_t)start, (longlong_t)size);
175 return;
176 }
177
178 /* Make sure we don't overlap with either of our neighbors */
179 VERIFY(rs == NULL);
180
181 rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
182 rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);
183
184 merge_before = (rs_before != NULL && rs_before->rs_end == start);
185 merge_after = (rs_after != NULL && rs_after->rs_start == end);
186
187 if (merge_before && merge_after) {
188 avl_remove(&rt->rt_root, rs_before);
189 if (rt->rt_ops != NULL) {
190 rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
191 rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
192 }
193
194 range_tree_stat_decr(rt, rs_before);
195 range_tree_stat_decr(rt, rs_after);
196
197 rs_after->rs_start = rs_before->rs_start;
198 kmem_cache_free(range_seg_cache, rs_before);
199 rs = rs_after;
200 } else if (merge_before) {
201 if (rt->rt_ops != NULL)
202 rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
203
204 range_tree_stat_decr(rt, rs_before);
205
206 rs_before->rs_end = end;
207 rs = rs_before;
208 } else if (merge_after) {
209 if (rt->rt_ops != NULL)
210 rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
211
212 range_tree_stat_decr(rt, rs_after);
213
214 rs_after->rs_start = start;
215 rs = rs_after;
216 } else {
217 rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
218 rs->rs_start = start;
219 rs->rs_end = end;
220 avl_insert(&rt->rt_root, rs, where);
221 }
222
223 if (rt->rt_ops != NULL)
224 rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
225
226 range_tree_stat_incr(rt, rs);
227 rt->rt_space += size;
228}
229
230void
231range_tree_remove(void *arg, uint64_t start, uint64_t size)
232{
233 range_tree_t *rt = arg;
234 avl_index_t where;
235 range_seg_t rsearch, *rs, *newseg;
236 uint64_t end = start + size;
237 boolean_t left_over, right_over;
238
239 ASSERT(MUTEX_HELD(rt->rt_lock));
240 VERIFY3U(size, !=, 0);
241 VERIFY3U(size, <=, rt->rt_space);
242
243 rsearch.rs_start = start;
244 rsearch.rs_end = end;
245 rs = avl_find(&rt->rt_root, &rsearch, &where);
246
247 /* Make sure we completely overlap with someone */
248 if (rs == NULL) {
249 zfs_panic_recover("zfs: freeing free segment "
250 "(offset=%llu size=%llu)",
251 (longlong_t)start, (longlong_t)size);
252 return;
253 }
254 VERIFY3U(rs->rs_start, <=, start);
255 VERIFY3U(rs->rs_end, >=, end);
256
257 left_over = (rs->rs_start != start);
258 right_over = (rs->rs_end != end);
259
260 range_tree_stat_decr(rt, rs);
261
262 if (rt->rt_ops != NULL)
263 rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
264
265 if (left_over && right_over) {
266 newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
267 newseg->rs_start = end;
268 newseg->rs_end = rs->rs_end;
269 range_tree_stat_incr(rt, newseg);
270
271 rs->rs_end = start;
272
273 avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
274 if (rt->rt_ops != NULL)
275 rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
276 } else if (left_over) {
277 rs->rs_end = start;
278 } else if (right_over) {
279 rs->rs_start = end;
280 } else {
281 avl_remove(&rt->rt_root, rs);
282 kmem_cache_free(range_seg_cache, rs);
283 rs = NULL;
284 }
285
286 if (rs != NULL) {
287 range_tree_stat_incr(rt, rs);
288
289 if (rt->rt_ops != NULL)
290 rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
291 }
292
293 rt->rt_space -= size;
294}
295
296static range_seg_t *
297range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
298{
299 avl_index_t where;
300 range_seg_t rsearch;
301 uint64_t end = start + size;
302
303 ASSERT(MUTEX_HELD(rt->rt_lock));
304 VERIFY(size != 0);
305
306 rsearch.rs_start = start;
307 rsearch.rs_end = end;
308 return (avl_find(&rt->rt_root, &rsearch, &where));
309}
310
311static range_seg_t *
312range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
313{
314 range_seg_t *rs = range_tree_find_impl(rt, start, size);
315 if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
316 return (rs);
317 return (NULL);
318}
319
320void
321range_tree_verify(range_tree_t *rt, uint64_t off, uint64_t size)
322{
323 range_seg_t *rs;
324
325 mutex_enter(rt->rt_lock);
326 rs = range_tree_find(rt, off, size);
327 if (rs != NULL)
328 panic("freeing free block; rs=%p", (void *)rs);
329 mutex_exit(rt->rt_lock);
330}
331
332boolean_t
333range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
334{
335 return (range_tree_find(rt, start, size) != NULL);
336}
337
338/*
339 * Ensure that this range is not in the tree, regardless of whether
340 * it is currently in the tree.
341 */
342void
343range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
344{
345 range_seg_t *rs;
346
347 while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
348 uint64_t free_start = MAX(rs->rs_start, start);
349 uint64_t free_end = MIN(rs->rs_end, start + size);
350 range_tree_remove(rt, free_start, free_end - free_start);
351 }
352}
353
354void
355range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
356{
357 range_tree_t *rt;
358
359 ASSERT(MUTEX_HELD((*rtsrc)->rt_lock));
360 ASSERT0(range_tree_space(*rtdst));
361 ASSERT0(avl_numnodes(&(*rtdst)->rt_root));
362
363 rt = *rtsrc;
364 *rtsrc = *rtdst;
365 *rtdst = rt;
366}
367
368void
369range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
370{
371 range_seg_t *rs;
372 void *cookie = NULL;
373
374 ASSERT(MUTEX_HELD(rt->rt_lock));
375
376 if (rt->rt_ops != NULL)
377 rt->rt_ops->rtop_vacate(rt, rt->rt_arg);
378
379 while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
380 if (func != NULL)
381 func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
382 kmem_cache_free(range_seg_cache, rs);
383 }
384
385 bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
386 rt->rt_space = 0;
387}
388
389void
390range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
391{
392 range_seg_t *rs;
393
394 ASSERT(MUTEX_HELD(rt->rt_lock));
395
396 for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
397 func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
398}
399
400uint64_t
401range_tree_space(range_tree_t *rt)
402{
403 return (rt->rt_space);
404}