/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/range_tree.h>
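
/*
 * Range trees track sets of non-overlapping, half-open intervals
 * [rs_start, rs_end) over a 64-bit space, e.g. offsets within a
 * metaslab. Segments are kept in an AVL tree ordered by offset;
 * adjacent segments are merged on insert and split on partial removal,
 * so the tree always holds the minimal number of segments. An optional
 * range_tree_ops_t vector lets the caller mirror every structural
 * change, and rt_histogram[] counts segments by power-of-two size
 * class. Except for range_tree_verify(), which takes rt_lock itself,
 * all operations assert that the caller holds it.
 */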

kmem_cache_t *range_seg_cache;

void
range_tree_init(void)
{
	ASSERT(range_seg_cache == NULL);
	range_seg_cache = kmem_cache_create("range_seg_cache",
	    sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
range_tree_fini(void)
{
	kmem_cache_destroy(range_seg_cache);
	range_seg_cache = NULL;
}

void
range_tree_stat_verify(range_tree_t *rt)
{
	range_seg_t *rs;
	uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
	int i;

	for (rs = avl_first(&rt->rt_root); rs != NULL;
	    rs = AVL_NEXT(&rt->rt_root, rs)) {
		uint64_t size = rs->rs_end - rs->rs_start;
		int idx = highbit64(size) - 1;

		hist[idx]++;
		ASSERT3U(hist[idx], !=, 0);
	}

	for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
		if (hist[i] != rt->rt_histogram[i]) {
			zfs_dbgmsg("i=%d, hist=%p, hist=%llu, rt_hist=%llu",
			    i, (void *)hist, (u_longlong_t)hist[i],
			    (u_longlong_t)rt->rt_histogram[i]);
		}
		VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
	}
}
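
/*
 * rt_histogram[i] counts segments whose size s satisfies
 * 2^i <= s < 2^(i+1); the index is floor(log2(size)), computed as
 * highbit64(size) - 1. For example, a segment of size 0x3000 (12 KiB)
 * has highbit64(0x3000) == 14 and lands in bucket 13
 * (8 KiB <= s < 16 KiB).
 */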
static void
range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs->rs_end - rs->rs_start;
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	ASSERT(MUTEX_HELD(rt->rt_lock));
	rt->rt_histogram[idx]++;
	ASSERT3U(rt->rt_histogram[idx], !=, 0);
}

static void
range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs->rs_end - rs->rs_start;
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	ASSERT(MUTEX_HELD(rt->rt_lock));
	ASSERT3U(rt->rt_histogram[idx], !=, 0);
	rt->rt_histogram[idx]--;
}

/*
 * NOTE: caller is responsible for all locking.
 */
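/*
 * Overlapping segments compare as equal: r1 entirely to the right of
 * r2 (r1->rs_start >= r2->rs_end) yields 1, r1 entirely to the left
 * (r1->rs_end <= r2->rs_start) yields -1, and any overlap yields 0.
 * Merely adjacent segments compare as ordered rather than equal, so
 * avl_find() returns a segment only if the search range actually
 * overlaps it.
 */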
static int
range_tree_seg_compare(const void *x1, const void *x2)
{
	const range_seg_t *r1 = (const range_seg_t *)x1;
	const range_seg_t *r2 = (const range_seg_t *)x2;

	ASSERT3U(r1->rs_start, <=, r1->rs_end);
	ASSERT3U(r2->rs_start, <=, r2->rs_end);

	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}
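
/*
 * Typical lifecycle (an illustrative sketch, not code from this file;
 * the lock and the offsets are hypothetical):
 *
 *	kmutex_t lock;
 *	range_tree_t *rt;
 *
 *	mutex_init(&lock, NULL, MUTEX_DEFAULT, NULL);
 *	rt = range_tree_create(NULL, NULL, &lock);
 *
 *	mutex_enter(&lock);
 *	range_tree_add(rt, 0x0, 0x2000);
 *	range_tree_add(rt, 0x2000, 0x1000);	(merges into [0, 0x3000))
 *	range_tree_remove(rt, 0x1000, 0x800);	(splits into [0, 0x1000)
 *						and [0x1800, 0x3000))
 *	range_tree_vacate(rt, NULL, NULL);
 *	mutex_exit(&lock);
 *
 *	range_tree_destroy(rt);
 */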
range_tree_t *
range_tree_create(range_tree_ops_t *ops, void *arg, kmutex_t *lp)
{
	range_tree_t *rt;

	rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);

	avl_create(&rt->rt_root, range_tree_seg_compare,
	    sizeof (range_seg_t), offsetof(range_seg_t, rs_node));

	rt->rt_lock = lp;
	rt->rt_ops = ops;
	rt->rt_arg = arg;

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_create(rt, rt->rt_arg);

	return (rt);
}

void
range_tree_destroy(range_tree_t *rt)
{
	VERIFY0(rt->rt_space);

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_destroy(rt, rt->rt_arg);

	avl_destroy(&rt->rt_root);
	kmem_free(rt, sizeof (*rt));
}
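
/*
 * Add the range [start, start + size) to the tree, merging with
 * adjacent segments where possible. For example, with segments
 * [0, 10) and [20, 30) in the tree, adding start=10 size=10 hits the
 * merge_before && merge_after case below and collapses all three
 * into the single segment [0, 30).
 */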
void
range_tree_add(void *arg, uint64_t start, uint64_t size)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs_before, *rs_after, *rs;
	uint64_t end = start + size;
	boolean_t merge_before, merge_after;

	ASSERT(MUTEX_HELD(rt->rt_lock));
	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) {
		zfs_panic_recover("zfs: allocating allocated segment "
		    "(offset=%llu size=%llu)\n",
		    (longlong_t)start, (longlong_t)size);
		return;
	}

	/* Make sure we don't overlap with either of our neighbors */
	VERIFY(rs == NULL);

	rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
	rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);

	merge_before = (rs_before != NULL && rs_before->rs_end == start);
	merge_after = (rs_after != NULL && rs_after->rs_start == end);

	if (merge_before && merge_after) {
		avl_remove(&rt->rt_root, rs_before);
		if (rt->rt_ops != NULL) {
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
		}

		range_tree_stat_decr(rt, rs_before);
		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = rs_before->rs_start;
		kmem_cache_free(range_seg_cache, rs_before);
		rs = rs_after;
	} else if (merge_before) {
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);

		range_tree_stat_decr(rt, rs_before);

		rs_before->rs_end = end;
		rs = rs_before;
	} else if (merge_after) {
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);

		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = start;
		rs = rs_after;
	} else {
		rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		rs->rs_start = start;
		rs->rs_end = end;
		avl_insert(&rt->rt_root, rs, where);
	}

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	range_tree_stat_incr(rt, rs);
	rt->rt_space += size;
}
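
/*
 * Remove a range that is entirely contained in one existing segment.
 * There are four cases: the removal splits the segment in two
 * (left_over && right_over), trims its tail (left_over), trims its
 * head (right_over), or deletes it outright. For example, removing
 * start=10 size=10 from the segment [0, 30) leaves [0, 10) and
 * [20, 30).
 */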
void
range_tree_remove(void *arg, uint64_t start, uint64_t size)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs, *newseg;
	uint64_t end = start + size;
	boolean_t left_over, right_over;

	ASSERT(MUTEX_HELD(rt->rt_lock));
	VERIFY3U(size, !=, 0);
	VERIFY3U(size, <=, rt->rt_space);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	/* Make sure we completely overlap with someone */
	if (rs == NULL) {
		zfs_panic_recover("zfs: freeing free segment "
		    "(offset=%llu size=%llu)",
		    (longlong_t)start, (longlong_t)size);
		return;
	}
	VERIFY3U(rs->rs_start, <=, start);
	VERIFY3U(rs->rs_end, >=, end);

	left_over = (rs->rs_start != start);
	right_over = (rs->rs_end != end);

	range_tree_stat_decr(rt, rs);

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	if (left_over && right_over) {
		newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		newseg->rs_start = end;
		newseg->rs_end = rs->rs_end;
		range_tree_stat_incr(rt, newseg);

		rs->rs_end = start;

		avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
	} else if (left_over) {
		rs->rs_end = start;
	} else if (right_over) {
		rs->rs_start = end;
	} else {
		avl_remove(&rt->rt_root, rs);
		kmem_cache_free(range_seg_cache, rs);
		rs = NULL;
	}

	if (rs != NULL) {
		range_tree_stat_incr(rt, rs);

		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
	}

	rt->rt_space -= size;
}
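
/*
 * Return any segment overlapping [start, start + size), or NULL if
 * there is none; the comparator treats overlap as equality, so
 * avl_find() does the work.
 */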
static range_seg_t *
range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
{
	avl_index_t where;
	range_seg_t rsearch;
	uint64_t end = start + size;

	ASSERT(MUTEX_HELD(rt->rt_lock));
	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	return (avl_find(&rt->rt_root, &rsearch, &where));
}
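
/*
 * Like range_tree_find_impl(), but return a match only if it fully
 * contains the queried range.
 */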
static range_seg_t *
range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs = range_tree_find_impl(rt, start, size);
	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
		return (rs);
	return (NULL);
}

void
range_tree_verify(range_tree_t *rt, uint64_t off, uint64_t size)
{
	range_seg_t *rs;

	mutex_enter(rt->rt_lock);
	rs = range_tree_find(rt, off, size);
	if (rs != NULL)
		panic("freeing free block; rs=%p", (void *)rs);
	mutex_exit(rt->rt_lock);
}

boolean_t
range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
{
	return (range_tree_find(rt, start, size) != NULL);
}

/*
 * Ensure that this range is not in the tree, regardless of whether
 * it is currently in the tree: any portions that overlap existing
 * segments are removed.
 */
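/*
 * For example, with segments [0, 8) and [12, 20) in the tree, clearing
 * start=5 size=10 removes [5, 8) and [12, 15), leaving [0, 5) and
 * [15, 20).
 */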
void
range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs;

	while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
		uint64_t free_start = MAX(rs->rs_start, start);
		uint64_t free_end = MIN(rs->rs_end, start + size);
		range_tree_remove(rt, free_start, free_end - free_start);
	}
}

void
range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
{
	range_tree_t *rt;

	ASSERT(MUTEX_HELD((*rtsrc)->rt_lock));
	ASSERT0(range_tree_space(*rtdst));
	ASSERT0(avl_numnodes(&(*rtdst)->rt_root));

	rt = *rtsrc;
	*rtsrc = *rtdst;
	*rtdst = rt;
}
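
/*
 * Empty the tree, optionally passing each segment to func before it is
 * freed. Contrast range_tree_walk() below, which visits each segment
 * without modifying the tree.
 */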
void
range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	range_seg_t *rs;
	void *cookie = NULL;

	ASSERT(MUTEX_HELD(rt->rt_lock));

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_vacate(rt, rt->rt_arg);

	while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
		if (func != NULL)
			func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
		kmem_cache_free(range_seg_cache, rs);
	}

	bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
	rt->rt_space = 0;
}

void
range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	range_seg_t *rs;

	ASSERT(MUTEX_HELD(rt->rt_lock));

	for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
		func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
}

uint64_t
range_tree_space(range_tree_t *rt)
{
	return (rt->rt_space);
}