]>
Commit | Line | Data |
---|---|---|
93cf2076 GW |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * Copyright 2009 Sun Microsystems, Inc. All rights reserved. | |
23 | * Use is subject to license terms. | |
24 | */ | |
25 | /* | |
26 | * Copyright (c) 2013 by Delphix. All rights reserved. | |
27 | */ | |
28 | ||
29 | #include <sys/zfs_context.h> | |
30 | #include <sys/spa.h> | |
31 | #include <sys/dmu.h> | |
32 | #include <sys/dnode.h> | |
33 | #include <sys/zio.h> | |
34 | #include <sys/range_tree.h> | |
35 | ||
36 | static kmem_cache_t *range_seg_cache; | |
37 | ||
38 | void | |
39 | range_tree_init(void) | |
40 | { | |
41 | ASSERT(range_seg_cache == NULL); | |
42 | range_seg_cache = kmem_cache_create("range_seg_cache", | |
43 | sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0); | |
44 | } | |
45 | ||
46 | void | |
47 | range_tree_fini(void) | |
48 | { | |
49 | kmem_cache_destroy(range_seg_cache); | |
50 | range_seg_cache = NULL; | |
51 | } | |
52 | ||
53 | void | |
54 | range_tree_stat_verify(range_tree_t *rt) | |
55 | { | |
56 | range_seg_t *rs; | |
57 | uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 }; | |
58 | int i; | |
59 | ||
60 | for (rs = avl_first(&rt->rt_root); rs != NULL; | |
61 | rs = AVL_NEXT(&rt->rt_root, rs)) { | |
62 | uint64_t size = rs->rs_end - rs->rs_start; | |
63 | int idx = highbit(size) - 1; | |
64 | ||
65 | hist[idx]++; | |
66 | ASSERT3U(hist[idx], !=, 0); | |
67 | } | |
68 | ||
69 | for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) { | |
70 | if (hist[i] != rt->rt_histogram[i]) { | |
71 | zfs_dbgmsg("i=%d, hist=%p, hist=%llu, rt_hist=%llu", | |
72 | i, hist, hist[i], rt->rt_histogram[i]); | |
73 | } | |
74 | VERIFY3U(hist[i], ==, rt->rt_histogram[i]); | |
75 | } | |
76 | } | |
77 | ||
78 | static void | |
79 | range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs) | |
80 | { | |
81 | uint64_t size = rs->rs_end - rs->rs_start; | |
82 | int idx = highbit(size) - 1; | |
83 | ||
84 | ASSERT3U(idx, <, | |
85 | sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram)); | |
86 | ||
87 | ASSERT(MUTEX_HELD(rt->rt_lock)); | |
88 | rt->rt_histogram[idx]++; | |
89 | ASSERT3U(rt->rt_histogram[idx], !=, 0); | |
90 | } | |
91 | ||
92 | static void | |
93 | range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs) | |
94 | { | |
95 | uint64_t size = rs->rs_end - rs->rs_start; | |
96 | int idx = highbit(size) - 1; | |
97 | ||
98 | ASSERT3U(idx, <, | |
99 | sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram)); | |
100 | ||
101 | ASSERT(MUTEX_HELD(rt->rt_lock)); | |
102 | ASSERT3U(rt->rt_histogram[idx], !=, 0); | |
103 | rt->rt_histogram[idx]--; | |
104 | } | |
105 | ||
106 | /* | |
107 | * NOTE: caller is responsible for all locking. | |
108 | */ | |
109 | static int | |
110 | range_tree_seg_compare(const void *x1, const void *x2) | |
111 | { | |
112 | const range_seg_t *r1 = x1; | |
113 | const range_seg_t *r2 = x2; | |
114 | ||
115 | if (r1->rs_start < r2->rs_start) { | |
116 | if (r1->rs_end > r2->rs_start) | |
117 | return (0); | |
118 | return (-1); | |
119 | } | |
120 | if (r1->rs_start > r2->rs_start) { | |
121 | if (r1->rs_start < r2->rs_end) | |
122 | return (0); | |
123 | return (1); | |
124 | } | |
125 | return (0); | |
126 | } | |
127 | ||
128 | range_tree_t * | |
129 | range_tree_create(range_tree_ops_t *ops, void *arg, kmutex_t *lp) | |
130 | { | |
131 | range_tree_t *rt; | |
132 | ||
133 | rt = kmem_zalloc(sizeof (range_tree_t), KM_PUSHPAGE); | |
134 | ||
135 | avl_create(&rt->rt_root, range_tree_seg_compare, | |
136 | sizeof (range_seg_t), offsetof(range_seg_t, rs_node)); | |
137 | ||
138 | rt->rt_lock = lp; | |
139 | rt->rt_ops = ops; | |
140 | rt->rt_arg = arg; | |
141 | ||
142 | if (rt->rt_ops != NULL) | |
143 | rt->rt_ops->rtop_create(rt, rt->rt_arg); | |
144 | ||
145 | return (rt); | |
146 | } | |
147 | ||
148 | void | |
149 | range_tree_destroy(range_tree_t *rt) | |
150 | { | |
151 | VERIFY0(rt->rt_space); | |
152 | ||
153 | if (rt->rt_ops != NULL) | |
154 | rt->rt_ops->rtop_destroy(rt, rt->rt_arg); | |
155 | ||
156 | avl_destroy(&rt->rt_root); | |
157 | kmem_free(rt, sizeof (*rt)); | |
158 | } | |
159 | ||
160 | void | |
161 | range_tree_add(void *arg, uint64_t start, uint64_t size) | |
162 | { | |
163 | range_tree_t *rt = arg; | |
164 | avl_index_t where; | |
165 | range_seg_t rsearch, *rs_before, *rs_after, *rs; | |
166 | uint64_t end = start + size; | |
167 | boolean_t merge_before, merge_after; | |
168 | ||
169 | ASSERT(MUTEX_HELD(rt->rt_lock)); | |
170 | VERIFY(size != 0); | |
171 | ||
172 | rsearch.rs_start = start; | |
173 | rsearch.rs_end = end; | |
174 | rs = avl_find(&rt->rt_root, &rsearch, &where); | |
175 | ||
176 | if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) { | |
177 | zfs_panic_recover("zfs: allocating allocated segment" | |
178 | "(offset=%llu size=%llu)\n", | |
179 | (longlong_t)start, (longlong_t)size); | |
180 | return; | |
181 | } | |
182 | ||
183 | /* Make sure we don't overlap with either of our neighbors */ | |
184 | VERIFY(rs == NULL); | |
185 | ||
186 | rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE); | |
187 | rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER); | |
188 | ||
189 | merge_before = (rs_before != NULL && rs_before->rs_end == start); | |
190 | merge_after = (rs_after != NULL && rs_after->rs_start == end); | |
191 | ||
192 | if (merge_before && merge_after) { | |
193 | avl_remove(&rt->rt_root, rs_before); | |
194 | if (rt->rt_ops != NULL) { | |
195 | rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg); | |
196 | rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg); | |
197 | } | |
198 | ||
199 | range_tree_stat_decr(rt, rs_before); | |
200 | range_tree_stat_decr(rt, rs_after); | |
201 | ||
202 | rs_after->rs_start = rs_before->rs_start; | |
203 | kmem_cache_free(range_seg_cache, rs_before); | |
204 | rs = rs_after; | |
205 | } else if (merge_before) { | |
206 | if (rt->rt_ops != NULL) | |
207 | rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg); | |
208 | ||
209 | range_tree_stat_decr(rt, rs_before); | |
210 | ||
211 | rs_before->rs_end = end; | |
212 | rs = rs_before; | |
213 | } else if (merge_after) { | |
214 | if (rt->rt_ops != NULL) | |
215 | rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg); | |
216 | ||
217 | range_tree_stat_decr(rt, rs_after); | |
218 | ||
219 | rs_after->rs_start = start; | |
220 | rs = rs_after; | |
221 | } else { | |
222 | rs = kmem_cache_alloc(range_seg_cache, KM_PUSHPAGE); | |
223 | rs->rs_start = start; | |
224 | rs->rs_end = end; | |
225 | avl_insert(&rt->rt_root, rs, where); | |
226 | } | |
227 | ||
228 | if (rt->rt_ops != NULL) | |
229 | rt->rt_ops->rtop_add(rt, rs, rt->rt_arg); | |
230 | ||
231 | range_tree_stat_incr(rt, rs); | |
232 | rt->rt_space += size; | |
233 | } | |
234 | ||
235 | void | |
236 | range_tree_remove(void *arg, uint64_t start, uint64_t size) | |
237 | { | |
238 | range_tree_t *rt = arg; | |
239 | avl_index_t where; | |
240 | range_seg_t rsearch, *rs, *newseg; | |
241 | uint64_t end = start + size; | |
242 | boolean_t left_over, right_over; | |
243 | ||
244 | ASSERT(MUTEX_HELD(rt->rt_lock)); | |
245 | VERIFY3U(size, !=, 0); | |
246 | VERIFY3U(size, <=, rt->rt_space); | |
247 | ||
248 | rsearch.rs_start = start; | |
249 | rsearch.rs_end = end; | |
250 | rs = avl_find(&rt->rt_root, &rsearch, &where); | |
251 | ||
252 | /* Make sure we completely overlap with someone */ | |
253 | if (rs == NULL) { | |
254 | zfs_panic_recover("zfs: freeing free segment " | |
255 | "(offset=%llu size=%llu)", | |
256 | (longlong_t)start, (longlong_t)size); | |
257 | return; | |
258 | } | |
259 | VERIFY3U(rs->rs_start, <=, start); | |
260 | VERIFY3U(rs->rs_end, >=, end); | |
261 | ||
262 | left_over = (rs->rs_start != start); | |
263 | right_over = (rs->rs_end != end); | |
264 | ||
265 | range_tree_stat_decr(rt, rs); | |
266 | ||
267 | if (rt->rt_ops != NULL) | |
268 | rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg); | |
269 | ||
270 | if (left_over && right_over) { | |
271 | newseg = kmem_cache_alloc(range_seg_cache, KM_PUSHPAGE); | |
272 | newseg->rs_start = end; | |
273 | newseg->rs_end = rs->rs_end; | |
274 | range_tree_stat_incr(rt, newseg); | |
275 | ||
276 | rs->rs_end = start; | |
277 | ||
278 | avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER); | |
279 | if (rt->rt_ops != NULL) | |
280 | rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg); | |
281 | } else if (left_over) { | |
282 | rs->rs_end = start; | |
283 | } else if (right_over) { | |
284 | rs->rs_start = end; | |
285 | } else { | |
286 | avl_remove(&rt->rt_root, rs); | |
287 | kmem_cache_free(range_seg_cache, rs); | |
288 | rs = NULL; | |
289 | } | |
290 | ||
291 | if (rs != NULL) { | |
292 | range_tree_stat_incr(rt, rs); | |
293 | ||
294 | if (rt->rt_ops != NULL) | |
295 | rt->rt_ops->rtop_add(rt, rs, rt->rt_arg); | |
296 | } | |
297 | ||
298 | rt->rt_space -= size; | |
299 | } | |
300 | ||
301 | static range_seg_t * | |
302 | range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size, | |
303 | avl_index_t *wherep) | |
304 | { | |
305 | range_seg_t rsearch, *rs; | |
306 | uint64_t end = start + size; | |
307 | ||
308 | ASSERT(MUTEX_HELD(rt->rt_lock)); | |
309 | VERIFY(size != 0); | |
310 | ||
311 | rsearch.rs_start = start; | |
312 | rsearch.rs_end = end; | |
313 | rs = avl_find(&rt->rt_root, &rsearch, wherep); | |
314 | ||
315 | if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) | |
316 | return (rs); | |
317 | return (NULL); | |
318 | } | |
319 | ||
320 | void | |
321 | range_tree_verify(range_tree_t *rt, uint64_t off, uint64_t size) | |
322 | { | |
323 | range_seg_t *rs; | |
324 | avl_index_t where; | |
325 | ||
326 | mutex_enter(rt->rt_lock); | |
327 | rs = range_tree_find(rt, off, size, &where); | |
328 | if (rs != NULL) | |
329 | panic("freeing free block; rs=%p", (void *)rs); | |
330 | mutex_exit(rt->rt_lock); | |
331 | } | |
332 | ||
333 | boolean_t | |
334 | range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size) | |
335 | { | |
336 | avl_index_t where; | |
337 | ||
338 | return (range_tree_find(rt, start, size, &where) != NULL); | |
339 | } | |
340 | ||
341 | void | |
342 | range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst) | |
343 | { | |
344 | range_tree_t *rt; | |
345 | ||
346 | ASSERT(MUTEX_HELD((*rtsrc)->rt_lock)); | |
347 | ASSERT0(range_tree_space(*rtdst)); | |
348 | ASSERT0(avl_numnodes(&(*rtdst)->rt_root)); | |
349 | ||
350 | rt = *rtsrc; | |
351 | *rtsrc = *rtdst; | |
352 | *rtdst = rt; | |
353 | } | |
354 | ||
355 | void | |
356 | range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg) | |
357 | { | |
358 | range_seg_t *rs; | |
359 | void *cookie = NULL; | |
360 | ||
361 | ASSERT(MUTEX_HELD(rt->rt_lock)); | |
362 | ||
363 | if (rt->rt_ops != NULL) | |
364 | rt->rt_ops->rtop_vacate(rt, rt->rt_arg); | |
365 | ||
366 | while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) { | |
367 | if (func != NULL) | |
368 | func(arg, rs->rs_start, rs->rs_end - rs->rs_start); | |
369 | kmem_cache_free(range_seg_cache, rs); | |
370 | } | |
371 | ||
372 | bzero(rt->rt_histogram, sizeof (rt->rt_histogram)); | |
373 | rt->rt_space = 0; | |
374 | } | |
375 | ||
376 | void | |
377 | range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg) | |
378 | { | |
379 | range_seg_t *rs; | |
380 | ||
381 | ASSERT(MUTEX_HELD(rt->rt_lock)); | |
382 | ||
383 | for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs)) | |
384 | func(arg, rs->rs_start, rs->rs_end - rs->rs_start); | |
385 | } | |
386 | ||
387 | uint64_t | |
388 | range_tree_space(range_tree_t *rt) | |
389 | { | |
390 | return (rt->rt_space); | |
391 | } |