]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * Copyright 2008 Sun Microsystems, Inc. All rights reserved. | |
23 | * Use is subject to license terms. | |
24 | */ | |
25 | ||
b128c09f | 26 | #pragma ident "%Z%%M% %I% %E% SMI" |
34dc7c2f BB |
27 | |
28 | #include <sys/zfs_context.h> | |
29 | #include <sys/spa.h> | |
30 | #include <sys/dmu.h> | |
31 | #include <sys/zio.h> | |
32 | #include <sys/space_map.h> | |
33 | ||
34 | /* | |
35 | * Space map routines. | |
36 | * NOTE: caller is responsible for all locking. | |
37 | */ | |
38 | static int | |
39 | space_map_seg_compare(const void *x1, const void *x2) | |
40 | { | |
41 | const space_seg_t *s1 = x1; | |
42 | const space_seg_t *s2 = x2; | |
43 | ||
44 | if (s1->ss_start < s2->ss_start) { | |
45 | if (s1->ss_end > s2->ss_start) | |
46 | return (0); | |
47 | return (-1); | |
48 | } | |
49 | if (s1->ss_start > s2->ss_start) { | |
50 | if (s1->ss_start < s2->ss_end) | |
51 | return (0); | |
52 | return (1); | |
53 | } | |
54 | return (0); | |
55 | } | |
56 | ||
57 | void | |
58 | space_map_create(space_map_t *sm, uint64_t start, uint64_t size, uint8_t shift, | |
59 | kmutex_t *lp) | |
60 | { | |
61 | bzero(sm, sizeof (*sm)); | |
62 | ||
63 | avl_create(&sm->sm_root, space_map_seg_compare, | |
64 | sizeof (space_seg_t), offsetof(struct space_seg, ss_node)); | |
65 | ||
66 | sm->sm_start = start; | |
67 | sm->sm_size = size; | |
68 | sm->sm_shift = shift; | |
69 | sm->sm_lock = lp; | |
70 | } | |
71 | ||
72 | void | |
73 | space_map_destroy(space_map_t *sm) | |
74 | { | |
75 | ASSERT(!sm->sm_loaded && !sm->sm_loading); | |
76 | VERIFY3U(sm->sm_space, ==, 0); | |
77 | avl_destroy(&sm->sm_root); | |
78 | } | |
79 | ||
80 | void | |
81 | space_map_add(space_map_t *sm, uint64_t start, uint64_t size) | |
82 | { | |
83 | avl_index_t where; | |
84 | space_seg_t ssearch, *ss_before, *ss_after, *ss; | |
85 | uint64_t end = start + size; | |
86 | int merge_before, merge_after; | |
87 | ||
88 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
89 | VERIFY(size != 0); | |
90 | VERIFY3U(start, >=, sm->sm_start); | |
91 | VERIFY3U(end, <=, sm->sm_start + sm->sm_size); | |
92 | VERIFY(sm->sm_space + size <= sm->sm_size); | |
93 | VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); | |
94 | VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); | |
95 | ||
96 | ssearch.ss_start = start; | |
97 | ssearch.ss_end = end; | |
98 | ss = avl_find(&sm->sm_root, &ssearch, &where); | |
99 | ||
100 | if (ss != NULL && ss->ss_start <= start && ss->ss_end >= end) { | |
101 | zfs_panic_recover("zfs: allocating allocated segment" | |
102 | "(offset=%llu size=%llu)\n", | |
103 | (longlong_t)start, (longlong_t)size); | |
104 | return; | |
105 | } | |
106 | ||
107 | /* Make sure we don't overlap with either of our neighbors */ | |
108 | VERIFY(ss == NULL); | |
109 | ||
110 | ss_before = avl_nearest(&sm->sm_root, where, AVL_BEFORE); | |
111 | ss_after = avl_nearest(&sm->sm_root, where, AVL_AFTER); | |
112 | ||
113 | merge_before = (ss_before != NULL && ss_before->ss_end == start); | |
114 | merge_after = (ss_after != NULL && ss_after->ss_start == end); | |
115 | ||
116 | if (merge_before && merge_after) { | |
117 | avl_remove(&sm->sm_root, ss_before); | |
118 | ss_after->ss_start = ss_before->ss_start; | |
119 | kmem_free(ss_before, sizeof (*ss_before)); | |
120 | } else if (merge_before) { | |
121 | ss_before->ss_end = end; | |
122 | } else if (merge_after) { | |
123 | ss_after->ss_start = start; | |
124 | } else { | |
125 | ss = kmem_alloc(sizeof (*ss), KM_SLEEP); | |
126 | ss->ss_start = start; | |
127 | ss->ss_end = end; | |
128 | avl_insert(&sm->sm_root, ss, where); | |
129 | } | |
130 | ||
131 | sm->sm_space += size; | |
132 | } | |
133 | ||
134 | void | |
135 | space_map_remove(space_map_t *sm, uint64_t start, uint64_t size) | |
136 | { | |
137 | avl_index_t where; | |
138 | space_seg_t ssearch, *ss, *newseg; | |
139 | uint64_t end = start + size; | |
140 | int left_over, right_over; | |
141 | ||
142 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
143 | VERIFY(size != 0); | |
144 | VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); | |
145 | VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); | |
146 | ||
147 | ssearch.ss_start = start; | |
148 | ssearch.ss_end = end; | |
149 | ss = avl_find(&sm->sm_root, &ssearch, &where); | |
150 | ||
151 | /* Make sure we completely overlap with someone */ | |
152 | if (ss == NULL) { | |
153 | zfs_panic_recover("zfs: freeing free segment " | |
154 | "(offset=%llu size=%llu)", | |
155 | (longlong_t)start, (longlong_t)size); | |
156 | return; | |
157 | } | |
158 | VERIFY3U(ss->ss_start, <=, start); | |
159 | VERIFY3U(ss->ss_end, >=, end); | |
160 | VERIFY(sm->sm_space - size <= sm->sm_size); | |
161 | ||
162 | left_over = (ss->ss_start != start); | |
163 | right_over = (ss->ss_end != end); | |
164 | ||
165 | if (left_over && right_over) { | |
166 | newseg = kmem_alloc(sizeof (*newseg), KM_SLEEP); | |
167 | newseg->ss_start = end; | |
168 | newseg->ss_end = ss->ss_end; | |
169 | ss->ss_end = start; | |
170 | avl_insert_here(&sm->sm_root, newseg, ss, AVL_AFTER); | |
171 | } else if (left_over) { | |
172 | ss->ss_end = start; | |
173 | } else if (right_over) { | |
174 | ss->ss_start = end; | |
175 | } else { | |
176 | avl_remove(&sm->sm_root, ss); | |
177 | kmem_free(ss, sizeof (*ss)); | |
178 | } | |
179 | ||
180 | sm->sm_space -= size; | |
181 | } | |
182 | ||
183 | int | |
184 | space_map_contains(space_map_t *sm, uint64_t start, uint64_t size) | |
185 | { | |
186 | avl_index_t where; | |
187 | space_seg_t ssearch, *ss; | |
188 | uint64_t end = start + size; | |
189 | ||
190 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
191 | VERIFY(size != 0); | |
192 | VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); | |
193 | VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); | |
194 | ||
195 | ssearch.ss_start = start; | |
196 | ssearch.ss_end = end; | |
197 | ss = avl_find(&sm->sm_root, &ssearch, &where); | |
198 | ||
199 | return (ss != NULL && ss->ss_start <= start && ss->ss_end >= end); | |
200 | } | |
201 | ||
202 | void | |
203 | space_map_vacate(space_map_t *sm, space_map_func_t *func, space_map_t *mdest) | |
204 | { | |
205 | space_seg_t *ss; | |
206 | void *cookie = NULL; | |
207 | ||
208 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
209 | ||
210 | while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) { | |
211 | if (func != NULL) | |
212 | func(mdest, ss->ss_start, ss->ss_end - ss->ss_start); | |
213 | kmem_free(ss, sizeof (*ss)); | |
214 | } | |
215 | sm->sm_space = 0; | |
216 | } | |
217 | ||
218 | void | |
219 | space_map_walk(space_map_t *sm, space_map_func_t *func, space_map_t *mdest) | |
220 | { | |
221 | space_seg_t *ss; | |
222 | ||
223 | for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss)) | |
224 | func(mdest, ss->ss_start, ss->ss_end - ss->ss_start); | |
225 | } | |
226 | ||
227 | void | |
228 | space_map_excise(space_map_t *sm, uint64_t start, uint64_t size) | |
229 | { | |
230 | avl_tree_t *t = &sm->sm_root; | |
231 | avl_index_t where; | |
232 | space_seg_t *ss, search; | |
233 | uint64_t end = start + size; | |
234 | uint64_t rm_start, rm_end; | |
235 | ||
236 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
237 | ||
238 | search.ss_start = start; | |
239 | search.ss_end = start; | |
240 | ||
241 | for (;;) { | |
242 | ss = avl_find(t, &search, &where); | |
243 | ||
244 | if (ss == NULL) | |
245 | ss = avl_nearest(t, where, AVL_AFTER); | |
246 | ||
247 | if (ss == NULL || ss->ss_start >= end) | |
248 | break; | |
249 | ||
250 | rm_start = MAX(ss->ss_start, start); | |
251 | rm_end = MIN(ss->ss_end, end); | |
252 | ||
253 | space_map_remove(sm, rm_start, rm_end - rm_start); | |
254 | } | |
255 | } | |
256 | ||
257 | /* | |
258 | * Replace smd with the union of smd and sms. | |
259 | */ | |
260 | void | |
261 | space_map_union(space_map_t *smd, space_map_t *sms) | |
262 | { | |
263 | avl_tree_t *t = &sms->sm_root; | |
264 | space_seg_t *ss; | |
265 | ||
266 | ASSERT(MUTEX_HELD(smd->sm_lock)); | |
267 | ||
268 | /* | |
269 | * For each source segment, remove any intersections with the | |
270 | * destination, then add the source segment to the destination. | |
271 | */ | |
272 | for (ss = avl_first(t); ss != NULL; ss = AVL_NEXT(t, ss)) { | |
273 | space_map_excise(smd, ss->ss_start, ss->ss_end - ss->ss_start); | |
274 | space_map_add(smd, ss->ss_start, ss->ss_end - ss->ss_start); | |
275 | } | |
276 | } | |
277 | ||
278 | /* | |
279 | * Wait for any in-progress space_map_load() to complete. | |
280 | */ | |
281 | void | |
282 | space_map_load_wait(space_map_t *sm) | |
283 | { | |
284 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
285 | ||
286 | while (sm->sm_loading) | |
287 | cv_wait(&sm->sm_load_cv, sm->sm_lock); | |
288 | } | |
289 | ||
290 | /* | |
291 | * Note: space_map_load() will drop sm_lock across dmu_read() calls. | |
292 | * The caller must be OK with this. | |
293 | */ | |
294 | int | |
295 | space_map_load(space_map_t *sm, space_map_ops_t *ops, uint8_t maptype, | |
296 | space_map_obj_t *smo, objset_t *os) | |
297 | { | |
298 | uint64_t *entry, *entry_map, *entry_map_end; | |
299 | uint64_t bufsize, size, offset, end, space; | |
300 | uint64_t mapstart = sm->sm_start; | |
301 | int error = 0; | |
302 | ||
303 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
304 | ||
305 | space_map_load_wait(sm); | |
306 | ||
307 | if (sm->sm_loaded) | |
308 | return (0); | |
309 | ||
310 | sm->sm_loading = B_TRUE; | |
311 | end = smo->smo_objsize; | |
312 | space = smo->smo_alloc; | |
313 | ||
314 | ASSERT(sm->sm_ops == NULL); | |
315 | VERIFY3U(sm->sm_space, ==, 0); | |
316 | ||
317 | if (maptype == SM_FREE) { | |
318 | space_map_add(sm, sm->sm_start, sm->sm_size); | |
319 | space = sm->sm_size - space; | |
320 | } | |
321 | ||
322 | bufsize = 1ULL << SPACE_MAP_BLOCKSHIFT; | |
323 | entry_map = zio_buf_alloc(bufsize); | |
324 | ||
325 | mutex_exit(sm->sm_lock); | |
326 | if (end > bufsize) | |
327 | dmu_prefetch(os, smo->smo_object, bufsize, end - bufsize); | |
328 | mutex_enter(sm->sm_lock); | |
329 | ||
330 | for (offset = 0; offset < end; offset += bufsize) { | |
331 | size = MIN(end - offset, bufsize); | |
332 | VERIFY(P2PHASE(size, sizeof (uint64_t)) == 0); | |
333 | VERIFY(size != 0); | |
334 | ||
335 | dprintf("object=%llu offset=%llx size=%llx\n", | |
336 | smo->smo_object, offset, size); | |
337 | ||
338 | mutex_exit(sm->sm_lock); | |
339 | error = dmu_read(os, smo->smo_object, offset, size, entry_map); | |
340 | mutex_enter(sm->sm_lock); | |
341 | if (error != 0) | |
342 | break; | |
343 | ||
344 | entry_map_end = entry_map + (size / sizeof (uint64_t)); | |
345 | for (entry = entry_map; entry < entry_map_end; entry++) { | |
346 | uint64_t e = *entry; | |
347 | ||
348 | if (SM_DEBUG_DECODE(e)) /* Skip debug entries */ | |
349 | continue; | |
350 | ||
351 | (SM_TYPE_DECODE(e) == maptype ? | |
352 | space_map_add : space_map_remove)(sm, | |
353 | (SM_OFFSET_DECODE(e) << sm->sm_shift) + mapstart, | |
354 | SM_RUN_DECODE(e) << sm->sm_shift); | |
355 | } | |
356 | } | |
357 | ||
358 | if (error == 0) { | |
359 | VERIFY3U(sm->sm_space, ==, space); | |
360 | ||
361 | sm->sm_loaded = B_TRUE; | |
362 | sm->sm_ops = ops; | |
363 | if (ops != NULL) | |
364 | ops->smop_load(sm); | |
365 | } else { | |
366 | space_map_vacate(sm, NULL, NULL); | |
367 | } | |
368 | ||
369 | zio_buf_free(entry_map, bufsize); | |
370 | ||
371 | sm->sm_loading = B_FALSE; | |
372 | ||
373 | cv_broadcast(&sm->sm_load_cv); | |
374 | ||
375 | return (error); | |
376 | } | |
377 | ||
378 | void | |
379 | space_map_unload(space_map_t *sm) | |
380 | { | |
381 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
382 | ||
383 | if (sm->sm_loaded && sm->sm_ops != NULL) | |
384 | sm->sm_ops->smop_unload(sm); | |
385 | ||
386 | sm->sm_loaded = B_FALSE; | |
387 | sm->sm_ops = NULL; | |
388 | ||
389 | space_map_vacate(sm, NULL, NULL); | |
390 | } | |
391 | ||
392 | uint64_t | |
393 | space_map_alloc(space_map_t *sm, uint64_t size) | |
394 | { | |
395 | uint64_t start; | |
396 | ||
397 | start = sm->sm_ops->smop_alloc(sm, size); | |
398 | if (start != -1ULL) | |
399 | space_map_remove(sm, start, size); | |
400 | return (start); | |
401 | } | |
402 | ||
403 | void | |
404 | space_map_claim(space_map_t *sm, uint64_t start, uint64_t size) | |
405 | { | |
406 | sm->sm_ops->smop_claim(sm, start, size); | |
407 | space_map_remove(sm, start, size); | |
408 | } | |
409 | ||
410 | void | |
411 | space_map_free(space_map_t *sm, uint64_t start, uint64_t size) | |
412 | { | |
413 | space_map_add(sm, start, size); | |
414 | sm->sm_ops->smop_free(sm, start, size); | |
415 | } | |
416 | ||
417 | /* | |
418 | * Note: space_map_sync() will drop sm_lock across dmu_write() calls. | |
419 | */ | |
420 | void | |
421 | space_map_sync(space_map_t *sm, uint8_t maptype, | |
422 | space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx) | |
423 | { | |
424 | spa_t *spa = dmu_objset_spa(os); | |
425 | void *cookie = NULL; | |
426 | space_seg_t *ss; | |
427 | uint64_t bufsize, start, size, run_len; | |
428 | uint64_t *entry, *entry_map, *entry_map_end; | |
429 | ||
430 | ASSERT(MUTEX_HELD(sm->sm_lock)); | |
431 | ||
432 | if (sm->sm_space == 0) | |
433 | return; | |
434 | ||
435 | dprintf("object %4llu, txg %llu, pass %d, %c, count %lu, space %llx\n", | |
436 | smo->smo_object, dmu_tx_get_txg(tx), spa_sync_pass(spa), | |
437 | maptype == SM_ALLOC ? 'A' : 'F', avl_numnodes(&sm->sm_root), | |
438 | sm->sm_space); | |
439 | ||
440 | if (maptype == SM_ALLOC) | |
441 | smo->smo_alloc += sm->sm_space; | |
442 | else | |
443 | smo->smo_alloc -= sm->sm_space; | |
444 | ||
445 | bufsize = (8 + avl_numnodes(&sm->sm_root)) * sizeof (uint64_t); | |
446 | bufsize = MIN(bufsize, 1ULL << SPACE_MAP_BLOCKSHIFT); | |
447 | entry_map = zio_buf_alloc(bufsize); | |
448 | entry_map_end = entry_map + (bufsize / sizeof (uint64_t)); | |
449 | entry = entry_map; | |
450 | ||
451 | *entry++ = SM_DEBUG_ENCODE(1) | | |
452 | SM_DEBUG_ACTION_ENCODE(maptype) | | |
453 | SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) | | |
454 | SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx)); | |
455 | ||
456 | while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) { | |
457 | size = ss->ss_end - ss->ss_start; | |
458 | start = (ss->ss_start - sm->sm_start) >> sm->sm_shift; | |
459 | ||
460 | sm->sm_space -= size; | |
461 | size >>= sm->sm_shift; | |
462 | ||
463 | while (size) { | |
464 | run_len = MIN(size, SM_RUN_MAX); | |
465 | ||
466 | if (entry == entry_map_end) { | |
467 | mutex_exit(sm->sm_lock); | |
468 | dmu_write(os, smo->smo_object, smo->smo_objsize, | |
469 | bufsize, entry_map, tx); | |
470 | mutex_enter(sm->sm_lock); | |
471 | smo->smo_objsize += bufsize; | |
472 | entry = entry_map; | |
473 | } | |
474 | ||
475 | *entry++ = SM_OFFSET_ENCODE(start) | | |
476 | SM_TYPE_ENCODE(maptype) | | |
477 | SM_RUN_ENCODE(run_len); | |
478 | ||
479 | start += run_len; | |
480 | size -= run_len; | |
481 | } | |
482 | kmem_free(ss, sizeof (*ss)); | |
483 | } | |
484 | ||
485 | if (entry != entry_map) { | |
486 | size = (entry - entry_map) * sizeof (uint64_t); | |
487 | mutex_exit(sm->sm_lock); | |
488 | dmu_write(os, smo->smo_object, smo->smo_objsize, | |
489 | size, entry_map, tx); | |
490 | mutex_enter(sm->sm_lock); | |
491 | smo->smo_objsize += size; | |
492 | } | |
493 | ||
494 | zio_buf_free(entry_map, bufsize); | |
495 | ||
496 | VERIFY3U(sm->sm_space, ==, 0); | |
497 | } | |
498 | ||
499 | void | |
500 | space_map_truncate(space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx) | |
501 | { | |
502 | VERIFY(dmu_free_range(os, smo->smo_object, 0, -1ULL, tx) == 0); | |
503 | ||
504 | smo->smo_objsize = 0; | |
505 | smo->smo_alloc = 0; | |
506 | } |