/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * This file contains the code to implement file range locking in
 * ZFS, although there isn't much specific to ZFS (all that comes to
 * mind is support for growing the blocksize).
 *
 * Interface
 * ---------
 * Defined in zfs_rlock.h but essentially:
 *	rl = zfs_range_lock(zp, off, len, lock_type);
 *	zfs_range_unlock(rl);
 *	zfs_range_reduce(rl, off, len);
 *
 * AVL tree
 * --------
 * An AVL tree is used to maintain the state of the existing ranges
 * that are locked for exclusive (writer) or shared (reader) use.
 * The starting range offset is used for searching and sorting the tree.
 *
 * Common case
 * -----------
 * The (hopefully) usual case is of no overlaps or contention for
 * locks. On entry to zfs_range_lock() a rl_t is allocated; the tree
 * is searched and no overlap found, and *this* rl_t is placed in the tree.
 *
 * Overlaps/Reference counting/Proxy locks
 * ---------------------------------------
 * The avl code only allows one node at a particular offset. Also it's very
 * inefficient to search through all previous entries looking for overlaps
 * (because the very 1st in the ordered list might be at offset 0 but
 * cover the whole file).
 * So this implementation uses reference counts and proxy range locks.
 * Firstly, only reader locks use reference counts and proxy locks,
 * because writer locks are exclusive.
 * When a reader lock overlaps with another then a proxy lock is created
 * for that range and replaces the original lock. If the overlap
 * is exact then the reference count of the proxy is simply incremented.
 * Otherwise, the proxy lock is split into smaller lock ranges and
 * new proxy locks created for the non-overlapping ranges.
 * The reference counts are adjusted accordingly.
 * Meanwhile, the original lock is kept around (this is the caller's handle)
 * and its offset and length are used when releasing the lock.
 * (A worked example of this proxy splitting follows this comment block.)
 *
 * Thread coordination
 * -------------------
 * In order to make wakeups efficient and to ensure multiple continuous
 * readers on a range don't starve a writer for the same range lock,
 * two condition variables are allocated in each rl_t.
 * If a writer (or reader) can't get a range it initialises the writer
 * (or reader) cv; sets a flag saying there's a writer (or reader) waiting;
 * and waits on that cv. When a thread unlocks that range it wakes up all
 * writers then all readers before destroying the lock.
 *
 * Append mode writes
 * ------------------
 * Append mode writes need to lock a range at the end of a file.
 * The offset of the end of the file is determined under the
 * range locking mutex, the lock type is converted from RL_APPEND to
 * RL_WRITER, and the range is locked.
 *
 * Grow block handling
 * -------------------
 * ZFS supports multiple block sizes, currently up to 128K. The smallest
 * block size is used for the file, which is grown as needed. During this
 * growth all other writers and readers must be excluded.
 * So if the block size needs to be grown then the whole file is
 * exclusively locked, then later the caller will reduce the lock
 * range to just the range to be written using zfs_range_reduce().
 */
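
/*
 * Worked example of the proxy mechanism (hypothetical values, written
 * as (offset, length) pairs):
 *
 *	Reader A locks (0, 8):  the tree holds A's rl_t directly.
 *	Reader B locks (4, 8):  the overlap replaces A's entry with
 *	proxies (0, 4) cnt=1 and (4, 4) cnt=1; (4, 4) is then bumped to
 *	cnt=2 and a new proxy (8, 4) cnt=1 covers the rest of B's range.
 *	A's unlock decrements (0, 4) and (4, 4), freeing (0, 4);
 *	B's unlock decrements (4, 4) and (8, 4), freeing both.
 */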

#include <sys/zfs_rlock.h>

/*
 * Check if a write lock can be grabbed, or wait and recheck until available.
 */
static void
zfs_range_lock_writer(znode_t *zp, rl_t *new)
{
	avl_tree_t *tree = &zp->z_range_avl;
	rl_t *rl;
	avl_index_t where;
	uint64_t end_size;
	uint64_t off = new->r_off;
	uint64_t len = new->r_len;

	for (;;) {
		/*
		 * Range locking is also used by zvol and uses a
		 * dummied up znode. However, for zvol, we don't need to
		 * append or grow blocksize, and besides we don't have
		 * a "sa" data or z_zfsvfs - so skip that processing.
		 *
		 * Yes, this is ugly, and would be solved by not handling
		 * grow or append in range lock code. If that was done then
		 * we could make the range locking code generically available
		 * to other non-zfs consumers.
		 */
		if (zp->z_vnode) { /* caller is ZPL */
			/*
			 * If in append mode pick up the current end of file.
			 * This is done under z_range_lock to avoid races.
			 */
			if (new->r_type == RL_APPEND)
				new->r_off = zp->z_size;

			/*
			 * If we need to grow the block size then grab the
			 * whole file range. This is also done under
			 * z_range_lock to avoid races.
			 */
			end_size = MAX(zp->z_size, new->r_off + len);
			if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
			    zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) {
				new->r_off = 0;
				new->r_len = UINT64_MAX;
			}
		}

		/*
		 * First check for the usual case of no locks
		 */
		if (avl_numnodes(tree) == 0) {
			new->r_type = RL_WRITER; /* convert to writer */
			avl_add(tree, new);
			return;
		}

		/*
		 * Look for any locks in the range.
		 */
		rl = avl_find(tree, new, &where);
		if (rl)
			goto wait; /* already locked at same offset */

		rl = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
		if (rl && (rl->r_off < new->r_off + new->r_len))
			goto wait;

		rl = (rl_t *)avl_nearest(tree, where, AVL_BEFORE);
		if (rl && rl->r_off + rl->r_len > new->r_off)
			goto wait;

		new->r_type = RL_WRITER; /* convert possible RL_APPEND */
		avl_insert(tree, new, where);
		return;
wait:
		if (!rl->r_write_wanted) {
			cv_init(&rl->r_wr_cv, NULL, CV_DEFAULT, NULL);
			rl->r_write_wanted = B_TRUE;
		}
		cv_wait(&rl->r_wr_cv, &zp->z_range_lock);

		/* reset to original */
		new->r_off = off;
		new->r_len = len;
	}
}
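
/*
 * Illustration of the three writer overlap checks above (hypothetical
 * offsets): for a new writer range (100, 50), avl_find() catches an
 * existing lock starting exactly at 100; AVL_AFTER catches one starting
 * at e.g. 120 (since 120 < 100 + 50); and AVL_BEFORE catches one at
 * e.g. offset 80, length 30 (since 80 + 30 > 100). Any hit sends the
 * writer to wait on that lock's write cv.
 */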

/*
 * If this is an original (non-proxy) lock then replace it by
 * a proxy and return the proxy.
 */
static rl_t *
zfs_range_proxify(avl_tree_t *tree, rl_t *rl)
{
	rl_t *proxy;

	if (rl->r_proxy)
		return (rl); /* already a proxy */

	ASSERT3U(rl->r_cnt, ==, 1);
	ASSERT(rl->r_write_wanted == B_FALSE);
	ASSERT(rl->r_read_wanted == B_FALSE);
	avl_remove(tree, rl);
	rl->r_cnt = 0;

	/* create a proxy range lock */
	proxy = kmem_alloc(sizeof (rl_t), KM_SLEEP);
	proxy->r_off = rl->r_off;
	proxy->r_len = rl->r_len;
	proxy->r_cnt = 1;
	proxy->r_type = RL_READER;
	proxy->r_proxy = B_TRUE;
	proxy->r_write_wanted = B_FALSE;
	proxy->r_read_wanted = B_FALSE;
	avl_add(tree, proxy);

	return (proxy);
}

/*
 * Split the range lock at the supplied offset
 * returning the *front* proxy.
 */
static rl_t *
zfs_range_split(avl_tree_t *tree, rl_t *rl, uint64_t off)
{
	rl_t *front, *rear;

	ASSERT3U(rl->r_len, >, 1);
	ASSERT3U(off, >, rl->r_off);
	ASSERT3U(off, <, rl->r_off + rl->r_len);
	ASSERT(rl->r_write_wanted == B_FALSE);
	ASSERT(rl->r_read_wanted == B_FALSE);

	/* create the rear proxy range lock */
	rear = kmem_alloc(sizeof (rl_t), KM_SLEEP);
	rear->r_off = off;
	rear->r_len = rl->r_off + rl->r_len - off;
	rear->r_cnt = rl->r_cnt;
	rear->r_type = RL_READER;
	rear->r_proxy = B_TRUE;
	rear->r_write_wanted = B_FALSE;
	rear->r_read_wanted = B_FALSE;

	front = zfs_range_proxify(tree, rl);
	front->r_len = off - rl->r_off;

	avl_insert_here(tree, rear, front, AVL_AFTER);
	return (front);
}
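
/*
 * Example of zfs_range_split() (hypothetical values): splitting a lock
 * covering offset 0, length 10 at offset 4 yields a front proxy
 * (r_off 0, r_len 4) and a rear proxy (r_off 4, r_len 6), both carrying
 * the original reference count; the front proxy is returned.
 */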

/*
 * Create and add a new proxy range lock for the supplied range.
 */
static void
zfs_range_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len)
{
	rl_t *rl;

	ASSERT(len);
	rl = kmem_alloc(sizeof (rl_t), KM_SLEEP);
	rl->r_off = off;
	rl->r_len = len;
	rl->r_cnt = 1;
	rl->r_type = RL_READER;
	rl->r_proxy = B_TRUE;
	rl->r_write_wanted = B_FALSE;
	rl->r_read_wanted = B_FALSE;
	avl_add(tree, rl);
}

static void
zfs_range_add_reader(avl_tree_t *tree, rl_t *new, rl_t *prev, avl_index_t where)
{
	rl_t *next;
	uint64_t off = new->r_off;
	uint64_t len = new->r_len;

	/*
	 * prev arrives either:
	 * - pointing to an entry at the same offset
	 * - pointing to the entry with the closest previous offset whose
	 *   range may overlap with the new range
	 * - null, if there were no ranges starting before the new one
	 */
	if (prev) {
		if (prev->r_off + prev->r_len <= off) {
			prev = NULL;
		} else if (prev->r_off != off) {
			/*
			 * convert to proxy if needed then
			 * split this entry and bump ref count
			 */
			prev = zfs_range_split(tree, prev, off);
			prev = AVL_NEXT(tree, prev); /* move to rear range */
		}
	}
	ASSERT((prev == NULL) || (prev->r_off == off));

	if (prev)
		next = prev;
	else
		next = (rl_t *)avl_nearest(tree, where, AVL_AFTER);

	if (next == NULL || off + len <= next->r_off) {
		/* no overlaps, use the original new rl_t in the tree */
		avl_insert(tree, new, where);
		return;
	}

	if (off < next->r_off) {
		/* Add a proxy for initial range before the overlap */
		zfs_range_new_proxy(tree, off, next->r_off - off);
	}

	new->r_cnt = 0; /* will use proxies in tree */
	/*
	 * We now search forward through the ranges, until we go past the end
	 * of the new range. For each entry we make it a proxy if it
	 * isn't already, then bump its reference count. If there are any
	 * gaps between the ranges then we create a new proxy range.
	 */
	for (prev = NULL; next; prev = next, next = AVL_NEXT(tree, next)) {
		if (off + len <= next->r_off)
			break;
		if (prev && prev->r_off + prev->r_len < next->r_off) {
			/* there's a gap */
			ASSERT3U(next->r_off, >, prev->r_off + prev->r_len);
			zfs_range_new_proxy(tree, prev->r_off + prev->r_len,
			    next->r_off - (prev->r_off + prev->r_len));
		}
		if (off + len == next->r_off + next->r_len) {
			/* exact overlap with end */
			next = zfs_range_proxify(tree, next);
			next->r_cnt++;
			return;
		}
		if (off + len < next->r_off + next->r_len) {
			/* new range ends in the middle of this block */
			next = zfs_range_split(tree, next, off + len);
			next->r_cnt++;
			return;
		}
		ASSERT3U(off + len, >, next->r_off + next->r_len);
		next = zfs_range_proxify(tree, next);
		next->r_cnt++;
	}

	/* Add the remaining end range. */
	zfs_range_new_proxy(tree, prev->r_off + prev->r_len,
	    (off + len) - (prev->r_off + prev->r_len));
}
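
/*
 * Walk-through of zfs_range_add_reader() (hypothetical (offset, length)
 * values): a new reader (10, 30) over existing reader proxies (15, 5)
 * and (25, 5) first adds a proxy (10, 5) for the leading gap, bumps
 * (15, 5), fills the interior gap with a new proxy (20, 5), bumps
 * (25, 5), and finally adds (30, 10) for the tail. The caller's rl_t
 * keeps r_off=10, r_len=30 with r_cnt=0, marking that the proxies in
 * the tree hold the state.
 */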

/*
 * Check if a reader lock can be grabbed, or wait and recheck until available.
 */
static void
zfs_range_lock_reader(znode_t *zp, rl_t *new)
{
	avl_tree_t *tree = &zp->z_range_avl;
	rl_t *prev, *next;
	avl_index_t where;
	uint64_t off = new->r_off;
	uint64_t len = new->r_len;

	/*
	 * Look for any writer locks in the range.
	 */
retry:
	prev = avl_find(tree, new, &where);
	if (prev == NULL)
		prev = (rl_t *)avl_nearest(tree, where, AVL_BEFORE);

	/*
	 * Check the previous range for a writer lock overlap.
	 */
	if (prev && (off < prev->r_off + prev->r_len)) {
		if ((prev->r_type == RL_WRITER) || (prev->r_write_wanted)) {
			if (!prev->r_read_wanted) {
				cv_init(&prev->r_rd_cv, NULL, CV_DEFAULT, NULL);
				prev->r_read_wanted = B_TRUE;
			}
			cv_wait(&prev->r_rd_cv, &zp->z_range_lock);
			goto retry;
		}
		if (off + len < prev->r_off + prev->r_len)
			goto got_lock;
	}

	/*
	 * Search through the following ranges to see if there's
	 * any write lock overlap.
	 */
	if (prev)
		next = AVL_NEXT(tree, prev);
	else
		next = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
	for (; next; next = AVL_NEXT(tree, next)) {
		if (off + len <= next->r_off)
			goto got_lock;
		if ((next->r_type == RL_WRITER) || (next->r_write_wanted)) {
			if (!next->r_read_wanted) {
				cv_init(&next->r_rd_cv, NULL, CV_DEFAULT, NULL);
				next->r_read_wanted = B_TRUE;
			}
			cv_wait(&next->r_rd_cv, &zp->z_range_lock);
			goto retry;
		}
		if (off + len <= next->r_off + next->r_len)
			goto got_lock;
	}

got_lock:
	/*
	 * Add the read lock, which may involve splitting existing
	 * locks and bumping ref counts (r_cnt).
	 */
	zfs_range_add_reader(tree, new, prev, where);
}
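
/*
 * Behavioural note (illustrative): a reader wanting (10, 20) behind a
 * writer lock (0, 15) waits on that writer's r_rd_cv; after wakeup it
 * retries the full search, since the tree may have changed while it
 * was asleep.
 */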

/*
 * Lock a range (offset, length) as either shared (RL_READER)
 * or exclusive (RL_WRITER). Returns the range lock structure
 * for later unlocking or range reducing (if the entire file
 * was previously locked as RL_WRITER).
 */
rl_t *
zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
{
	rl_t *new;

	ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND);

	new = kmem_alloc(sizeof (rl_t), KM_SLEEP);
	new->r_zp = zp;
	new->r_off = off;
	if (len + off < off)	/* overflow */
		len = UINT64_MAX - off;
	new->r_len = len;
	new->r_cnt = 1; /* assume it's going to be in the tree */
	new->r_type = type;
	new->r_proxy = B_FALSE;
	new->r_write_wanted = B_FALSE;
	new->r_read_wanted = B_FALSE;

	mutex_enter(&zp->z_range_lock);
	if (type == RL_READER) {
		/*
		 * First check for the usual case of no locks
		 */
		if (avl_numnodes(&zp->z_range_avl) == 0)
			avl_add(&zp->z_range_avl, new);
		else
			zfs_range_lock_reader(zp, new);
	} else
		zfs_range_lock_writer(zp, new); /* RL_WRITER or RL_APPEND */
	mutex_exit(&zp->z_range_lock);
	return (new);
}
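
/*
 * Typical usage (illustrative sketch of a hypothetical caller, not an
 * excerpt from the ZPL):
 *
 *	rl_t *rl;
 *
 *	rl = zfs_range_lock(zp, off, len, RL_READER);
 *	... copy file data for [off, off + len) ...
 *	zfs_range_unlock(rl);
 *
 * Append writers pass RL_APPEND and read the actual locked offset back
 * from rl->r_off after zfs_range_lock() returns.
 */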

/*
 * Unlock a reader lock
 */
static void
zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
{
	avl_tree_t *tree = &zp->z_range_avl;
	rl_t *rl, *next = NULL;
	uint64_t len;

	/*
	 * The common case is when the remove entry is in the tree
	 * (cnt == 1) meaning there have been no other reader locks
	 * overlapping with this one. Otherwise the remove entry will
	 * have been removed from the tree and replaced by proxies (one
	 * or more ranges mapping to the entire range).
	 */
	if (remove->r_cnt == 1) {
		avl_remove(tree, remove);
		if (remove->r_write_wanted) {
			cv_broadcast(&remove->r_wr_cv);
			cv_destroy(&remove->r_wr_cv);
		}
		if (remove->r_read_wanted) {
			cv_broadcast(&remove->r_rd_cv);
			cv_destroy(&remove->r_rd_cv);
		}
	} else {
		ASSERT3U(remove->r_cnt, ==, 0);
		ASSERT3U(remove->r_write_wanted, ==, 0);
		ASSERT3U(remove->r_read_wanted, ==, 0);
		/*
		 * Find start proxy representing this reader lock,
		 * then decrement ref count on all proxies
		 * that make up this range, freeing them as needed.
		 */
		rl = avl_find(tree, remove, NULL);
		ASSERT(rl);
		ASSERT(rl->r_cnt);
		ASSERT(rl->r_type == RL_READER);
		for (len = remove->r_len; len != 0; rl = next) {
			len -= rl->r_len;
			if (len) {
				next = AVL_NEXT(tree, rl);
				ASSERT(next);
				ASSERT(rl->r_off + rl->r_len == next->r_off);
				ASSERT(next->r_cnt);
				ASSERT(next->r_type == RL_READER);
			}
			rl->r_cnt--;
			if (rl->r_cnt == 0) {
				avl_remove(tree, rl);
				if (rl->r_write_wanted) {
					cv_broadcast(&rl->r_wr_cv);
					cv_destroy(&rl->r_wr_cv);
				}
				if (rl->r_read_wanted) {
					cv_broadcast(&rl->r_rd_cv);
					cv_destroy(&rl->r_rd_cv);
				}
				kmem_free(rl, sizeof (rl_t));
			}
		}
	}
	kmem_free(remove, sizeof (rl_t));
}

/*
 * Unlock range and destroy range lock structure.
 */
void
zfs_range_unlock(rl_t *rl)
{
	znode_t *zp = rl->r_zp;

	ASSERT(rl->r_type == RL_WRITER || rl->r_type == RL_READER);
	ASSERT(rl->r_cnt == 1 || rl->r_cnt == 0);
	ASSERT(!rl->r_proxy);

	mutex_enter(&zp->z_range_lock);
	if (rl->r_type == RL_WRITER) {
		/* writer locks can't be shared or split */
		avl_remove(&zp->z_range_avl, rl);
		mutex_exit(&zp->z_range_lock);
		if (rl->r_write_wanted) {
			cv_broadcast(&rl->r_wr_cv);
			cv_destroy(&rl->r_wr_cv);
		}
		if (rl->r_read_wanted) {
			cv_broadcast(&rl->r_rd_cv);
			cv_destroy(&rl->r_rd_cv);
		}
		kmem_free(rl, sizeof (rl_t));
	} else {
		/*
		 * lock may be shared, let zfs_range_unlock_reader()
		 * release the lock and free the rl_t
		 */
		zfs_range_unlock_reader(zp, rl);
		mutex_exit(&zp->z_range_lock);
	}
}

/*
 * Reduce range locked as RL_WRITER from whole file to specified range.
 * Asserts the whole file is exclusively locked and so there's only one
 * entry in the tree.
 */
void
zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len)
{
	znode_t *zp = rl->r_zp;

	/* Ensure there are no other locks */
	ASSERT(avl_numnodes(&zp->z_range_avl) == 1);
	ASSERT(rl->r_off == 0);
	ASSERT(rl->r_type == RL_WRITER);
	ASSERT(!rl->r_proxy);
	ASSERT3U(rl->r_len, ==, UINT64_MAX);
	ASSERT3U(rl->r_cnt, ==, 1);

	mutex_enter(&zp->z_range_lock);
	rl->r_off = off;
	rl->r_len = len;
	mutex_exit(&zp->z_range_lock);
	if (rl->r_write_wanted)
		cv_broadcast(&rl->r_wr_cv);
	if (rl->r_read_wanted)
		cv_broadcast(&rl->r_rd_cv);
}
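
/*
 * Sketch of the grow-block flow (hypothetical caller; the real write
 * path lives in the ZPL): zfs_range_lock() silently widens the lock to
 * the whole file when the blocksize must grow, so the caller grows the
 * block and then narrows the lock back to the range being written:
 *
 *	rl = zfs_range_lock(zp, off, len, RL_WRITER);
 *	if (rl->r_len == UINT64_MAX) {
 *		... grow the blocksize ...
 *		zfs_range_reduce(rl, off, len);
 *	}
 *	... write [off, off + len) ...
 *	zfs_range_unlock(rl);
 */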

/*
 * AVL comparison function used to order range locks
 * Locks are ordered on the start offset of the range.
 */
int
zfs_range_compare(const void *arg1, const void *arg2)
{
	const rl_t *rl1 = arg1;
	const rl_t *rl2 = arg2;

	if (rl1->r_off > rl2->r_off)
		return (1);
	if (rl1->r_off < rl2->r_off)
		return (-1);
	return (0);
}
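
/*
 * The tree is expected to be initialised with this comparator, roughly
 * as below (sketch only; the init site and the AVL node field name are
 * assumptions, defined by zfs_rlock.h and the znode setup code):
 *
 *	avl_create(&zp->z_range_avl, zfs_range_compare,
 *	    sizeof (rl_t), offsetof(rl_t, r_node));
 */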