1 /* AFS file locking support
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
// States recorded in fl_u.afs.state of a file_lock request. The same field
// also receives the error value handed to afs_next_locker and the result of
// the set-lock RPC in afs_do_setlk.
// AFS_LOCK_GRANTED: stored when a request is moved onto granted_locks.
14 #define AFS_LOCK_GRANTED 0
// AFS_LOCK_PENDING: initial state; afs_do_setlk sleeps until it changes.
15 #define AFS_LOCK_PENDING 1
// AFS_LOCK_YOUR_TRY: stored by afs_next_locker on the waiter it selects.
16 #define AFS_LOCK_YOUR_TRY 2
// Workqueue on which the lock_work delayed work item of a vnode is queued.
18 struct workqueue_struct
*afs_lock_manager
;
// Forward declarations of helpers that are defined further down this file.
20 static void afs_next_locker(struct afs_vnode
*vnode
, int error
);
21 static void afs_fl_copy_lock(struct file_lock
*new, struct file_lock
*fl
);
22 static void afs_fl_release_private(struct file_lock
*fl
);
// Lock-record hooks that afs_do_setlk installs on each request (fl->fl_ops),
// routing copy and release of file_lock records to the handlers in this file.
// NOTE(review): the end of this initializer (original line 27) is not present
// in this listing.
24 static const struct file_lock_operations afs_lock_ops
= {
25 .fl_copy_lock
= afs_fl_copy_lock
,
26 .fl_release_private
= afs_fl_release_private
,
// Set vnode->lock_state to the given state.
// vnode: the vnode whose lock state machine is being advanced.
// state: the new value to store.
29 static inline void afs_set_lock_state(struct afs_vnode
*vnode
, enum afs_lock_state state
)
// Emit the old and the new value before the field is overwritten.
31 _debug("STATE %u -> %u", vnode
->lock_state
, state
);
32 vnode
->lock_state
= state
;
36 * if the callback is broken on this vnode, then the lock may now be available
// Kick the lock queue when the lock on this vnode may have been released.
// Takes vnode->lock and, only if the vnode is in
// AFS_VNODE_LOCK_WAITING_FOR_CB, calls afs_next_locker with no error.
38 void afs_lock_may_be_available(struct afs_vnode
*vnode
)
40 _enter("{%x:%u}", vnode
->fid
.vid
, vnode
->fid
.vnode
);
42 spin_lock(&vnode
->lock
);
// Every other lock state is left untouched.
43 if (vnode
->lock_state
== AFS_VNODE_LOCK_WAITING_FOR_CB
)
44 afs_next_locker(vnode
, 0);
45 spin_unlock(&vnode
->lock
);
49 * the lock will time out in 5 minutes unless we extend it, so schedule
50 * extension in a bit less than that time
// Queue vnode->lock_work on afs_lock_manager with a delay of
// AFS_LOCKWAIT * HZ / 2. The function is marked __maybe_unused and no call
// to it appears in the visible part of this file.
52 static void __maybe_unused
afs_schedule_lock_extension(struct afs_vnode
*vnode
)
54 queue_delayed_work(afs_lock_manager
, &vnode
->lock_work
,
55 AFS_LOCKWAIT
* HZ
/ 2);
59 * grant one or more locks (readlocks are allowed to jump the queue if the
60 * first lock in the queue is itself a readlock)
61 * - the caller must hold the vnode lock
// Walk vnode->pending_locks and move requests onto vnode->granted_locks,
// storing AFS_LOCK_GRANTED in each moved request.
// The header comment requires the caller to hold the vnode lock.
63 static void afs_grant_locks(struct afs_vnode
*vnode
)
65 struct file_lock
*p
, *_p
;
// True when the lock type recorded on the vnode is a write lock.
66 bool exclusive
= (vnode
->lock_type
== AFS_LOCK_WRITE
);
// The safe iterator is needed because entries are moved off the list.
68 list_for_each_entry_safe(p
, _p
, &vnode
->pending_locks
, fl_u
.afs
.link
) {
// NOTE(review): the statement controlled by this test (original lines 70-71)
// is absent from this listing; presumably a write request is skipped when
// the held lock is not exclusive - confirm against the full file.
69 if (!exclusive
&& p
->fl_type
== F_WRLCK
)
72 list_move_tail(&p
->fl_u
.afs
.link
, &vnode
->granted_locks
);
73 p
->fl_u
.afs
.state
= AFS_LOCK_GRANTED
;
// NOTE(review): original line 74 is not visible here, so whether the waiter
// is woken at this point cannot be confirmed from this listing.
79 * If an error is specified, reject every pending lock that matches the
80 * authentication and type of the lock we failed to get. If there are any
81 * remaining lockers, try to wake up one of them to have a go.
// Finish off a lock attempt on the vnode and pass the baton on.
// vnode: the vnode whose pending_locks queue is processed.
// error: value stored in the state of each request that gets rejected.
// Requests on pending_locks whose type and key match the finished attempt
// are unlinked, given error as their state and woken; one remaining request
// is then marked AFS_LOCK_YOUR_TRY and woken.
83 static void afs_next_locker(struct afs_vnode
*vnode
, int error
)
85 struct file_lock
*p
, *_p
, *next
= NULL
;
// Key recorded on the vnode for the lock attempt that has just ended.
86 struct key
*key
= vnode
->lock_key
;
87 unsigned int fl_type
= F_RDLCK
;
// NOTE(review): the statement controlled by this test (original line 92) is
// not visible; presumably it switches fl_type to F_WRLCK - confirm.
91 if (vnode
->lock_type
== AFS_LOCK_WRITE
)
94 list_for_each_entry_safe(p
, _p
, &vnode
->pending_locks
, fl_u
.afs
.link
) {
// NOTE(review): the first clause of this condition (original line 95) is
// missing from the listing; per the header comment it presumably tests that
// an error was specified.
96 p
->fl_type
== fl_type
&&
97 afs_file_key(p
->fl_file
) == key
) {
// Reject this request: unlink it, record the error and wake its owner.
98 list_del_init(&p
->fl_u
.afs
.link
);
99 p
->fl_u
.afs
.state
= error
;
100 wake_up(&p
->fl_wait
);
103 /* Select the next locker to hand off to. */
// NOTE(review): only part of the selection test is visible; the lines that
// open the condition and assign next (original lines 104 and 106-107) are
// not shown.
105 (next
->fl_type
== F_WRLCK
|| p
->fl_type
== F_RDLCK
))
// The key pointer recorded on the vnode is cleared here; whether a reference
// is dropped first is not visible in this listing.
110 vnode
->lock_key
= NULL
;
// Hand-off path: the vnode goes to AFS_VNODE_LOCK_SETTING and the chosen
// waiter is told that it is its turn.
114 afs_set_lock_state(vnode
, AFS_VNODE_LOCK_SETTING
);
115 next
->fl_u
.afs
.state
= AFS_LOCK_YOUR_TRY
;
116 wake_up(&next
->fl_wait
);
// NOTE(review): presumably the branch taken when no waiter was selected; the
// controlling lines are not visible.
118 afs_set_lock_state(vnode
, AFS_VNODE_LOCK_NONE
);
125 * Get a lock on a file
// Issue the set-lock request for the vnode to a fileserver.
// vnode: the file to lock.
// key: the key passed to afs_begin_vnode_operation.
// type: the lock type handed to afs_fs_set_lock.
// The value traced by _leave is the result of afs_end_vnode_operation when
// the operation could be begun; the declaration of ret and its value
// otherwise are not visible in this listing.
127 static int afs_set_lock(struct afs_vnode
*vnode
, struct key
*key
,
128 afs_lock_type_t type
)
130 struct afs_fs_cursor fc
;
133 _enter("%s{%x:%u.%u},%x,%u",
138 key_serial(key
), type
);
141 if (afs_begin_vnode_operation(&fc
, vnode
, key
)) {
// Unlike the extend and release paths, this one iterates with
// afs_select_fileserver rather than afs_select_current_fileserver.
142 while (afs_select_fileserver(&fc
)) {
// Snapshot the callback break counters before the call is made; the value
// is passed to afs_vnode_commit_status afterwards.
143 fc
.cb_break
= vnode
->cb_break
+ vnode
->cb_s_break
;
144 afs_fs_set_lock(&fc
, type
);
147 afs_check_for_remote_deletion(&fc
, fc
.vnode
);
148 afs_vnode_commit_status(&fc
, vnode
, fc
.cb_break
);
149 ret
= afs_end_vnode_operation(&fc
);
152 _leave(" = %d", ret
);
157 * Extend a lock on a file
// Issue the extend-lock request for the vnode.
// vnode: the file whose lock is extended.
// key: the key passed to afs_begin_vnode_operation.
// The value traced by _leave is the result of afs_end_vnode_operation when
// the operation could be begun; the declaration of ret and its value
// otherwise are not visible in this listing.
159 static int afs_extend_lock(struct afs_vnode
*vnode
, struct key
*key
)
161 struct afs_fs_cursor fc
;
164 _enter("%s{%x:%u.%u},%x",
172 if (afs_begin_vnode_operation(&fc
, vnode
, key
)) {
// Iterates with afs_select_current_fileserver, not afs_select_fileserver.
173 while (afs_select_current_fileserver(&fc
)) {
// Snapshot the callback break counters before the call is made; the value
// is passed to afs_vnode_commit_status afterwards.
174 fc
.cb_break
= vnode
->cb_break
+ vnode
->cb_s_break
;
175 afs_fs_extend_lock(&fc
);
178 afs_check_for_remote_deletion(&fc
, fc
.vnode
);
179 afs_vnode_commit_status(&fc
, vnode
, fc
.cb_break
);
180 ret
= afs_end_vnode_operation(&fc
);
183 _leave(" = %d", ret
);
188 * Release a lock on a file
// Issue the release-lock request for the vnode.
// vnode: the file whose lock is released.
// key: the key passed to afs_begin_vnode_operation.
// The value traced by _leave is the result of afs_end_vnode_operation when
// the operation could be begun; the declaration of ret and its value
// otherwise are not visible in this listing.
190 static int afs_release_lock(struct afs_vnode
*vnode
, struct key
*key
)
192 struct afs_fs_cursor fc
;
195 _enter("%s{%x:%u.%u},%x",
203 if (afs_begin_vnode_operation(&fc
, vnode
, key
)) {
// Iterates with afs_select_current_fileserver, not afs_select_fileserver.
204 while (afs_select_current_fileserver(&fc
)) {
// Snapshot the callback break counters before the call is made; the value
// is passed to afs_vnode_commit_status afterwards.
205 fc
.cb_break
= vnode
->cb_break
+ vnode
->cb_s_break
;
206 afs_fs_release_lock(&fc
);
209 afs_check_for_remote_deletion(&fc
, fc
.vnode
);
210 afs_vnode_commit_status(&fc
, vnode
, fc
.cb_break
);
211 ret
= afs_end_vnode_operation(&fc
);
214 _leave(" = %d", ret
);
219 * do work for a lock, including:
220 * - probing for a lock we're waiting on but didn't get immediately
221 * - extending a lock that's close to timing out
// Work item handler for the lock_work delayed work of a vnode.
// work: the work_struct embedded in vnode->lock_work; the owning vnode is
// recovered with container_of.
// The handler takes vnode->lock and dispatches on vnode->lock_state; each
// visible case drops the spinlock around any RPC it makes.
223 void afs_lock_work(struct work_struct
*work
)
225 struct afs_vnode
*vnode
=
226 container_of(work
, struct afs_vnode
, lock_work
.work
);
230 _enter("{%x:%u}", vnode
->fid
.vid
, vnode
->fid
.vnode
);
232 spin_lock(&vnode
->lock
);
235 _debug("wstate %u for %p", vnode
->lock_state
, vnode
);
236 switch (vnode
->lock_state
) {
// An unlock was deferred to this work item (see afs_defer_unlock).
237 case AFS_VNODE_LOCK_NEED_UNLOCK
:
// Mark the release as in progress, then drop the spinlock for the RPC.
239 afs_set_lock_state(vnode
, AFS_VNODE_LOCK_UNLOCKING
);
240 spin_unlock(&vnode
->lock
);
242 /* attempt to release the server lock; if it fails, we just
243 * wait 5 minutes and it'll expire anyway */
244 ret
= afs_release_lock(vnode
, vnode
->lock_key
);
// NOTE(review): the test guarding this warning (original line 245) is not
// visible; presumably it fires only when ret reports a failure.
246 printk(KERN_WARNING
"AFS:"
247 " Failed to release lock on {%x:%x} error %d\n",
248 vnode
->fid
.vid
, vnode
->fid
.vnode
, ret
);
// Whatever the RPC result, pass the baton to the next waiter with no error.
250 spin_lock(&vnode
->lock
);
251 afs_next_locker(vnode
, 0);
252 spin_unlock(&vnode
->lock
);
255 /* If we've already got a lock, then it must be time to extend that
256 * lock as AFS locks time out after 5 minutes.
258 case AFS_VNODE_LOCK_GRANTED
:
261 ASSERT(!list_empty(&vnode
->granted_locks
));
// Take a reference on the key before the spinlock is dropped for the RPC.
263 key
= key_get(vnode
->lock_key
);
264 afs_set_lock_state(vnode
, AFS_VNODE_LOCK_EXTENDING
);
265 spin_unlock(&vnode
->lock
);
267 ret
= afs_extend_lock(vnode
, key
); /* RPC */
// NOTE(review): the lines between the RPC and this warning (original lines
// 268-270) are not visible, including any test on ret and any release of
// the key reference taken above.
271 pr_warning("AFS: Failed to extend lock on {%x:%x} error %d\n",
272 vnode
->fid
.vid
, vnode
->fid
.vnode
, ret
);
274 spin_lock(&vnode
->lock
);
// The state may have changed while the spinlock was dropped.
// NOTE(review): the statement taken when it did (original line 277) is not
// visible in this listing.
276 if (vnode
->lock_state
!= AFS_VNODE_LOCK_EXTENDING
)
278 afs_set_lock_state(vnode
, AFS_VNODE_LOCK_GRANTED
);
// Re-arm the work item for the next extension.
// NOTE(review): the delay argument (original line 282) is not visible.
281 queue_delayed_work(afs_lock_manager
, &vnode
->lock_work
,
283 spin_unlock(&vnode
->lock
);
287 /* If we're waiting for a callback to indicate lock release, we can't
288 * actually rely on this, so need to recheck at regular intervals. The
289 * problem is that the server might not notify us if the lock just
290 * expires (say because a client died) rather than being explicitly
293 case AFS_VNODE_LOCK_WAITING_FOR_CB
:
// Periodic recheck: let the next waiter have another go, with no error.
295 afs_next_locker(vnode
, 0);
296 spin_unlock(&vnode
->lock
);
// NOTE(review): the label of the branch below (original lines 298-299) is
// not visible; presumably it is the default case.
300 /* Looks like a lock request was withdrawn. */
301 spin_unlock(&vnode
->lock
);
308 * pass responsibility for the unlocking of a vnode on the server to the
309 * manager thread, lest a pending signal in the calling thread interrupt
311 * - the caller must hold the vnode lock
// Hand the release of the server lock over to the lock work item.
// Acts only when vnode->granted_locks is empty and the vnode is in
// AFS_VNODE_LOCK_GRANTED or AFS_VNODE_LOCK_EXTENDING; otherwise nothing
// visible here is done.
// The header comment requires the caller to hold the vnode lock.
313 static void afs_defer_unlock(struct afs_vnode
*vnode
)
315 _enter("%u", vnode
->lock_state
);
317 if (list_empty(&vnode
->granted_locks
) &&
318 (vnode
->lock_state
== AFS_VNODE_LOCK_GRANTED
||
319 vnode
->lock_state
== AFS_VNODE_LOCK_EXTENDING
)) {
// Drop any queued run of the work item before re-queueing it below.
320 cancel_delayed_work(&vnode
->lock_work
);
// afs_lock_work performs the release when it sees this state; a delay of 0
// is passed so no extra wait is requested.
322 afs_set_lock_state(vnode
, AFS_VNODE_LOCK_NEED_UNLOCK
);
323 queue_delayed_work(afs_lock_manager
, &vnode
->lock_work
, 0);
328 * Check that our view of the file metadata is up to date and check to see
329 * whether we think that we have a locking permit.
// Pre-flight checks made before a lock request is queued.
// vnode: the file to be locked.
// key: the key used for validation and for the permission lookup.
// type: AFS_LOCK_READ or a write lock.
// can_sleep: when false, an already-held conflicting lock yields -EAGAIN.
// NOTE(review): the declarations, the tests on ret and the remaining return
// statements are not visible in this listing; only the two -EAGAIN returns
// can be confirmed here.
331 static int afs_do_setlk_check(struct afs_vnode
*vnode
, struct key
*key
,
332 afs_lock_type_t type
, bool can_sleep
)
337 /* Make sure we've got a callback on this file and that our view of the
338 * data version is up to date.
340 ret
= afs_validate(vnode
, key
);
344 /* Check the permission set to see if we're actually going to be
345 * allowed to get a lock on this file.
347 ret
= afs_check_permit(vnode
, key
, &access
);
351 /* At a rough estimation, you need LOCK, WRITE or INSERT perm to
352 * read-lock a file and WRITE or INSERT perm to write-lock a file.
354 * We can't rely on the server to do this for us since if we want to
355 * share a read lock that we already have, we won't go the server.
357 if (type
== AFS_LOCK_READ
) {
// Read lock: any one of INSERT, WRITE or LOCK access is enough.
// NOTE(review): the statement taken on failure (original line 359) is not
// visible; presumably an access error is returned.
358 if (!(access
& (AFS_ACE_INSERT
| AFS_ACE_WRITE
| AFS_ACE_LOCK
)))
// A lock_count of -1 in the cached status is treated as write-locked.
360 if (vnode
->status
.lock_count
== -1 && !can_sleep
)
361 return -EAGAIN
; /* Write locked */
// NOTE(review): presumably the write-lock branch starts here (original line
// 362 is not visible); INSERT or WRITE access is needed.
363 if (!(access
& (AFS_ACE_INSERT
| AFS_ACE_WRITE
)))
// Any non-zero lock_count is treated as a conflict on this path.
365 if (vnode
->status
.lock_count
!= 0 && !can_sleep
)
366 return -EAGAIN
; /* Locked */
373 * request a lock on a file on the server
// Obtain a lock on a file for the given request.
// file: the open file; supplies the inode and the key.
// fl: the lock request; its fl_u.afs fields are initialised here.
// Visible flow: run the pre-flight checks, queue fl on pending_locks, grant
// it straight away when the lock already held on the vnode allows it,
// otherwise issue the set-lock RPC or sleep until the request state changes,
// and finally pass a granted request through locks_lock_file_wait.
// NOTE(review): every goto label, the case labels of the switch on the RPC
// result and several tests on ret are absent from this listing.
375 static int afs_do_setlk(struct file
*file
, struct file_lock
*fl
)
377 struct inode
*inode
= locks_inode(file
);
378 struct afs_vnode
*vnode
= AFS_FS_I(inode
);
379 afs_lock_type_t type
;
380 struct key
*key
= afs_file_key(file
);
383 _enter("{%x:%u},%u", vnode
->fid
.vid
, vnode
->fid
.vnode
, fl
->fl_type
);
// Prepare the request: hooks, an unlinked list node and the PENDING state.
385 fl
->fl_ops
= &afs_lock_ops
;
386 INIT_LIST_HEAD(&fl
->fl_u
.afs
.link
);
387 fl
->fl_u
.afs
.state
= AFS_LOCK_PENDING
;
// F_RDLCK maps to a read lock; every other fl_type maps to a write lock.
389 type
= (fl
->fl_type
== F_RDLCK
) ? AFS_LOCK_READ
: AFS_LOCK_WRITE
;
// FL_SLEEP in fl_flags becomes the can_sleep argument of the check.
391 ret
= afs_do_setlk_check(vnode
, key
, type
, fl
->fl_flags
& FL_SLEEP
);
395 spin_lock(&vnode
->lock
);
396 list_add_tail(&fl
->fl_u
.afs
.link
, &vnode
->pending_locks
);
398 /* If we've already got a lock on the server then try to move to having
399 * the VFS grant the requested lock. Note that this means that other
400 * clients may get starved out.
402 _debug("try %u", vnode
->lock_state
);
403 if (vnode
->lock_state
== AFS_VNODE_LOCK_GRANTED
) {
// A read request can be granted at once when the vnode is in the GRANTED
// state, whatever lock type is recorded on it.
404 if (type
== AFS_LOCK_READ
) {
405 _debug("instant readlock");
406 list_move_tail(&fl
->fl_u
.afs
.link
, &vnode
->granted_locks
);
407 fl
->fl_u
.afs
.state
= AFS_LOCK_GRANTED
;
408 goto vnode_is_locked_u
;
// A write request can be granted at once only if a write lock is held.
411 if (vnode
->lock_type
== AFS_LOCK_WRITE
) {
412 _debug("instant writelock");
413 list_move_tail(&fl
->fl_u
.afs
.link
, &vnode
->granted_locks
);
414 fl
->fl_u
.afs
.state
= AFS_LOCK_GRANTED
;
415 goto vnode_is_locked_u
;
// NOTE(review): the statement controlled by this test (original line 420) is
// not visible; presumably it jumps to the waiting path.
419 if (vnode
->lock_state
!= AFS_VNODE_LOCK_NONE
)
423 /* We don't have a lock on this vnode and we aren't currently waiting
424 * for one either, so ask the server for a lock.
426 * Note that we need to be careful if we get interrupted by a signal
427 * after dispatching the request as we may still get the lock, even
428 * though we don't wait for the reply (it's not too bad a problem - the
429 * lock will expire in 5 mins anyway).
431 _debug("not locked");
// Record on the vnode who is trying and for what, holding a key reference.
432 vnode
->lock_key
= key_get(key
);
433 vnode
->lock_type
= type
;
434 afs_set_lock_state(vnode
, AFS_VNODE_LOCK_SETTING
);
435 spin_unlock(&vnode
->lock
);
437 ret
= afs_set_lock(vnode
, key
, type
); /* RPC */
439 spin_lock(&vnode
->lock
);
// NOTE(review): the switch and the case labels selecting the branches below
// (original lines 440-445, 449-451, 455-457, 466-468) are not visible.
// Branch that also rejects the matching waiters with the same error.
446 fl
->fl_u
.afs
.state
= ret
;
447 list_del_init(&fl
->fl_u
.afs
.link
);
448 afs_next_locker(vnode
, ret
);
// Branch that fails only this request and lets the next waiter try.
452 fl
->fl_u
.afs
.state
= ret
;
453 list_del_init(&fl
->fl_u
.afs
.link
);
454 afs_next_locker(vnode
, 0);
458 /* The server doesn't have a lock-waiting queue, so the client
459 * will have to retry. The server will break the outstanding
460 * callbacks on a file when a lock is released.
462 _debug("would block");
463 ASSERT(list_empty(&vnode
->granted_locks
));
464 ASSERTCMP(vnode
->pending_locks
.next
, ==, &fl
->fl_u
.afs
.link
);
465 goto lock_is_contended
;
// Branch that marks the vnode as locked and grants the pending requests.
469 afs_set_lock_state(vnode
, AFS_VNODE_LOCK_GRANTED
);
470 afs_grant_locks(vnode
);
471 goto vnode_is_locked_u
;
// NOTE(review): presumably the vnode_is_locked_u label sits just before this
// unlock; the label lines are not visible.
475 spin_unlock(&vnode
->lock
);
477 /* the lock has been granted by the server... */
478 ASSERTCMP(fl
->fl_u
.afs
.state
, ==, AFS_LOCK_GRANTED
);
480 /* ... but the VFS still needs to distribute access on this client. */
481 ret
= locks_lock_file_wait(file
, fl
);
// NOTE(review): the test guarding this jump (original line 482) is missing.
483 goto vfs_rejected_lock
;
485 /* Again, make sure we've got a callback on this file and, again, make
486 * sure that our view of the data version is up to date (we ignore
487 * errors incurred here and deal with the consequences elsewhere).
489 afs_validate(vnode
, key
);
// Contended path: a request without FL_SLEEP is unlinked and the next
// waiter gets its turn.
// NOTE(review): what this branch returns (original lines 497-499) is not
// visible.
494 if (!(fl
->fl_flags
& FL_SLEEP
)) {
495 list_del_init(&fl
->fl_u
.afs
.link
);
496 afs_next_locker(vnode
, 0);
// Otherwise wait for a callback, with the work item queued after HZ * 5 as
// a periodic recheck (see the WAITING_FOR_CB case in afs_lock_work).
501 afs_set_lock_state(vnode
, AFS_VNODE_LOCK_WAITING_FOR_CB
);
502 queue_delayed_work(afs_lock_manager
, &vnode
->lock_work
, HZ
* 5);
505 /* We're going to have to wait. Either this client doesn't have a lock
506 * on the server yet and we need to wait for a callback to occur, or
507 * the client does have a lock on the server, but it's shared and we
508 * need an exclusive lock.
510 spin_unlock(&vnode
->lock
);
// Sleep until some other path changes the request state away from PENDING,
// or until the wait is interrupted.
513 ret
= wait_event_interruptible(fl
->fl_wait
,
514 fl
->fl_u
.afs
.state
!= AFS_LOCK_PENDING
);
515 _debug("wait = %d", ret
);
// A non-negative state other than GRANTED needs re-examination under the
// vnode lock.
517 if (fl
->fl_u
.afs
.state
>= 0 && fl
->fl_u
.afs
.state
!= AFS_LOCK_GRANTED
) {
518 spin_lock(&vnode
->lock
);
520 switch (fl
->fl_u
.afs
.state
) {
// It is our turn: go back to PENDING.
// NOTE(review): the statement that follows (original line 523) is not
// visible; presumably it retries the set-lock RPC.
521 case AFS_LOCK_YOUR_TRY
:
522 fl
->fl_u
.afs
.state
= AFS_LOCK_PENDING
;
524 case AFS_LOCK_PENDING
:
526 /* We need to retry the lock. We may not be
527 * notified by the server if it just expired
528 * rather than being released.
530 ASSERTCMP(vnode
->lock_state
, ==, AFS_VNODE_LOCK_WAITING_FOR_CB
);
531 afs_set_lock_state(vnode
, AFS_VNODE_LOCK_SETTING
);
532 fl
->fl_u
.afs
.state
= AFS_LOCK_PENDING
;
536 case AFS_LOCK_GRANTED
:
541 spin_unlock(&vnode
->lock
);
// Granted while asleep: continue with the VFS step; otherwise the state
// value becomes the result.
544 if (fl
->fl_u
.afs
.state
== AFS_LOCK_GRANTED
)
545 goto vnode_is_locked
;
546 ret
= fl
->fl_u
.afs
.state
;
550 /* The VFS rejected the lock we just obtained, so we have to discard
551 * what we just got. We defer this to the lock manager work item to
554 _debug("vfs refused %d", ret
);
// Unlink the request and let the work item release the server lock if no
// granted locks remain.
555 spin_lock(&vnode
->lock
);
556 list_del_init(&fl
->fl_u
.afs
.link
);
557 afs_defer_unlock(vnode
);
560 spin_unlock(&vnode
->lock
);
562 _leave(" = %d", ret
);
567 * unlock on a file on the server
// Handle an unlock request.
// file: the open file the request applies to.
// fl: the unlock request, passed through to locks_lock_file_wait.
// The value traced by _leave is the result of locks_lock_file_wait.
569 static int afs_do_unlk(struct file
*file
, struct file_lock
*fl
)
571 struct afs_vnode
*vnode
= AFS_FS_I(locks_inode(file
));
574 _enter("{%x:%u},%u", vnode
->fid
.vid
, vnode
->fid
.vnode
, fl
->fl_type
);
// NOTE(review): the flushing call announced by the next comment (original
// line 577) is not visible in this listing.
576 /* Flush all pending writes before doing anything with locks. */
// No server call is made here; the visible code relies on the release hook
// (afs_fl_release_private) to defer the server unlock.
579 ret
= locks_lock_file_wait(file
, fl
);
580 _leave(" = %d [%u]", ret
, vnode
->lock_state
);
585 * return information about a lock we currently hold, if indeed we hold one
// Report whether a conflicting lock exists for the described range.
// file: the open file being queried.
// fl: in/out record; fl_type is reset to F_UNLCK and then filled in from
// local lock records or, failing that, from the server status.
587 static int afs_do_getlk(struct file
*file
, struct file_lock
*fl
)
589 struct afs_vnode
*vnode
= AFS_FS_I(locks_inode(file
));
590 struct key
*key
= afs_file_key(file
);
// Start from the no-conflict answer.
595 fl
->fl_type
= F_UNLCK
;
597 /* check local lock records first */
598 posix_test_lock(file
, fl
);
599 if (fl
->fl_type
== F_UNLCK
) {
600 /* no local locks; consult the server */
601 ret
= afs_fetch_status(vnode
, key
);
// NOTE(review): the handling of a failed fetch (original lines 602-604) is
// not visible.
605 lock_count
= READ_ONCE(vnode
->status
.lock_count
);
606 if (lock_count
!= 0) {
// NOTE(review): the test choosing between the two assignments (original
// lines 607 and 609) is not visible; presumably the sign of lock_count
// decides, since afs_do_setlk_check treats -1 as write-locked.
608 fl
->fl_type
= F_RDLCK
;
610 fl
->fl_type
= F_WRLCK
;
// The reported range ends at OFFSET_MAX; the line that sets the start
// (original line 611) is not visible.
612 fl
->fl_end
= OFFSET_MAX
;
619 _leave(" = %d [%hd]", ret
, fl
->fl_type
);
624 * manage POSIX locks on a file
// Entry point for POSIX lock requests on an AFS file.
// file: the open file.
// cmd: the lock command; traced here and presumably used by the test that
// guards the afs_do_getlk call (that line is not visible).
// fl: the lock description.
// Dispatches to afs_do_getlk, afs_do_unlk or afs_do_setlk.
626 int afs_lock(struct file
*file
, int cmd
, struct file_lock
*fl
)
628 struct afs_vnode
*vnode
= AFS_FS_I(locks_inode(file
));
630 _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
631 vnode
->fid
.vid
, vnode
->fid
.vnode
, cmd
,
632 fl
->fl_type
, fl
->fl_flags
,
633 (long long) fl
->fl_start
, (long long) fl
->fl_end
);
635 /* AFS doesn't support mandatory locks */
// Unlock requests are exempt from this test.
// NOTE(review): the statement taken when the test holds (original line 637)
// is not visible; presumably an error is returned.
636 if (__mandatory_lock(&vnode
->vfs_inode
) && fl
->fl_type
!= F_UNLCK
)
// NOTE(review): the condition selecting the query path (original line 639)
// is not visible.
640 return afs_do_getlk(file
, fl
);
641 if (fl
->fl_type
== F_UNLCK
)
642 return afs_do_unlk(file
, fl
);
643 return afs_do_setlk(file
, fl
);
647 * manage FLOCK locks on a file
// Entry point for flock-style lock requests on an AFS file.
// file: the open file.
// cmd: the lock command; only traced in the visible code.
// fl: the lock description; FL_FLOCK is tested in fl_flags.
// Dispatches to afs_do_unlk for F_UNLCK and to afs_do_setlk otherwise.
649 int afs_flock(struct file
*file
, int cmd
, struct file_lock
*fl
)
651 struct afs_vnode
*vnode
= AFS_FS_I(locks_inode(file
));
653 _enter("{%x:%u},%d,{t=%x,fl=%x}",
654 vnode
->fid
.vid
, vnode
->fid
.vnode
, cmd
,
655 fl
->fl_type
, fl
->fl_flags
);
// NOTE(review): the comment below mentions NFS although this is AFS code,
// and the test it precedes fires for requests WITHOUT FL_FLOCK; the wording
// looks stale and should be checked against the intent.
658 * No BSD flocks over NFS allowed.
659 * Note: we could try to fake a POSIX lock request here by
660 * using ((u32) filp | 0x80000000) or some such as the pid.
661 * Not sure whether that would be unique, though, or whether
662 * that would break in other places.
// NOTE(review): the statement taken when the flag is absent (original line
// 665) is not visible; presumably an error is returned.
664 if (!(fl
->fl_flags
& FL_FLOCK
))
667 /* we're simulating flock() locks using posix locks on the server */
668 if (fl
->fl_type
== F_UNLCK
)
669 return afs_do_unlk(file
, fl
);
670 return afs_do_setlk(file
, fl
);
674 * the POSIX lock management core VFS code copies the lock record and adds the
675 * copy into its own list, so we need to add that copy to the vnode's lock
676 * queue in the same place as the original (which will be deleted shortly
// fl_copy_lock hook (see afs_lock_ops).
// new: the copy of the lock record.
// fl: the original record; its fl_file supplies the vnode.
// Links the copy into the same vnode list as the original, next to the
// entry of the original, while holding vnode->lock.
679 static void afs_fl_copy_lock(struct file_lock
*new, struct file_lock
*fl
)
681 struct afs_vnode
*vnode
= AFS_FS_I(locks_inode(fl
->fl_file
));
685 spin_lock(&vnode
->lock
);
// The link of the original is used as the insertion point for the copy.
686 list_add(&new->fl_u
.afs
.link
, &fl
->fl_u
.afs
.link
);
687 spin_unlock(&vnode
->lock
);
691 * need to remove this lock from the vnode queue when it's removed from the
// fl_release_private hook (see afs_lock_ops).
// fl: the lock record being released; its fl_file supplies the vnode.
// Unlinks the record from whichever vnode list it is on and, when no
// granted locks remain, defers the server unlock to the lock work item.
694 static void afs_fl_release_private(struct file_lock
*fl
)
696 struct afs_vnode
*vnode
= AFS_FS_I(locks_inode(fl
->fl_file
));
700 spin_lock(&vnode
->lock
);
702 list_del_init(&fl
->fl_u
.afs
.link
);
// afs_defer_unlock repeats the emptiness test and also checks lock_state.
703 if (list_empty(&vnode
->granted_locks
))
704 afs_defer_unlock(vnode
);
706 _debug("state %u for %p", vnode
->lock_state
, vnode
);
707 spin_unlock(&vnode
->lock
);