]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* rwsem.c: R/W semaphores: contention handling functions |
2 | * | |
3 | * Written by David Howells (dhowells@redhat.com). | |
4 | * Derived from arch/i386/kernel/semaphore.c | |
5 | */ | |
6 | #include <linux/rwsem.h> | |
7 | #include <linux/sched.h> | |
8 | #include <linux/init.h> | |
9 | #include <linux/module.h> | |
10 | ||
4ea2176d IM |
11 | /* |
12 | * Initialize an rwsem: | |
13 | */ | |
14 | void __init_rwsem(struct rw_semaphore *sem, const char *name, | |
15 | struct lock_class_key *key) | |
16 | { | |
17 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
18 | /* | |
19 | * Make sure we are not reinitializing a held semaphore: | |
20 | */ | |
21 | debug_check_no_locks_freed((void *)sem, sizeof(*sem)); | |
4dfbb9d8 | 22 | lockdep_init_map(&sem->dep_map, name, key, 0); |
4ea2176d IM |
23 | #endif |
24 | sem->count = RWSEM_UNLOCKED_VALUE; | |
25 | spin_lock_init(&sem->wait_lock); | |
26 | INIT_LIST_HEAD(&sem->wait_list); | |
27 | } | |
28 | ||
29 | EXPORT_SYMBOL(__init_rwsem); | |
30 | ||
1da177e4 LT |
31 | struct rwsem_waiter { |
32 | struct list_head list; | |
33 | struct task_struct *task; | |
34 | unsigned int flags; | |
35 | #define RWSEM_WAITING_FOR_READ 0x00000001 | |
36 | #define RWSEM_WAITING_FOR_WRITE 0x00000002 | |
37 | }; | |
38 | ||
/*
 * Wake types accepted by __rwsem_do_wake().  Passing RWSEM_WAKE_NO_ACTIVE or
 * RWSEM_WAKE_READ_OWNED implies that the spinlock has been held continuously
 * since the rwsem value was observed.
 */
enum {
	RWSEM_WAKE_ANY		= 0,	/* wake whatever is at the head of the wait list */
	RWSEM_WAKE_NO_ACTIVE	= 1,	/* rwsem was observed with no active thread */
	RWSEM_WAKE_READ_OWNED	= 2	/* rwsem was observed to be read owned */
};
46 | ||
1da177e4 LT |
47 | /* |
48 | * handle the lock release when processes blocked on it that can now run | |
49 | * - if we come here from up_xxxx(), then: | |
50 | * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) | |
51 | * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) | |
345af7bf | 52 | * - there must be someone on the queue |
1da177e4 LT |
53 | * - the spinlock must be held by the caller |
54 | * - woken process blocks are discarded from the list after having task zeroed | |
55 | * - writers are only woken if downgrading is false | |
56 | */ | |
70bdc6e0 ML |
57 | static struct rw_semaphore * |
58 | __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) | |
1da177e4 LT |
59 | { |
60 | struct rwsem_waiter *waiter; | |
61 | struct task_struct *tsk; | |
62 | struct list_head *next; | |
fd41b334 | 63 | signed long oldcount, woken, loop, adjustment; |
1da177e4 | 64 | |
345af7bf ML |
65 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); |
66 | if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) | |
67 | goto readers_only; | |
68 | ||
70bdc6e0 | 69 | if (wake_type == RWSEM_WAKE_READ_OWNED) |
345af7bf | 70 | goto out; |
1da177e4 | 71 | |
345af7bf ML |
72 | /* There's a writer at the front of the queue - try to grant it the |
73 | * write lock. However, we only wake this writer if we can transition | |
74 | * the active part of the count from 0 -> 1 | |
1da177e4 | 75 | */ |
fd41b334 ML |
76 | adjustment = RWSEM_ACTIVE_WRITE_BIAS; |
77 | if (waiter->list.next == &sem->wait_list) | |
78 | adjustment -= RWSEM_WAITING_BIAS; | |
79 | ||
345af7bf | 80 | try_again_write: |
fd41b334 | 81 | oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; |
1da177e4 | 82 | if (oldcount & RWSEM_ACTIVE_MASK) |
345af7bf ML |
83 | /* Someone grabbed the sem already */ |
84 | goto undo_write; | |
1da177e4 LT |
85 | |
86 | /* We must be careful not to touch 'waiter' after we set ->task = NULL. | |
87 | * It is an allocated on the waiter's stack and may become invalid at | |
88 | * any time after that point (due to a wakeup from another source). | |
89 | */ | |
90 | list_del(&waiter->list); | |
91 | tsk = waiter->task; | |
d59dd462 | 92 | smp_mb(); |
1da177e4 LT |
93 | waiter->task = NULL; |
94 | wake_up_process(tsk); | |
95 | put_task_struct(tsk); | |
96 | goto out; | |
97 | ||
345af7bf | 98 | readers_only: |
70bdc6e0 ML |
99 | /* If we come here from up_xxxx(), another thread might have reached |
100 | * rwsem_down_failed_common() before we acquired the spinlock and | |
101 | * woken up a waiter, making it now active. We prefer to check for | |
102 | * this first in order to not spend too much time with the spinlock | |
103 | * held if we're not going to be able to wake up readers in the end. | |
104 | * | |
105 | * Note that we do not need to update the rwsem count: any writer | |
106 | * trying to acquire rwsem will run rwsem_down_write_failed() due | |
107 | * to the waiting threads and block trying to acquire the spinlock. | |
108 | * | |
109 | * We use a dummy atomic update in order to acquire the cache line | |
110 | * exclusively since we expect to succeed and run the final rwsem | |
111 | * count adjustment pretty soon. | |
112 | */ | |
113 | if (wake_type == RWSEM_WAKE_ANY && | |
114 | (rwsem_atomic_update(0, sem) & RWSEM_ACTIVE_MASK)) | |
345af7bf | 115 | /* Someone grabbed the sem already */ |
70bdc6e0 | 116 | goto out; |
1da177e4 | 117 | |
345af7bf ML |
118 | /* Grant an infinite number of read locks to the readers at the front |
119 | * of the queue. Note we increment the 'active part' of the count by | |
120 | * the number of readers before waking any processes up. | |
1da177e4 | 121 | */ |
1da177e4 LT |
122 | woken = 0; |
123 | do { | |
124 | woken++; | |
125 | ||
126 | if (waiter->list.next == &sem->wait_list) | |
127 | break; | |
128 | ||
129 | waiter = list_entry(waiter->list.next, | |
130 | struct rwsem_waiter, list); | |
131 | ||
132 | } while (waiter->flags & RWSEM_WAITING_FOR_READ); | |
133 | ||
fd41b334 ML |
134 | adjustment = woken * RWSEM_ACTIVE_READ_BIAS; |
135 | if (waiter->flags & RWSEM_WAITING_FOR_READ) | |
136 | /* hit end of list above */ | |
137 | adjustment -= RWSEM_WAITING_BIAS; | |
1da177e4 | 138 | |
fd41b334 | 139 | rwsem_atomic_add(adjustment, sem); |
1da177e4 LT |
140 | |
141 | next = sem->wait_list.next; | |
fd41b334 | 142 | for (loop = woken; loop > 0; loop--) { |
1da177e4 LT |
143 | waiter = list_entry(next, struct rwsem_waiter, list); |
144 | next = waiter->list.next; | |
145 | tsk = waiter->task; | |
d59dd462 | 146 | smp_mb(); |
1da177e4 LT |
147 | waiter->task = NULL; |
148 | wake_up_process(tsk); | |
149 | put_task_struct(tsk); | |
150 | } | |
151 | ||
152 | sem->wait_list.next = next; | |
153 | next->prev = &sem->wait_list; | |
154 | ||
155 | out: | |
1da177e4 LT |
156 | return sem; |
157 | ||
91af7081 ML |
158 | /* undo the change to the active count, but check for a transition |
159 | * 1->0 */ | |
345af7bf | 160 | undo_write: |
fd41b334 | 161 | if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) |
345af7bf ML |
162 | goto out; |
163 | goto try_again_write; | |
1da177e4 LT |
164 | } |
165 | ||
166 | /* | |
167 | * wait for a lock to be granted | |
168 | */ | |
c7af77b5 | 169 | static struct rw_semaphore __sched * |
1da177e4 LT |
170 | rwsem_down_failed_common(struct rw_semaphore *sem, |
171 | struct rwsem_waiter *waiter, signed long adjustment) | |
172 | { | |
173 | struct task_struct *tsk = current; | |
174 | signed long count; | |
175 | ||
176 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | |
177 | ||
178 | /* set up my own style of waitqueue */ | |
179 | spin_lock_irq(&sem->wait_lock); | |
180 | waiter->task = tsk; | |
181 | get_task_struct(tsk); | |
182 | ||
fd41b334 ML |
183 | if (list_empty(&sem->wait_list)) |
184 | adjustment += RWSEM_WAITING_BIAS; | |
1da177e4 LT |
185 | list_add_tail(&waiter->list, &sem->wait_list); |
186 | ||
70bdc6e0 | 187 | /* we're now waiting on the lock, but no longer actively locking */ |
1da177e4 LT |
188 | count = rwsem_atomic_update(adjustment, sem); |
189 | ||
190 | /* if there are no active locks, wake the front queued process(es) up */ | |
191 | if (!(count & RWSEM_ACTIVE_MASK)) | |
70bdc6e0 | 192 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); |
1da177e4 LT |
193 | |
194 | spin_unlock_irq(&sem->wait_lock); | |
195 | ||
196 | /* wait to be given the lock */ | |
197 | for (;;) { | |
198 | if (!waiter->task) | |
199 | break; | |
200 | schedule(); | |
201 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | |
202 | } | |
203 | ||
204 | tsk->state = TASK_RUNNING; | |
205 | ||
206 | return sem; | |
207 | } | |
208 | ||
209 | /* | |
210 | * wait for the read lock to be granted | |
211 | */ | |
d50efc6c | 212 | asmregparm struct rw_semaphore __sched * |
1da177e4 LT |
213 | rwsem_down_read_failed(struct rw_semaphore *sem) |
214 | { | |
215 | struct rwsem_waiter waiter; | |
216 | ||
1da177e4 | 217 | waiter.flags = RWSEM_WAITING_FOR_READ; |
fd41b334 | 218 | rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_READ_BIAS); |
1da177e4 LT |
219 | return sem; |
220 | } | |
221 | ||
222 | /* | |
223 | * wait for the write lock to be granted | |
224 | */ | |
d50efc6c | 225 | asmregparm struct rw_semaphore __sched * |
1da177e4 LT |
226 | rwsem_down_write_failed(struct rw_semaphore *sem) |
227 | { | |
228 | struct rwsem_waiter waiter; | |
229 | ||
1da177e4 | 230 | waiter.flags = RWSEM_WAITING_FOR_WRITE; |
fd41b334 | 231 | rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_WRITE_BIAS); |
1da177e4 | 232 | |
1da177e4 LT |
233 | return sem; |
234 | } | |
235 | ||
236 | /* | |
237 | * handle waking up a waiter on the semaphore | |
238 | * - up_read/up_write has decremented the active part of count if we come here | |
239 | */ | |
d50efc6c | 240 | asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) |
1da177e4 LT |
241 | { |
242 | unsigned long flags; | |
243 | ||
1da177e4 LT |
244 | spin_lock_irqsave(&sem->wait_lock, flags); |
245 | ||
246 | /* do nothing if list empty */ | |
247 | if (!list_empty(&sem->wait_list)) | |
70bdc6e0 | 248 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); |
1da177e4 LT |
249 | |
250 | spin_unlock_irqrestore(&sem->wait_lock, flags); | |
251 | ||
1da177e4 LT |
252 | return sem; |
253 | } | |
254 | ||
255 | /* | |
256 | * downgrade a write lock into a read lock | |
257 | * - caller incremented waiting part of count and discovered it still negative | |
258 | * - just wake up any readers at the front of the queue | |
259 | */ | |
d50efc6c | 260 | asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) |
1da177e4 LT |
261 | { |
262 | unsigned long flags; | |
263 | ||
1da177e4 LT |
264 | spin_lock_irqsave(&sem->wait_lock, flags); |
265 | ||
266 | /* do nothing if list empty */ | |
267 | if (!list_empty(&sem->wait_list)) | |
70bdc6e0 | 268 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); |
1da177e4 LT |
269 | |
270 | spin_unlock_irqrestore(&sem->wait_lock, flags); | |
271 | ||
1da177e4 LT |
272 | return sem; |
273 | } | |
274 | ||
/* Export the contention-handling entry points defined in this file. */
EXPORT_SYMBOL(rwsem_down_read_failed);
EXPORT_SYMBOL(rwsem_down_write_failed);
EXPORT_SYMBOL(rwsem_wake);
EXPORT_SYMBOL(rwsem_downgrade_wake);