/* SPDX-License-Identifier: GPL-2.0 */
/*
 * The least significant 2 bits of the owner value have the following
 * meanings when set.
 * - RWSEM_READER_OWNED (bit 0): The rwsem is owned by readers
 * - RWSEM_ANONYMOUSLY_OWNED (bit 1): The rwsem is anonymously owned,
 *   i.e. the owner(s) cannot be readily determined. It can be reader
 *   owned or the owning writer is indeterminate.
 *
 * When a writer acquires a rwsem, it puts its task_struct pointer
 * into the owner field. It is cleared after an unlock.
 *
 * When a reader acquires a rwsem, it will also put its task_struct
 * pointer into the owner field with both the RWSEM_READER_OWNED and
 * RWSEM_ANONYMOUSLY_OWNED bits set. On unlock, the owner field will
 * largely be left untouched. So for a free or reader-owned rwsem,
 * the owner value may contain information about the last reader that
 * acquired the rwsem. The anonymous bit is set because that particular
 * reader may or may not still own the lock.
 *
 * That information may be helpful in debugging cases where the system
 * seems to hang on a reader-owned rwsem, especially if only one reader
 * is involved. Ideally we would like to track all the readers that own
 * a rwsem, but the overhead is simply too big.
 */
#include "lock_events.h"

#define RWSEM_READER_OWNED	(1UL << 0)
#define RWSEM_ANONYMOUSLY_OWNED	(1UL << 1)
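
/*
 * For illustration only (addresses are hypothetical): task_struct
 * pointers are at least word aligned, so the two low bits of the owner
 * field are always zero for a real task pointer and are free to carry
 * the flags above. With a task at 0xffff8880deadbe00, a reader-owned
 * rwsem would hold
 *
 *	0xffff8880deadbe00 | RWSEM_READER_OWNED | RWSEM_ANONYMOUSLY_OWNED
 *	== 0xffff8880deadbe03
 *
 * while a writer-owned rwsem would hold the bare task pointer
 * 0xffff8880deadbe00.
 */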

#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c, sem)	do {			\
	if (!debug_locks_silent &&				\
	    WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
		#c, atomic_long_read(&(sem)->count),		\
		(long)((sem)->owner), (long)current,		\
		list_empty(&(sem)->wait_list) ? "" : "not "))	\
			debug_locks_off();			\
	} while (0)
#else
# define DEBUG_RWSEMS_WARN_ON(c, sem)
#endif

/*
 * R/W semaphores originally for PPC using the stuff in lib/rwsem.c.
 * Adapted largely from include/asm-i386/rwsem.h
 * by Paul Mackerras <paulus@samba.org>.
 */

/*
 * the semaphore definition
 */
#ifdef CONFIG_64BIT
# define RWSEM_ACTIVE_MASK		0xffffffffL
#else
# define RWSEM_ACTIVE_MASK		0x0000ffffL
#endif

#define RWSEM_ACTIVE_BIAS		0x00000001L
#define RWSEM_WAITING_BIAS		(-RWSEM_ACTIVE_MASK-1)
#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
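
/*
 * A rough sketch of the resulting count values, assuming CONFIG_64BIT
 * (the 32-bit layout is analogous with 16-bit halves):
 *
 *	0x0000000000000000	unlocked, wait list empty
 *	0x000000000000000N	N readers active, wait list empty
 *	0xffffffff0000000N	N readers active or attempting, waiters queued
 *	0xffffffff00000001	also: writer active, wait list empty
 *				(WAITING_BIAS + ACTIVE_BIAS)
 *	0xfffffffe00000001	writer active or attempting, waiters queued
 *
 * The low 32 bits (RWSEM_ACTIVE_MASK) count active lockers; one extra
 * WAITING_BIAS on top records a non-empty wait list. Note that some
 * encodings overlap (e.g. one active reader with waiters reads the same
 * as a writer with an empty wait list); the slow paths disambiguate via
 * the wait list and the owner field.
 */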

#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * All writes to owner are protected by WRITE_ONCE() to make sure that
 * store tearing can't happen as optimistic spinners may read and use
 * the owner value concurrently without taking the lock. Reads from
 * owner, however, may not need READ_ONCE() as long as the pointer value
 * is only used for comparison and isn't being dereferenced.
 */
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
	WRITE_ONCE(sem->owner, current);
}

static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
	WRITE_ONCE(sem->owner, NULL);
}

/*
 * The task_struct pointer of the last owning reader will be left in
 * the owner field.
 *
 * Note that the owner value just indicates that the task has owned the
 * rwsem previously; it may not be the real owner or one of the real
 * owners anymore when that field is examined, so take it with a grain
 * of salt.
 */
static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
					    struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED
						 | RWSEM_ANONYMOUSLY_OWNED;

	WRITE_ONCE(sem->owner, (struct task_struct *)val);
}
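
/*
 * Illustrative only, not kernel API: decoding an owner snapshot, e.g.
 * from a crash dump. With owner == 0xffff8880deadbe03 (hypothetical),
 * masking off the two flag bits
 *
 *	(struct task_struct *)((unsigned long)owner &
 *			~(RWSEM_READER_OWNED | RWSEM_ANONYMOUSLY_OWNED))
 *
 * recovers the task_struct of the last reader known to have taken the
 * rwsem, which may or may not still hold it.
 */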

static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
{
	__rwsem_set_reader_owned(sem, current);
}

/*
 * Return true if a rwsem waiter can spin on the rwsem's owner
 * and steal the lock, i.e. the lock is not anonymously owned.
 * N.B. !owner is considered spinnable.
 */
static inline bool is_rwsem_owner_spinnable(struct task_struct *owner)
{
	return !((unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED);
}
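
/*
 * A minimal sketch of how an optimistic spinner might use this test
 * (illustrative pseudo-code, not the actual spinning loop; owner_on_cpu()
 * stands in for whatever on-CPU check the real code performs):
 *
 *	struct task_struct *owner = READ_ONCE(sem->owner);
 *
 *	if (!is_rwsem_owner_spinnable(owner))
 *		return false;		// anonymous owner: just queue up
 *	while (owner == READ_ONCE(sem->owner) && owner_on_cpu(owner))
 *		cpu_relax();		// owner is running: keep spinning
 */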

/*
 * Return true if the rwsem is owned by an anonymous writer or readers.
 */
static inline bool rwsem_has_anonymous_owner(struct task_struct *owner)
{
	return (unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED;
}

#ifdef CONFIG_DEBUG_RWSEMS
/*
 * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
 * is a task pointer in the owner field of a reader-owned rwsem, it will
 * be the real owner or one of the real owners. The only exception is
 * when the unlock is done by up_read_non_owner().
 */
#define rwsem_clear_reader_owned rwsem_clear_reader_owned
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
	unsigned long val = (unsigned long)current | RWSEM_READER_OWNED
						   | RWSEM_ANONYMOUSLY_OWNED;
	if (READ_ONCE(sem->owner) == (struct task_struct *)val)
		cmpxchg_relaxed((unsigned long *)&sem->owner, val,
				RWSEM_READER_OWNED | RWSEM_ANONYMOUSLY_OWNED);
}
#endif

#else
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
}

static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
}

static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
					    struct task_struct *owner)
{
}

static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
{
}
#endif

#ifndef rwsem_clear_reader_owned
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
}
#endif

extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);

/*
 * lock for reading
 */
static inline void __down_read(struct rw_semaphore *sem)
{
	if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
		rwsem_down_read_failed(sem);
		DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
					RWSEM_READER_OWNED), sem);
	} else {
		rwsem_set_reader_owned(sem);
	}
}
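
/*
 * For illustration, assuming CONFIG_64BIT: taking a read lock on an
 * unlocked rwsem moves count 0 -> 1 (positive, fast path). If a writer
 * holds the lock, count is WAITING_BIAS + ACTIVE_BIAS and the increment
 * yields 0xffffffff00000002, still negative, so the reader takes the
 * slow path and queues.
 */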

static inline int __down_read_killable(struct rw_semaphore *sem)
{
	if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
		if (IS_ERR(rwsem_down_read_failed_killable(sem)))
			return -EINTR;
		DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
					RWSEM_READER_OWNED), sem);
	} else {
		rwsem_set_reader_owned(sem);
	}
	return 0;
}

static inline int __down_read_trylock(struct rw_semaphore *sem)
{
	/*
	 * Optimize for the case when the rwsem is not locked at all.
	 */
	long tmp = RWSEM_UNLOCKED_VALUE;

	lockevent_inc(rwsem_rtrylock);
	do {
		if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
					tmp + RWSEM_ACTIVE_READ_BIAS)) {
			rwsem_set_reader_owned(sem);
			return 1;
		}
	} while (tmp >= 0);
	return 0;
}
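
/*
 * Note on the loop above: atomic_long_try_cmpxchg_acquire() updates tmp
 * with the current count on failure, so each retry starts from a fresh
 * snapshot. The loop keeps trying while the observed count is
 * non-negative (i.e. reader-held or free) and gives up as soon as a
 * writer or waiter bias makes it negative.
 */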

/*
 * lock for writing
 */
static inline void __down_write(struct rw_semaphore *sem)
{
	long tmp;

	tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
					     &sem->count);
	if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
		rwsem_down_write_failed(sem);
	rwsem_set_owner(sem);
}
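
/*
 * Worked example (CONFIG_64BIT): on an unlocked rwsem the addition moves
 * count from 0 to exactly RWSEM_ACTIVE_WRITE_BIAS, so the fast path
 * succeeds. Any other result means readers, a writer or waiters were
 * already accounted for, and the writer must go through the slow path.
 */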

static inline int __down_write_killable(struct rw_semaphore *sem)
{
	long tmp;

	tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
					     &sem->count);
	if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
		if (IS_ERR(rwsem_down_write_failed_killable(sem)))
			return -EINTR;
	rwsem_set_owner(sem);
	return 0;
}

static inline int __down_write_trylock(struct rw_semaphore *sem)
{
	long tmp;

	lockevent_inc(rwsem_wtrylock);
	tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
					  RWSEM_ACTIVE_WRITE_BIAS);
	if (tmp == RWSEM_UNLOCKED_VALUE) {
		rwsem_set_owner(sem);
		return true;
	}
	return false;
}
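
/*
 * Unlike the read trylock, there is no retry loop here: a single
 * cmpxchg from RWSEM_UNLOCKED_VALUE either claims the lock outright or
 * fails because some reader, writer or waiter is already accounted for,
 * in which case the trylock simply reports failure.
 */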

/*
 * unlock after reading
 */
static inline void __up_read(struct rw_semaphore *sem)
{
	long tmp;

	DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
				sem);
	rwsem_clear_reader_owned(sem);
	tmp = atomic_long_dec_return_release(&sem->count);
	if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0))
		rwsem_wake(sem);
}
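
/*
 * Worked example (CONFIG_64BIT): the last reader leaving with waiters
 * queued decrements count from WAITING_BIAS + 1 to WAITING_BIAS
 * (0xffffffff00000000). That is < -1 with a zero active mask, so
 * rwsem_wake() is called. A reader leaving an uncontended rwsem goes
 * from 1 to 0, which fails the tmp < -1 test, so no wakeup is needed.
 */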

/*
 * unlock after writing
 */
static inline void __up_write(struct rw_semaphore *sem)
{
	DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
	rwsem_clear_owner(sem);
	if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS,
						    &sem->count) < 0))
		rwsem_wake(sem);
}
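
/*
 * Worked example (CONFIG_64BIT): with no waiters, count goes from
 * RWSEM_ACTIVE_WRITE_BIAS back to 0 and nothing more happens. With
 * waiters queued (one extra WAITING_BIAS), the result is WAITING_BIAS,
 * which is negative, so rwsem_wake() hands the lock on.
 */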

/*
 * downgrade write lock to read lock
 */
static inline void __downgrade_write(struct rw_semaphore *sem)
{
	long tmp;

	/*
	 * When downgrading from exclusive to shared ownership,
	 * anything inside the write-locked region cannot leak
	 * into the read side. In contrast, anything in the
	 * read-locked region is ok to be re-ordered into the
	 * write side. As such, rely on RELEASE semantics.
	 */
	DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
	tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count);
	rwsem_set_reader_owned(sem);
	if (tmp < 0)
		rwsem_downgrade_wake(sem);
}
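
/*
 * Worked example (CONFIG_64BIT): adding -RWSEM_WAITING_BIAS converts the
 * writer's WAITING_BIAS + ACTIVE_BIAS into a plain ACTIVE_BIAS, i.e. one
 * active reader. If other waiters are queued, their extra WAITING_BIAS
 * keeps the result negative and rwsem_downgrade_wake() lets any queued
 * readers in behind the downgrading task.
 */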