/*
 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 * Copyright (C) 2007 The Regents of the University of California.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 * UCRL-CODE-235197
 *
 * This file is part of the SPL, Solaris Porting Layer.
 * For details, see <http://zfsonlinux.org/>.
 *
 * The SPL is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * The SPL is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with the SPL.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Solaris Porting Layer (SPL) Condition Variables Implementation.
 */

#include <sys/condvar.h>
#include <sys/time.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>

void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
	ASSERT(cvp);
	ASSERT(name == NULL);
	ASSERT(type == CV_DEFAULT);
	ASSERT(arg == NULL);

	cvp->cv_magic = CV_MAGIC;
	init_waitqueue_head(&cvp->cv_event);
	init_waitqueue_head(&cvp->cv_destroy);
	atomic_set(&cvp->cv_waiters, 0);
	atomic_set(&cvp->cv_refs, 1);
	cvp->cv_mutex = NULL;
}
EXPORT_SYMBOL(__cv_init);
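
/*
 * Example (illustrative sketch only, not part of this file): the expected
 * lifecycle pairs cv_init() with cv_destroy(), with all waiters on a given
 * condvar using the same mutex.  The structure and helper names below are
 * hypothetical.
 *
 *	typedef struct my_queue {
 *		kmutex_t	mq_lock;
 *		kcondvar_t	mq_cv;
 *		int		mq_ready;
 *	} my_queue_t;
 *
 *	static void
 *	my_queue_setup(my_queue_t *mq)
 *	{
 *		mutex_init(&mq->mq_lock, NULL, MUTEX_DEFAULT, NULL);
 *		cv_init(&mq->mq_cv, NULL, CV_DEFAULT, NULL);
 *		mq->mq_ready = 0;
 *	}
 *
 *	static void
 *	my_queue_teardown(my_queue_t *mq)
 *	{
 *		cv_destroy(&mq->mq_cv);
 *		mutex_destroy(&mq->mq_lock);
 *	}
 */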

static int
cv_destroy_wakeup(kcondvar_t *cvp)
{
	if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
		ASSERT(cvp->cv_mutex == NULL);
		ASSERT(!waitqueue_active(&cvp->cv_event));
		return (1);
	}

	return (0);
}

void
__cv_destroy(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);

	cvp->cv_magic = CV_DESTROY;
	atomic_dec(&cvp->cv_refs);

	/* Block until all waiters are woken and references dropped. */
	while (cv_destroy_wakeup(cvp) == 0)
		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);

	ASSERT3P(cvp->cv_mutex, ==, NULL);
	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
}
EXPORT_SYMBOL(__cv_destroy);

static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));
	atomic_inc(&cvp->cv_refs);

	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex is dropped after prepare_to_wait(); this ensures we are
	 * linked onto the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		io_schedule();
	else
		schedule();

	/* No more waiters; a different mutex could be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex after releasing the cvp; otherwise we could
	 * deadlock with a thread holding the mutex and calling cv_destroy().
	 */
	mutex_enter(mp);
}

void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait);
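
/*
 * Example (illustrative sketch only, reusing the hypothetical my_queue_t
 * above): cv_wait() is always called with the mutex held and inside a loop
 * that re-checks the predicate, since the condition may have been consumed
 * by another thread between wakeup and reacquiring the mutex.
 *
 *	static void
 *	my_queue_consume(my_queue_t *mq)
 *	{
 *		mutex_enter(&mq->mq_lock);
 *		while (mq->mq_ready == 0)
 *			cv_wait(&mq->mq_cv, &mq->mq_lock);
 *		mq->mq_ready--;
 *		mutex_exit(&mq->mq_lock);
 *	}
 */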

void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
}
EXPORT_SYMBOL(__cv_wait_io);

void
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait_sig);

#if defined(HAVE_IO_SCHEDULE_TIMEOUT)
#define	spl_io_schedule_timeout(t)	io_schedule_timeout(t)
#else

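/*
 * Fallback for kernels without io_schedule_timeout(): emulate it by arming
 * a timer that wakes the current task, calling io_schedule(), and then
 * computing how much of the timeout remains once the task runs again.
 */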
struct spl_task_timer {
	struct timer_list timer;
	struct task_struct *task;
};

static void
__cv_wakeup(spl_timer_list_t t)
{
	struct timer_list *tmr = (struct timer_list *)t;
	struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer);

	wake_up_process(task_timer->task);
}

static long
spl_io_schedule_timeout(long time_left)
{
	long expire_time = jiffies + time_left;
	struct spl_task_timer task_timer;
	struct timer_list *timer = &task_timer.timer;

	task_timer.task = current;

	timer_setup(timer, __cv_wakeup, 0);

	timer->expires = expire_time;
	add_timer(timer);

	io_schedule();

	del_timer_sync(timer);

	time_left = expire_time - jiffies;

	return (time_left < 0 ? 0 : time_left);
}
#endif

/*
 * 'expire_time' argument is an absolute wall clock time in jiffies.
 * Return value is time left (expire_time - now) or -1 if timeout occurred.
 */
static clock_t
__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
    int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	clock_t time_left;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	/* XXX - Does not handle jiffies wrap properly */
	time_left = expire_time - jiffies;
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex is dropped after prepare_to_wait(); this ensures we are
	 * linked onto the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		time_left = spl_io_schedule_timeout(time_left);
	else
		time_left = schedule_timeout(time_left);

	/* No more waiters; a different mutex could be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex after releasing the cvp; otherwise we could
	 * deadlock with a thread holding the mutex and calling cv_destroy().
	 */
	mutex_enter(mp);
	return (time_left > 0 ? time_left : -1);
}

clock_t
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 0));
}
EXPORT_SYMBOL(__cv_timedwait);

clock_t
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 1));
}
EXPORT_SYMBOL(__cv_timedwait_io);

clock_t
__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_INTERRUPTIBLE, 0));
}
EXPORT_SYMBOL(__cv_timedwait_sig);
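
/*
 * Example (illustrative sketch only, reusing the hypothetical my_queue_t):
 * cv_timedwait() takes an absolute deadline in jiffies/ticks, typically
 * built from ddi_get_lbolt(), and returns -1 once that deadline has passed.
 *
 *	static int
 *	my_queue_consume_timed(my_queue_t *mq, clock_t timeout_ticks)
 *	{
 *		clock_t deadline = ddi_get_lbolt() + timeout_ticks;
 *
 *		mutex_enter(&mq->mq_lock);
 *		while (mq->mq_ready == 0) {
 *			if (cv_timedwait(&mq->mq_cv, &mq->mq_lock,
 *			    deadline) == -1) {
 *				mutex_exit(&mq->mq_lock);
 *				return (-1);
 *			}
 *		}
 *		mq->mq_ready--;
 *		mutex_exit(&mq->mq_lock);
 *		return (0);
 *	}
 */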

/*
 * 'expire_time' argument is an absolute clock time in nanoseconds.
 * Return value is time left (expire_time - now) or -1 if timeout occurred.
 */
static clock_t
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
    int state)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	hrtime_t time_left;
	ktime_t ktime_left;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	time_left = expire_time - gethrtime();
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex is dropped after prepare_to_wait(); this ensures we are
	 * linked onto the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	/*
	 * Allow a 100 us range to give the kernel an opportunity to
	 * coalesce interrupts.
	 */
	ktime_left = ktime_set(0, time_left);
	schedule_hrtimeout_range(&ktime_left, 100 * NSEC_PER_USEC,
	    HRTIMER_MODE_REL);

	/* No more waiters; a different mutex could be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	mutex_enter(mp);
	time_left = expire_time - gethrtime();
	return (time_left > 0 ? NSEC_TO_TICK(time_left) : -1);
}

/*
 * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
 */
static clock_t
cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag, int state)
{
	if (res > 1) {
		/*
		 * Align expiration to the specified resolution.
		 */
		if (flag & CALLOUT_FLAG_ROUNDUP)
			tim += res - 1;
		tim = (tim / res) * res;
	}

	if (!(flag & CALLOUT_FLAG_ABSOLUTE))
		tim += gethrtime();

	return (__cv_timedwait_hires(cvp, mp, tim, state));
}

clock_t
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
    int flag)
{
	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_UNINTERRUPTIBLE));
}
EXPORT_SYMBOL(cv_timedwait_hires);

clock_t
cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE));
}
EXPORT_SYMBOL(cv_timedwait_sig_hires);
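
/*
 * Example (illustrative sketch only): a relative high-resolution wait of
 * roughly 500 us with a 10 us resolution.  Because CALLOUT_FLAG_ABSOLUTE is
 * not passed, the wrapper converts the relative timeout into an absolute
 * gethrtime() deadline.  my_queue_t is the hypothetical structure from the
 * earlier examples.
 *
 *	static clock_t
 *	my_queue_poll_hires(my_queue_t *mq)
 *	{
 *		clock_t rc = 0;
 *
 *		mutex_enter(&mq->mq_lock);
 *		if (mq->mq_ready == 0)
 *			rc = cv_timedwait_hires(&mq->mq_cv, &mq->mq_lock,
 *			    500 * NSEC_PER_USEC, 10 * NSEC_PER_USEC, 0);
 *		mutex_exit(&mq->mq_lock);
 *		return (rc);
 *	}
 */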

void
__cv_signal(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * All waiters are added with WQ_FLAG_EXCLUSIVE, so only one
	 * waiter will be made runnable by each call to wake_up().
	 * Additionally, wake_up() holds a spin_lock associated with
	 * the wait queue to ensure we don't race waking up processes.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_signal);

void
__cv_broadcast(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * wake_up_all() will wake up all waiters, even those which
	 * have the WQ_FLAG_EXCLUSIVE flag set.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up_all(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_broadcast);
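
/*
 * Example (illustrative sketch only, completing the hypothetical my_queue_t
 * consumer above): the producer updates the predicate under the same mutex
 * and then uses cv_signal() to wake a single exclusive waiter, or
 * cv_broadcast() when every waiter must re-check the condition.
 *
 *	static void
 *	my_queue_produce(my_queue_t *mq)
 *	{
 *		mutex_enter(&mq->mq_lock);
 *		mq->mq_ready++;
 *		cv_signal(&mq->mq_cv);
 *		mutex_exit(&mq->mq_lock);
 *	}
 *
 *	static void
 *	my_queue_wake_all(my_queue_t *mq)
 *	{
 *		mutex_enter(&mq->mq_lock);
 *		cv_broadcast(&mq->mq_cv);
 *		mutex_exit(&mq->mq_lock);
 *	}
 */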