If a thread is holding mutex when doing cv_destroy, it might end up waiting a
thread in cv_wait. The waiter would wake up trying to aquire the same mutex
and cause deadlock.
We solve this by move the mutex_enter to the bottom of cv_wait, so that
the waiter will release the cv first, allowing cv_destroy to succeed and have
a chance to free the mutex.
This would create race condition on the cv_mutex. We use xchg to set and check
it to ensure we won't be harmed by the race. This would result in the cv_mutex
debugging becomes best-effort.
Also, the change reveals a race, which was unlikely before, where we call
mutex_destroy while test threads are still holding the mutex. We use
kthread_stop to make sure the threads are exit before mutex_destroy.
Signed-off-by: Chunwei Chen <tuxoko@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tim Chase <tim@chase2k.com>
Issue zfsonlinux/zfs#4166
Issue zfsonlinux/zfs#4106
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
DEFINE_WAIT(wait);
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
DEFINE_WAIT(wait);
ASSERT(mutex_owned(mp));
atomic_inc(&cvp->cv_refs);
ASSERT(mutex_owned(mp));
atomic_inc(&cvp->cv_refs);
- if (cvp->cv_mutex == NULL)
- cvp->cv_mutex = mp;
-
+ m = ACCESS_ONCE(cvp->cv_mutex);
+ if (!m)
+ m = xchg(&cvp->cv_mutex, mp);
/* Ensure the same mutex is used by all callers */
/* Ensure the same mutex is used by all callers */
- ASSERT(cvp->cv_mutex == mp);
+ ASSERT(m == NULL || m == mp);
prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
atomic_inc(&cvp->cv_waiters);
prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
atomic_inc(&cvp->cv_waiters);
io_schedule();
else
schedule();
io_schedule();
else
schedule();
/* No more waiters a different mutex could be used */
if (atomic_dec_and_test(&cvp->cv_waiters)) {
/* No more waiters a different mutex could be used */
if (atomic_dec_and_test(&cvp->cv_waiters)) {
+ /*
+ * This is set without any lock, so it's racy. But this is
+ * just for debug anyway, so make it best-effort
+ */
cvp->cv_mutex = NULL;
wake_up(&cvp->cv_destroy);
}
finish_wait(&cvp->cv_event, &wait);
atomic_dec(&cvp->cv_refs);
cvp->cv_mutex = NULL;
wake_up(&cvp->cv_destroy);
}
finish_wait(&cvp->cv_event, &wait);
atomic_dec(&cvp->cv_refs);
+
+ /*
+ * Hold mutex after we release the cvp, otherwise we could dead lock
+ * with a thread holding the mutex and call cv_destroy.
+ */
+ mutex_enter(mp);
int state)
{
DEFINE_WAIT(wait);
int state)
{
DEFINE_WAIT(wait);
clock_t time_left;
ASSERT(cvp);
clock_t time_left;
ASSERT(cvp);
ASSERT(mutex_owned(mp));
atomic_inc(&cvp->cv_refs);
ASSERT(mutex_owned(mp));
atomic_inc(&cvp->cv_refs);
- if (cvp->cv_mutex == NULL)
- cvp->cv_mutex = mp;
-
+ m = ACCESS_ONCE(cvp->cv_mutex);
+ if (!m)
+ m = xchg(&cvp->cv_mutex, mp);
/* Ensure the same mutex is used by all callers */
/* Ensure the same mutex is used by all callers */
- ASSERT(cvp->cv_mutex == mp);
+ ASSERT(m == NULL || m == mp);
/* XXX - Does not handle jiffie wrap properly */
time_left = expire_time - jiffies;
if (time_left <= 0) {
/* XXX - Does not handle jiffie wrap properly */
time_left = expire_time - jiffies;
if (time_left <= 0) {
+ /* XXX - doesn't reset cv_mutex */
atomic_dec(&cvp->cv_refs);
return (-1);
}
atomic_dec(&cvp->cv_refs);
return (-1);
}
*/
mutex_exit(mp);
time_left = schedule_timeout(time_left);
*/
mutex_exit(mp);
time_left = schedule_timeout(time_left);
/* No more waiters a different mutex could be used */
if (atomic_dec_and_test(&cvp->cv_waiters)) {
/* No more waiters a different mutex could be used */
if (atomic_dec_and_test(&cvp->cv_waiters)) {
+ /*
+ * This is set without any lock, so it's racy. But this is
+ * just for debug anyway, so make it best-effort
+ */
cvp->cv_mutex = NULL;
wake_up(&cvp->cv_destroy);
}
cvp->cv_mutex = NULL;
wake_up(&cvp->cv_destroy);
}
finish_wait(&cvp->cv_event, &wait);
atomic_dec(&cvp->cv_refs);
finish_wait(&cvp->cv_event, &wait);
atomic_dec(&cvp->cv_refs);
+ /*
+ * Hold mutex after we release the cvp, otherwise we could dead lock
+ * with a thread holding the mutex and call cv_destroy.
+ */
+ mutex_enter(mp);
return (time_left > 0 ? time_left : -1);
}
return (time_left > 0 ? time_left : -1);
}
int state)
{
DEFINE_WAIT(wait);
int state)
{
DEFINE_WAIT(wait);
hrtime_t time_left, now;
unsigned long time_left_us;
hrtime_t time_left, now;
unsigned long time_left_us;
ASSERT(mutex_owned(mp));
atomic_inc(&cvp->cv_refs);
ASSERT(mutex_owned(mp));
atomic_inc(&cvp->cv_refs);
- if (cvp->cv_mutex == NULL)
- cvp->cv_mutex = mp;
-
+ m = ACCESS_ONCE(cvp->cv_mutex);
+ if (!m)
+ m = xchg(&cvp->cv_mutex, mp);
/* Ensure the same mutex is used by all callers */
/* Ensure the same mutex is used by all callers */
- ASSERT(cvp->cv_mutex == mp);
+ ASSERT(m == NULL || m == mp);
now = gethrtime();
time_left = expire_time - now;
now = gethrtime();
time_left = expire_time - now;
* interrupts
*/
usleep_range(time_left_us, time_left_us + 100);
* interrupts
*/
usleep_range(time_left_us, time_left_us + 100);
/* No more waiters a different mutex could be used */
if (atomic_dec_and_test(&cvp->cv_waiters)) {
/* No more waiters a different mutex could be used */
if (atomic_dec_and_test(&cvp->cv_waiters)) {
+ /*
+ * This is set without any lock, so it's racy. But this is
+ * just for debug anyway, so make it best-effort
+ */
cvp->cv_mutex = NULL;
wake_up(&cvp->cv_destroy);
}
cvp->cv_mutex = NULL;
wake_up(&cvp->cv_destroy);
}
finish_wait(&cvp->cv_event, &wait);
atomic_dec(&cvp->cv_refs);
finish_wait(&cvp->cv_event, &wait);
atomic_dec(&cvp->cv_refs);
time_left = expire_time - gethrtime();
return (time_left > 0 ? time_left : -1);
}
time_left = expire_time - gethrtime();
return (time_left > 0 ? time_left : -1);
}
ct->ct_thread->comm, atomic_read(&cv->cv_condvar.cv_waiters));
mutex_exit(&cv->cv_mtx);
ct->ct_thread->comm, atomic_read(&cv->cv_condvar.cv_waiters));
mutex_exit(&cv->cv_mtx);
+ /* wait for main thread reap us */
+ while (!kthread_should_stop())
+ schedule();
/* Wake everything for the failure case */
cv_broadcast(&cv.cv_condvar);
cv_destroy(&cv.cv_condvar);
/* Wake everything for the failure case */
cv_broadcast(&cv.cv_condvar);
cv_destroy(&cv.cv_condvar);
+
+ /* wait for threads to exit */
+ for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) {
+ if (!IS_ERR(ct[i].ct_thread))
+ kthread_stop(ct[i].ct_thread);
+ }
mutex_destroy(&cv.cv_mtx);
return rc;
mutex_destroy(&cv.cv_mtx);
return rc;
/* Wake everything for the failure case */
cv_destroy(&cv.cv_condvar);
/* Wake everything for the failure case */
cv_destroy(&cv.cv_condvar);
+
+ /* wait for threads to exit */
+ for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) {
+ if (!IS_ERR(ct[i].ct_thread))
+ kthread_stop(ct[i].ct_thread);
+ }
mutex_destroy(&cv.cv_mtx);
return rc;
mutex_destroy(&cv.cv_mtx);
return rc;
+ /* wait for main thread reap us */
+ while (!kthread_should_stop())
+ schedule();
/* Wake everything for the failure case */
cv_broadcast(&cv.cv_condvar);
cv_destroy(&cv.cv_condvar);
/* Wake everything for the failure case */
cv_broadcast(&cv.cv_condvar);
cv_destroy(&cv.cv_condvar);
+
+ /* wait for threads to exit */
+ for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) {
+ if (!IS_ERR(ct[i].ct_thread))
+ kthread_stop(ct[i].ct_thread);
+ }
mutex_destroy(&cv.cv_mtx);
return rc;
mutex_destroy(&cv.cv_mtx);
return rc;
/* Wake everything for the failure case */
cv_broadcast(&cv.cv_condvar);
cv_destroy(&cv.cv_condvar);
/* Wake everything for the failure case */
cv_broadcast(&cv.cv_condvar);
cv_destroy(&cv.cv_condvar);
+
+ /* wait for threads to exit */
+ for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) {
+ if (!IS_ERR(ct[i].ct_thread))
+ kthread_stop(ct[i].ct_thread);
+ }
mutex_destroy(&cv.cv_mtx);
return rc;
mutex_destroy(&cv.cv_mtx);
return rc;