]> git.proxmox.com Git - mirror_spl.git/commitdiff
Implement a proper rw_tryupgrade
authorChunwei Chen <david.chen@osnexus.com>
Wed, 25 May 2016 23:35:42 +0000 (16:35 -0700)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Tue, 31 May 2016 18:44:15 +0000 (11:44 -0700)
Current rw_tryupgrade does rw_exit and then rw_tryenter(RW_RWITER), and then
does rw_enter(RW_READER) if it fails. This violate the assumption that
rw_tryupgrade should be atomic and could cause extra contention or even lock
inversion.

This patch we implement a proper rw_tryupgrade. For rwsem-spinlock, we take
the spinlock to check rwsem->count and rwsem->wait_list. For normal rwsem, we
use cmpxchg on rwsem->count to change the value from single reader to single
writer.

Signed-off-by: Chunwei Chen <david.chen@osnexus.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tim Chase <tim@chase2k.com>
Closes zfsonlinux/zfs#4692
Closes #554

config/spl-build.m4
include/linux/rwsem_compat.h
include/sys/rwlock.h
module/spl/spl-rwlock.c
module/splat/splat-rwlock.c

index 720506959f0b2789ad4e7f2be68ce130624d8315..d705c6531babb414a3b4da198de0562d05472fc4 100644 (file)
@@ -39,6 +39,7 @@ AC_DEFUN([SPL_AC_CONFIG_KERNEL], [
        SPL_AC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE
        SPL_AC_SHRINK_CONTROL_STRUCT
        SPL_AC_RWSEM_SPINLOCK_IS_RAW
+       SPL_AC_RWSEM_ACTIVITY
        SPL_AC_SCHED_RT_HEADER
        SPL_AC_2ARGS_VFS_GETATTR
        SPL_AC_USLEEP_RANGE
@@ -1316,6 +1317,30 @@ AC_DEFUN([SPL_AC_RWSEM_SPINLOCK_IS_RAW], [
        EXTRA_KCFLAGS="$tmp_flags"
 ])
 
+dnl #
+dnl # 3.16 API Change
+dnl #
+dnl # rwsem-spinlock "->activity" changed to "->count"
+dnl #
+AC_DEFUN([SPL_AC_RWSEM_ACTIVITY], [
+       AC_MSG_CHECKING([whether struct rw_semaphore has member activity])
+       tmp_flags="$EXTRA_KCFLAGS"
+       EXTRA_KCFLAGS="-Werror"
+       SPL_LINUX_TRY_COMPILE([
+               #include <linux/rwsem.h>
+       ],[
+               struct rw_semaphore dummy_semaphore __attribute__ ((unused));
+               dummy_semaphore.activity = 0;
+       ],[
+               AC_MSG_RESULT(yes)
+               AC_DEFINE(HAVE_RWSEM_ACTIVITY, 1,
+               [struct rw_semaphore has member activity])
+       ],[
+               AC_MSG_RESULT(no)
+       ])
+       EXTRA_KCFLAGS="$tmp_flags"
+])
+
 dnl #
 dnl # 3.9 API change,
 dnl # Moved things from linux/sched.h to linux/sched/rt.h
index 5841d7c286978011df049ecbc2520edb249bdf7a..9a4df26738c8bac289040126e5fa6af20dac8552 100644 (file)
 
 #include <linux/rwsem.h>
 
+#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+#define        SPL_RWSEM_SINGLE_READER_VALUE   (1)
+#define        SPL_RWSEM_SINGLE_WRITER_VALUE   (-1)
+#else
+#define        SPL_RWSEM_SINGLE_READER_VALUE   (RWSEM_ACTIVE_READ_BIAS)
+#define        SPL_RWSEM_SINGLE_WRITER_VALUE   (RWSEM_ACTIVE_WRITE_BIAS)
+#endif
+
+/* Linux 3.16 change activity to count for rwsem-spinlock */
+#ifdef HAVE_RWSEM_ACTIVITY
+#define        RWSEM_COUNT(sem)        sem->activity
+#else
+#define        RWSEM_COUNT(sem)        sem->count
+#endif
+
+int rwsem_tryupgrade(struct rw_semaphore *rwsem);
+
 #if defined(RWSEM_SPINLOCK_IS_RAW)
 #define spl_rwsem_lock_irqsave(lk, fl)       raw_spin_lock_irqsave(lk, fl)
 #define spl_rwsem_unlock_irqrestore(lk, fl)  raw_spin_unlock_irqrestore(lk, fl)
index 14d097b01ac161b906268c7938d464a485190423..facebe3ba7fd6cc5a1c000eb14eadbada7488f9e 100644 (file)
@@ -223,13 +223,10 @@ RW_LOCK_HELD(krwlock_t *rwp)
        if (RW_WRITE_HELD(rwp)) {                                       \
                _rc_ = 1;                                               \
        } else {                                                        \
-               rw_exit(rwp);                                           \
-               if (rw_tryenter(rwp, RW_WRITER)) {                      \
-                       _rc_ = 1;                                       \
-               } else {                                                \
-                       rw_enter(rwp, RW_READER);                       \
-                       _rc_ = 0;                                       \
-               }                                                       \
+               spl_rw_lockdep_off_maybe(rwp);                          \
+               if ((_rc_ = rwsem_tryupgrade(SEM(rwp))))                \
+                       spl_rw_set_owner(rwp);                          \
+               spl_rw_lockdep_on_maybe(rwp);                           \
        }                                                               \
        _rc_;                                                           \
 })
index 98251c01154d6312b4d30ee8726e9e77ee67baec..9b356a843950218bfb4379ccce2ca82a6331b0a3 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_RWLOCK
 
+#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+static int
+__rwsem_tryupgrade(struct rw_semaphore *rwsem)
+{
+       int ret = 0;
+       unsigned long flags;
+       spl_rwsem_lock_irqsave(&rwsem->wait_lock, flags);
+       if (RWSEM_COUNT(rwsem) == SPL_RWSEM_SINGLE_READER_VALUE &&
+           list_empty(&rwsem->wait_list)) {
+               ret = 1;
+               RWSEM_COUNT(rwsem) = SPL_RWSEM_SINGLE_WRITER_VALUE;
+       }
+       spl_rwsem_unlock_irqrestore(&rwsem->wait_lock, flags);
+       return (ret);
+}
+#else
+static int
+__rwsem_tryupgrade(struct rw_semaphore *rwsem)
+{
+       typeof (rwsem->count) val;
+       val = cmpxchg(&rwsem->count, SPL_RWSEM_SINGLE_READER_VALUE,
+           SPL_RWSEM_SINGLE_WRITER_VALUE);
+       return (val == SPL_RWSEM_SINGLE_READER_VALUE);
+}
+#endif
+
+int
+rwsem_tryupgrade(struct rw_semaphore *rwsem)
+{
+       if (__rwsem_tryupgrade(rwsem)) {
+               rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
+               rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+               rwsem->owner = current;
+#endif
+               return (1);
+       }
+       return (0);
+}
+EXPORT_SYMBOL(rwsem_tryupgrade);
+
 int spl_rw_init(void) { return 0; }
 void spl_rw_fini(void) { }
index abd6a0e6018f12574b0d03d34e2533b1a16b3bc3..c17eb07ba5c0ba62b499d16cc8ce54744d9b7620 100644 (file)
 #define SPLAT_RWLOCK_TEST5_DESC                "Write downgrade"
 
 #define SPLAT_RWLOCK_TEST6_ID          0x0706
-#define SPLAT_RWLOCK_TEST6_NAME                "rw_tryupgrade"
-#define SPLAT_RWLOCK_TEST6_DESC                "Read upgrade"
+#define SPLAT_RWLOCK_TEST6_NAME                "rw_tryupgrade-1"
+#define SPLAT_RWLOCK_TEST6_DESC                "rwsem->count value"
+
+#define SPLAT_RWLOCK_TEST7_ID          0x0707
+#define SPLAT_RWLOCK_TEST7_NAME                "rw_tryupgrade-2"
+#define SPLAT_RWLOCK_TEST7_DESC                "Read upgrade"
 
 #define SPLAT_RWLOCK_TEST_MAGIC                0x115599DDUL
 #define SPLAT_RWLOCK_TEST_NAME         "rwlock_test"
@@ -580,8 +584,55 @@ splat_rwlock_test6(struct file *file, void *arg)
        splat_init_rw_priv(rwp, file);
 
        rw_enter(&rwp->rw_rwlock, RW_READER);
-       if (!RW_READ_HELD(&rwp->rw_rwlock)) {
+       if (RWSEM_COUNT(SEM(&rwp->rw_rwlock)) !=
+           SPL_RWSEM_SINGLE_READER_VALUE) {
+               splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME,
+                            "We assumed single reader rwsem->count "
+                            "should be %ld, but is %ld\n",
+                            SPL_RWSEM_SINGLE_READER_VALUE,
+                            RWSEM_COUNT(SEM(&rwp->rw_rwlock)));
+               rc = -ENOLCK;
+               goto out;
+       }
+       rw_exit(&rwp->rw_rwlock);
+
+       rw_enter(&rwp->rw_rwlock, RW_WRITER);
+       if (RWSEM_COUNT(SEM(&rwp->rw_rwlock)) !=
+           SPL_RWSEM_SINGLE_WRITER_VALUE) {
                splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME,
+                            "We assumed single writer rwsem->count "
+                            "should be %ld, but is %ld\n",
+                            SPL_RWSEM_SINGLE_WRITER_VALUE,
+                            RWSEM_COUNT(SEM(&rwp->rw_rwlock)));
+               rc = -ENOLCK;
+               goto out;
+       }
+       rc = 0;
+       splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "%s",
+                    "rwsem->count same as we assumed\n");
+out:
+       rw_exit(&rwp->rw_rwlock);
+       rw_destroy(&rwp->rw_rwlock);
+       kfree(rwp);
+
+       return rc;
+}
+
+static int
+splat_rwlock_test7(struct file *file, void *arg)
+{
+       rw_priv_t *rwp;
+       int rc;
+
+       rwp = (rw_priv_t *)kmalloc(sizeof(*rwp), GFP_KERNEL);
+       if (rwp == NULL)
+               return -ENOMEM;
+
+       splat_init_rw_priv(rwp, file);
+
+       rw_enter(&rwp->rw_rwlock, RW_READER);
+       if (!RW_READ_HELD(&rwp->rw_rwlock)) {
+               splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME,
                             "rwlock should be read lock: %d\n",
                             RW_READ_HELD(&rwp->rw_rwlock));
                rc = -ENOLCK;
@@ -591,7 +642,7 @@ splat_rwlock_test6(struct file *file, void *arg)
        /* With one reader upgrade should never fail. */
        rc = rw_tryupgrade(&rwp->rw_rwlock);
        if (!rc) {
-               splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME,
+               splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME,
                             "rwlock failed upgrade from reader: %d\n",
                             RW_READ_HELD(&rwp->rw_rwlock));
                rc = -ENOLCK;
@@ -599,7 +650,7 @@ splat_rwlock_test6(struct file *file, void *arg)
        }
 
        if (RW_READ_HELD(&rwp->rw_rwlock) || !RW_WRITE_HELD(&rwp->rw_rwlock)) {
-               splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "rwlock should "
+               splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME, "rwlock should "
                           "have 0 (not %d) reader and 1 (not %d) writer\n",
                           RW_READ_HELD(&rwp->rw_rwlock),
                           RW_WRITE_HELD(&rwp->rw_rwlock));
@@ -607,7 +658,7 @@ splat_rwlock_test6(struct file *file, void *arg)
        }
 
        rc = 0;
-       splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "%s",
+       splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME, "%s",
                     "rwlock properly upgraded\n");
 out:
        rw_exit(&rwp->rw_rwlock);
@@ -646,6 +697,8 @@ splat_rwlock_init(void)
                      SPLAT_RWLOCK_TEST5_ID, splat_rwlock_test5);
        SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST6_NAME, SPLAT_RWLOCK_TEST6_DESC,
                      SPLAT_RWLOCK_TEST6_ID, splat_rwlock_test6);
+       SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST7_NAME, SPLAT_RWLOCK_TEST7_DESC,
+                     SPLAT_RWLOCK_TEST7_ID, splat_rwlock_test7);
 
        return sub;
 }
@@ -654,6 +707,7 @@ void
 splat_rwlock_fini(splat_subsystem_t *sub)
 {
        ASSERT(sub);
+       SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST7_ID);
        SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST6_ID);
        SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST5_ID);
        SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST4_ID);