locking/pvqspinlock, x86: Optimize the PV unlock code path

author Waiman Long <Waiman.Long@hpe.com>

Tue, 10 Nov 2015 00:09:24 +0000 (19:09 -0500)

committer Ingo Molnar <mingo@kernel.org>

Mon, 23 Nov 2015 09:02:02 +0000 (10:02 +0100)
author Waiman Long <Waiman.Long@hpe.com>
Tue, 10 Nov 2015 00:09:24 +0000 (19:09 -0500)
committer Ingo Molnar <mingo@kernel.org>
Mon, 23 Nov 2015 09:02:02 +0000 (10:02 +0100)
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h

index b002e711ba88eaf1610071ef7459054ad18155b2..9f92c180ed2fb769bf0e5d0b83ae6eba9ac27f75 100644 (file)
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -1,6 +1,65 @@
  #ifndef __ASM_QSPINLOCK_PARAVIRT_H
  #define __ASM_QSPINLOCK_PARAVIRT_H
  
+/*
+ * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit
+ * registers. For i386, however, only 1 32-bit register needs to be saved
+ * and restored. So an optimized version of __pv_queued_spin_unlock() is
+ * hand-coded for 64-bit, but it isn't worthwhile to do it for 32-bit.
+ */
+#ifdef CONFIG_64BIT
+
+PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
+#define __pv_queued_spin_unlock        __pv_queued_spin_unlock
+#define PV_UNLOCK              "__raw_callee_save___pv_queued_spin_unlock"
+#define PV_UNLOCK_SLOWPATH     "__raw_callee_save___pv_queued_spin_unlock_slowpath"
+
+/*
+ * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock
+ * which combines the register-saving thunk and the body of the following
+ * C code:
+ *
+ * void __pv_queued_spin_unlock(struct qspinlock *lock)
+ * {
+ *     struct __qspinlock *l = (void *)lock;
+ *     u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+ *
+ *     if (likely(lockval == _Q_LOCKED_VAL))
+ *             return;
+ *     __pv_queued_spin_unlock_slowpath(lock, lockval);
+ * }
+ *
+ * For x86-64,
+ *   rdi = lock              (first argument)
+ *   rsi = lockval           (second argument, passed to the slowpath)
+ *   rdx = internal variable (set to 0, the new value stored by cmpxchg)
+ */
+asm    (".pushsection .text;"
+       ".globl " PV_UNLOCK ";"
+       ".align 4,0x90;"
+       PV_UNLOCK ": "
+       "push  %rdx;"
+       "mov   $0x1,%eax;"
+       "xor   %edx,%edx;"
+       "lock cmpxchg %dl,(%rdi);"
+       "cmp   $0x1,%al;"
+       "jne   .slowpath;"
+       "pop   %rdx;"
+       "ret;"
+       ".slowpath: "
+       "push   %rsi;"
+       "movzbl %al,%esi;"
+       "call " PV_UNLOCK_SLOWPATH ";"
+       "pop    %rsi;"
+       "pop    %rdx;"
+       "ret;"
+       ".size " PV_UNLOCK ", .-" PV_UNLOCK ";"
+       ".popsection");
+
+#else /* CONFIG_64BIT */
+
+extern void __pv_queued_spin_unlock(struct qspinlock *lock);
  PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
  
+#endif /* CONFIG_64BIT */
  #endif
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h

index f0450ff4829b6c1308d4768b2ae3a7c575b1cf51..4bd323d38c60bce5f5f582e2acf1027d80b023fb 100644 (file)
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -308,23 +308,14 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
  }
  
  /*
- * PV version of the unlock function to be used in stead of
- * queued_spin_unlock().
+ * PV versions of the unlock fastpath and slowpath functions to be used
+ * instead of queued_spin_unlock().
   */
-__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
+__visible void
+__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
  {
         struct __qspinlock *l = (void *)lock;
         struct pv_node *node;
-       u8 locked;
-
-       /*
-        * We must not unlock if SLOW, because in that case we must first
-        * unhash. Otherwise it would be possible to have multiple @lock
-        * entries, which would be BAD.
-        */
-       locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
-       if (likely(locked == _Q_LOCKED_VAL))
-               return;
  
         if (unlikely(locked != _Q_SLOW_VAL)) {
                 WARN(!debug_locks_silent,
@@ -363,12 +354,32 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
          */
         pv_kick(node->cpu);
  }
+
  /*
   * Include the architecture specific callee-save thunk of the
   * __pv_queued_spin_unlock(). This thunk is put together with
- * __pv_queued_spin_unlock() near the top of the file to make sure
- * that the callee-save thunk and the real unlock function are close
- * to each other sharing consecutive instruction cachelines.
+ * __pv_queued_spin_unlock() to make the callee-save thunk and the real unlock
+ * function close to each other sharing consecutive instruction cachelines.
+ * Alternatively, an architecture-specific version of __pv_queued_spin_unlock()
+ * can be defined.
   */
  #include <asm/qspinlock_paravirt.h>
  
+#ifndef __pv_queued_spin_unlock
+__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
+{
+       struct __qspinlock *l = (void *)lock;
+       u8 locked;
+
+       /*
+        * Only clear the lock byte if it is still _Q_LOCKED_VAL. We must not
+        * unlock if SLOW, because in that case we must first unhash; otherwise
+        * there could be multiple @lock entries, which would be BAD.
+        */
+       locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+       if (likely(locked == _Q_LOCKED_VAL))
+               return;
+
+       __pv_queued_spin_unlock_slowpath(lock, locked);
+}
+#endif /* __pv_queued_spin_unlock */
author	Waiman Long <Waiman.Long@hpe.com>
	Tue, 10 Nov 2015 00:09:24 +0000 (19:09 -0500)
committer	Ingo Molnar <mingo@kernel.org>
	Mon, 23 Nov 2015 09:02:02 +0000 (10:02 +0100)
arch/x86/include/asm/qspinlock_paravirt.h		patch \| blob \| blame \| history
kernel/locking/qspinlock_paravirt.h		patch \| blob \| blame \| history