]> git.proxmox.com Git - mirror_spl.git/blobdiff - include/sys/rwlock.h
Add new lock types MUTEX_NOLOCKDEP, and RW_NOLOCKDEP
[mirror_spl.git] / include / sys / rwlock.h
index cd6e46081b71ffa7bc5b8fed2a560968661234c6..c82764ce9eb1b0889931a72eabc97f95e2229de6 100644 (file)
+/*****************************************************************************\
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *  For details, see <http://zfsonlinux.org/>.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+\*****************************************************************************/
+
 #ifndef _SPL_RWLOCK_H
-#define        _SPL_RWLOCK_H
+#define _SPL_RWLOCK_H
 
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/rwsem.h>
-#include <asm/current.h>
 #include <sys/types.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include <linux/rwsem.h>
+#include <linux/rwsem_compat.h>
 
 typedef enum {
-       RW_DRIVER  = 2,         /* driver (DDI) rwlock */
-       RW_DEFAULT = 4          /* kernel default rwlock */
+       RW_DRIVER       = 2,
+       RW_DEFAULT      = 4,
+       RW_NOLOCKDEP    = 5
 } krw_type_t;
 
 typedef enum {
-       RW_WRITER,
-       RW_READER
+       RW_NONE         = 0,
+       RW_WRITER       = 1,
+       RW_READER       = 2
 } krw_t;
 
-#define RW_READ_HELD(x)         (__rw_read_held((x)))
-#define RW_WRITE_HELD(x)        (__rw_write_held((x)))
-#define RW_LOCK_HELD(x)         (__rw_lock_held((x)))
-#define RW_ISWRITER(x)          (__rw_iswriter(x))
-
-#define RW_MAGIC  0x3423645a
-#define RW_POISON 0xa6
-
+/*
+ * If CONFIG_RWSEM_SPIN_ON_OWNER is defined, rw_semaphore will have an owner
+ * field, so we don't need our own.
+ */
 typedef struct {
-       int rw_magic;
-       char *rw_name;
-       struct rw_semaphore rw_sem;
-       struct task_struct *rw_owner;   /* holder of the write lock */
+       struct rw_semaphore rw_rwlock;
+#ifndef CONFIG_RWSEM_SPIN_ON_OWNER
+       kthread_t *rw_owner;
+#endif
+#ifdef CONFIG_LOCKDEP
+       krw_type_t      rw_type;
+#endif /* CONFIG_LOCKDEP */
 } krwlock_t;
 
-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
-struct rwsem_waiter {
-       struct list_head list;
-       struct task_struct *task;
-       unsigned int flags;
-#define RWSEM_WAITING_FOR_READ 0x00000001
-#define RWSEM_WAITING_FOR_WRITE        0x00000002
-};
+/* Map a krwlock_t to its embedded struct rw_semaphore. */
+#define SEM(rwp)       (&(rwp)->rw_rwlock)
 
-/*
- * wake a single writer
- */
-static inline struct rw_semaphore *
-__rwsem_wake_one_writer_locked(struct rw_semaphore *sem)
+static inline void
+spl_rw_set_owner(krwlock_t *rwp)
 {
-       struct rwsem_waiter *waiter;
-       struct task_struct *tsk;
-
-       sem->activity = -1;
-
-       waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
-       list_del(&waiter->list);
-
-       tsk = waiter->task;
-       smp_mb();
-       waiter->task = NULL;
-       wake_up_process(tsk);
-       put_task_struct(tsk);
-       return sem;
-}
-
 /*
- * release a read lock on the semaphore
+ * If CONFIG_RWSEM_SPIN_ON_OWNER is defined, down_write, up_write,
+ * downgrade_write and __init_rwsem will set/clear owner for us.
  */
-static void fastcall
-__up_read_locked(struct rw_semaphore *sem)
-{
-       if (--sem->activity == 0 && !list_empty(&sem->wait_list))
-               sem = __rwsem_wake_one_writer_locked(sem);
+#ifndef CONFIG_RWSEM_SPIN_ON_OWNER
+       rwp->rw_owner = current;
+#endif
 }
 
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-static int fastcall
-__down_write_trylock_locked(struct rw_semaphore *sem)
+static inline void
+spl_rw_clear_owner(krwlock_t *rwp)
 {
-       int ret = 0;
-
-       if (sem->activity == 0 && list_empty(&sem->wait_list)) {
-               /* granted */
-               sem->activity = -1;
-               ret = 1;
-       }
-
-       return ret;
-}
+#ifndef CONFIG_RWSEM_SPIN_ON_OWNER
+       rwp->rw_owner = NULL;
 #endif
-
-extern int __rw_read_held(krwlock_t *rwlp);
-extern int __rw_write_held(krwlock_t *rwlp);
-extern int __rw_lock_held(krwlock_t *rwlp);
-
-static __inline__ void
-rw_init(krwlock_t *rwlp, char *name, krw_type_t type, void *arg)
-{
-       ASSERT(type == RW_DEFAULT);     /* XXX no irq handler use */
-       ASSERT(arg == NULL);            /* XXX no irq handler use */
-
-       rwlp->rw_magic = RW_MAGIC;
-       rwlp->rw_owner = NULL;          /* no one holds the write lock yet */
-       init_rwsem(&rwlp->rw_sem);
-       rwlp->rw_name = NULL;
-
-        if (name) {
-                rwlp->rw_name = kmalloc(strlen(name) + 1, GFP_KERNEL);
-                if (rwlp->rw_name)
-                        strcpy(rwlp->rw_name, name);
-        }
 }
 
-static __inline__ void
-rw_destroy(krwlock_t *rwlp)
+static inline kthread_t *
+rw_owner(krwlock_t *rwp)
 {
-       ASSERT(rwlp);
-       ASSERT(rwlp->rw_magic == RW_MAGIC);
-       ASSERT(rwlp->rw_owner == NULL);
-       spin_lock(&rwlp->rw_sem.wait_lock);
-       ASSERT(list_empty(&rwlp->rw_sem.wait_list));
-       spin_unlock(&rwlp->rw_sem.wait_lock);
-
-       if (rwlp->rw_name)
-                kfree(rwlp->rw_name);
-
-       memset(rwlp, RW_POISON, sizeof(krwlock_t));
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+       return SEM(rwp)->owner;
+#else
+       return rwp->rw_owner;
+#endif
 }
 
-/* Return 0 if the lock could not be obtained without blocking.
- */
-static __inline__ int
-rw_tryenter(krwlock_t *rwlp, krw_t rw)
+#ifdef CONFIG_LOCKDEP
+static inline void
+spl_rw_set_type(krwlock_t *rwp, krw_type_t type)
 {
-       int result;
-
-       ASSERT(rwlp);
-       ASSERT(rwlp->rw_magic == RW_MAGIC);
-
-       switch (rw) {
-               /* these functions return 1 if success, 0 if contention */
-               case RW_READER:
-                       /* Here the Solaris code would return 0
-                        * if there were any write waiters.  Specifically
-                        * thinking about the case where readers may have
-                        * the lock and we would also allow this thread
-                        * to grab the read lock with a writer waiting in the
-                        * queue. This doesn't seem like a correctness
-                        * issue, so just call down_read_trylock()
-                        * for the test.  We may have to revisit this if
-                        * it becomes an issue */
-                       result = down_read_trylock(&rwlp->rw_sem);
-                       break;
-               case RW_WRITER:
-                       result = down_write_trylock(&rwlp->rw_sem);
-                       if (result) {
-                               /* there better not be anyone else
-                                * holding the write lock here */
-                               ASSERT(rwlp->rw_owner == NULL);
-                               rwlp->rw_owner = current;
-                       }
-                       break;
-               default:
-                       SBUG();
-       }
-
-       return result;
+       rwp->rw_type = type;
 }
-
-static __inline__ void
-rw_enter(krwlock_t *rwlp, krw_t rw)
+/* Disable lockdep tracking around operations on RW_NOLOCKDEP locks. */
+static inline void
+spl_rw_lockdep_off_maybe(krwlock_t *rwp)
+{
+       if (rwp && rwp->rw_type == RW_NOLOCKDEP)
+               lockdep_off();
+}
+
+/* Re-enable lockdep tracking after an RW_NOLOCKDEP operation. */
+static inline void
+spl_rw_lockdep_on_maybe(krwlock_t *rwp)
+{
+       if (rwp && rwp->rw_type == RW_NOLOCKDEP)
+               lockdep_on();
+}
+#else  /* CONFIG_LOCKDEP */
+#define spl_rw_set_type(rwp, type)
+#define spl_rw_lockdep_off_maybe(rwp)
+#define spl_rw_lockdep_on_maybe(rwp)
+#endif /* CONFIG_LOCKDEP */
+
+static inline int
+RW_READ_HELD(krwlock_t *rwp)
 {
-       ASSERT(rwlp);
-       ASSERT(rwlp->rw_magic == RW_MAGIC);
-
-       switch (rw) {
-               case RW_READER:
-                       /* Here the Solaris code would block
-                        * if there were any write waiters.  Specifically
-                        * thinking about the case where readers may have
-                        * the lock and we would also allow this thread
-                        * to grab the read lock with a writer waiting in the
-                        * queue. This doesn't seem like a correctness
-                        * issue, so just call down_read()
-                        * for the test.  We may have to revisit this if
-                        * it becomes an issue */
-                       down_read(&rwlp->rw_sem);
-                       break;
-               case RW_WRITER:
-                       down_write(&rwlp->rw_sem);
-
-                       /* there better not be anyone else
-                        * holding the write lock here */
-                       ASSERT(rwlp->rw_owner == NULL);
-                       rwlp->rw_owner = current;
-                       break;
-               default:
-                       SBUG();
-       }
+       return (spl_rwsem_is_locked(SEM(rwp)) && rw_owner(rwp) == NULL);
 }
 
-static __inline__ void
-rw_exit(krwlock_t *rwlp)
+static inline int
+RW_WRITE_HELD(krwlock_t *rwp)
 {
-       ASSERT(rwlp);
-       ASSERT(rwlp->rw_magic == RW_MAGIC);
-
-       /* rw_owner is held by current
-        * thread iff it is a writer */
-       if (rwlp->rw_owner == current) {
-               rwlp->rw_owner = NULL;
-               up_write(&rwlp->rw_sem);
-       } else {
-               up_read(&rwlp->rw_sem);
-       }
+       return (rw_owner(rwp) == current);
 }
 
-static __inline__ void
-rw_downgrade(krwlock_t *rwlp)
+static inline int
+RW_LOCK_HELD(krwlock_t *rwp)
 {
-       ASSERT(rwlp);
-       ASSERT(rwlp->rw_magic == RW_MAGIC);
-       ASSERT(rwlp->rw_owner == current);
-
-       rwlp->rw_owner = NULL;
-       downgrade_write(&rwlp->rw_sem);
+       return spl_rwsem_is_locked(SEM(rwp));
 }
 
-/* Return 0 if unable to perform the upgrade.
- * Might be wise to fix the caller
- * to acquire the write lock first?
+/*
+ * The following functions must be a #define and not static inline.
+ * This ensures that the native linux semaphore functions (down/up)
+ * will be correctly located in the users code which is important
+ * for the built in kernel lock analysis tools
  */
-static __inline__ int
-rw_tryupgrade(krwlock_t *rwlp)
-{
-       int result = 0;
-
-       ASSERT(rwlp);
-       ASSERT(rwlp->rw_magic == RW_MAGIC);
-
-       spin_lock(&rwlp->rw_sem.wait_lock);
-
-       /* Check if there is anyone waiting for the
-        * lock.  If there is, then we know we should
-        * not try to upgrade the lock */
-       if (!list_empty(&rwlp->rw_sem.wait_list)) {
-               spin_unlock(&rwlp->rw_sem.wait_lock);
-               return 0;
-       }
-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
-       /* Note that activity is protected by
-        * the wait_lock.  Don't try to upgrade
-        * if there are multiple readers currently
-        * holding the lock */
-       if (rwlp->rw_sem.activity > 1) {
-#else
-       /* Don't try to upgrade
-        * if there are multiple readers currently
-        * holding the lock */
-       if ((rwlp->rw_sem.count & RWSEM_ACTIVE_MASK) > 1) {
+/*
+ * Initialize the rwlock.  The 'name' and 'arg' parameters are accepted
+ * for Solaris API compatibility but are unused; the stringified variable
+ * name (#rwp) is what gets passed to __init_rwsem for lock debugging.
+ * Only RW_DEFAULT and RW_NOLOCKDEP lock types are supported.
+ */
+#define rw_init(rwp, name, type, arg)                                  \
+({                                                                     \
+       static struct lock_class_key __key;                             \
+       ASSERT(type == RW_DEFAULT || type == RW_NOLOCKDEP);             \
+                                                                       \
+       __init_rwsem(SEM(rwp), #rwp, &__key);                           \
+       spl_rw_clear_owner(rwp);                                        \
+       spl_rw_set_type(rwp, type);                                     \
+})
+
+/* Destroy the rwlock; the lock must not be held by anyone. */
+#define rw_destroy(rwp)                                                        \
+({                                                                     \
+       VERIFY(!RW_LOCK_HELD(rwp));                                     \
+})
+
+/*
+ * Try to take the lock as reader or writer without blocking.
+ * Returns non-zero on success, 0 on contention (down_*_trylock
+ * semantics).  The owner is only recorded for a write acquisition.
+ */
+#define rw_tryenter(rwp, rw)                                           \
+({                                                                     \
+       int _rc_ = 0;                                                   \
+                                                                       \
+       spl_rw_lockdep_off_maybe(rwp);                                  \
+       switch (rw) {                                                   \
+       case RW_READER:                                                 \
+               _rc_ = down_read_trylock(SEM(rwp));                     \
+               break;                                                  \
+       case RW_WRITER:                                                 \
+               if ((_rc_ = down_write_trylock(SEM(rwp))))              \
+                       spl_rw_set_owner(rwp);                          \
+               break;                                                  \
+       default:                                                        \
+               VERIFY(0);                                              \
+       }                                                               \
+       spl_rw_lockdep_on_maybe(rwp);                                   \
+       _rc_;                                                           \
+})
+
+/*
+ * Block until the lock is acquired as reader (RW_READER) or writer
+ * (RW_WRITER).  Any other value of 'rw' is a fatal error.
+ */
+#define rw_enter(rwp, rw)                                              \
+({                                                                     \
+       spl_rw_lockdep_off_maybe(rwp);                                  \
+       switch (rw) {                                                   \
+       case RW_READER:                                                 \
+               down_read(SEM(rwp));                                    \
+               break;                                                  \
+       case RW_WRITER:                                                 \
+               down_write(SEM(rwp));                                   \
+               spl_rw_set_owner(rwp);                                  \
+               break;                                                  \
+       default:                                                        \
+               VERIFY(0);                                              \
+       }                                                               \
+       spl_rw_lockdep_on_maybe(rwp);                                   \
+})
+
+/*
+ * Drop the lock.  Whether a read or a write lock is being released is
+ * inferred from the owner field, which is only set (to current) while
+ * the write lock is held.
+ */
+#define rw_exit(rwp)                                                   \
+({                                                                     \
+       spl_rw_lockdep_off_maybe(rwp);                                  \
+       if (RW_WRITE_HELD(rwp)) {                                       \
+               spl_rw_clear_owner(rwp);                                \
+               up_write(SEM(rwp));                                     \
+       } else {                                                        \
+               ASSERT(RW_READ_HELD(rwp));                              \
+               up_read(SEM(rwp));                                      \
+       }                                                               \
+       spl_rw_lockdep_on_maybe(rwp);                                   \
+})
+
+/* Convert a held write lock into a read lock without releasing it. */
+#define rw_downgrade(rwp)                                              \
+({                                                                     \
+       spl_rw_lockdep_off_maybe(rwp);                                  \
+       spl_rw_clear_owner(rwp);                                        \
+       downgrade_write(SEM(rwp));                                      \
+       spl_rw_lockdep_on_maybe(rwp);                                   \
+})
+
+#if defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+#error spinlock rwsem should not have spin on owner
 #endif
-               spin_unlock(&rwlp->rw_sem.wait_lock);
-               return 0;
-       }
-
-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
-       /* Here it should be safe to drop the
-        * read lock and reacquire it for writing since
-        * we know there are no waiters */
-       __up_read_locked(&rwlp->rw_sem);
+/*
+ * For the generic implementations of rw-semaphores the following is
+ * true.  If your semaphore implementation internally represents the
+ * semaphore state differently then special case handling is required.
+ * - if activity/count is 0 then there are no active readers or writers
+ * - if activity/count is +ve then that is the number of active readers
+ * - if activity/count is -1 then there is one active writer
+ */
 
-       /* returns 1 if success, 0 if contention */
-       result = __down_write_trylock_locked(&rwlp->rw_sem);
+extern void __up_read_locked(struct rw_semaphore *);
+extern int __down_write_trylock_locked(struct rw_semaphore *);
+
+/*
+ * Promote a held read lock to a write lock without blocking; returns
+ * non-zero on success.  The upgrade is only attempted when this thread
+ * is the sole reader (activity == 1) and the wait list is empty; under
+ * wait_lock the read-to-write swap then cannot fail, which the VERIFY
+ * documents.  The trylock result is assigned outside the VERIFY so the
+ * required side effect is never hidden inside an assertion.
+ */
+#define rw_tryupgrade(rwp)                                             \
+({                                                                     \
+       unsigned long _flags_;                                          \
+       int _rc_ = 0;                                                   \
+                                                                       \
+       spl_rw_lockdep_off_maybe(rwp);                                  \
+       spl_rwsem_lock_irqsave(&SEM(rwp)->wait_lock, _flags_);          \
+       if ((list_empty(&SEM(rwp)->wait_list)) &&                       \
+           (SEM(rwp)->activity == 1)) {                                \
+               __up_read_locked(SEM(rwp));                             \
+               _rc_ = __down_write_trylock_locked(SEM(rwp));           \
+               VERIFY(_rc_);                                           \
+               spl_rw_set_owner(rwp);                                  \
+       }                                                               \
+       spl_rwsem_unlock_irqrestore(&SEM(rwp)->wait_lock, _flags_);     \
+       spl_rw_lockdep_on_maybe(rwp);                                   \
+       _rc_;                                                           \
+})
 #else
-       /* Here it should be safe to drop the
-        * read lock and reacquire it for writing since
-        * we know there are no waiters */
-       up_read(&rwlp->rw_sem);
-
-       /* returns 1 if success, 0 if contention */
-       result = down_write_trylock(&rwlp->rw_sem);
+/*
+ * rw_tryupgrade() can be implemented correctly but for each supported
+ * arch we will need a custom implementation.  For the x86 implementation
+ * it looks like a custom cmpxchg() to atomically check and promote the
+ * rwsem would be safe.  For now that's not worth the trouble so in this
+ * case rw_tryupgrade() has just been disabled.
+ */
+#define rw_tryupgrade(rwp)     ({ 0; })
 #endif
 
-       /* Check if upgrade failed.  Should not ever happen
-        * if we got to this point */
-       ASSERT(result);
-       ASSERT(rwlp->rw_owner == NULL);
-       rwlp->rw_owner = current;
-       spin_unlock(&rwlp->rw_sem.wait_lock);
-       return 1;
-}
-
-static __inline__ kthread_t *
-rw_owner(krwlock_t *rwlp)
-{
-       ASSERT(rwlp);
-       ASSERT(rwlp->rw_magic == RW_MAGIC);
-
-       return rwlp->rw_owner;
-}
-
-#ifdef __cplusplus
-}
-#endif
+int spl_rw_init(void);
+void spl_rw_fini(void);
 
-#endif /* _SPL_RWLOCK_H */
+#endif /* _SPL_RWLOCK_H */