+/*****************************************************************************\
+ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ * Copyright (C) 2007 The Regents of the University of California.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ * UCRL-CODE-235197
+ *
+ * This file is part of the SPL, Solaris Porting Layer.
+ * For details, see <http://zfsonlinux.org/>.
+ *
+ * The SPL is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * The SPL is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with the SPL. If not, see <http://www.gnu.org/licenses/>.
+\*****************************************************************************/
+
#ifndef _SPL_RWLOCK_H
-#define _SPL_RWLOCK_H
+#define _SPL_RWLOCK_H
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/rwsem.h>
-#include <asm/current.h>
#include <sys/types.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include <linux/rwsem.h>
+#include <linux/rwsem_compat.h>
+/*
+ * Lock type handed to rw_init().  rw_init() asserts that the type is
+ * either RW_DEFAULT or RW_NOLOCKDEP.
+ */
typedef enum {
- RW_DRIVER = 2, /* driver (DDI) rwlock */
- RW_DEFAULT = 4 /* kernel default rwlock */
+ RW_DRIVER = 2, /* driver (DDI) rwlock */
+ RW_DEFAULT = 4, /* kernel default rwlock */
+ RW_NOLOCKDEP = 5 /* with CONFIG_LOCKDEP, lockdep is switched off around operations on the lock */
} krw_type_t;
+/*
+ * Hold mode requested from rw_enter() and rw_tryenter().
+ */
typedef enum {
- RW_WRITER,
- RW_READER
+ RW_NONE = 0, /* not a valid request: rw_enter()/rw_tryenter() hit VERIFY(0) */
+ RW_WRITER = 1, /* take the semaphore for writing (down_write) */
+ RW_READER = 2 /* take the semaphore for reading (down_read) */
} krw_t;
-#define RW_READ_HELD(x) (__rw_read_held((x)))
-#define RW_WRITE_HELD(x) (__rw_write_held((x)))
-#define RW_LOCK_HELD(x) (__rw_lock_held((x)))
-#define RW_ISWRITER(x) (__rw_iswriter(x))
-
-#define RW_MAGIC 0x3423645a
-#define RW_POISON 0xa6
-
+/*
+ * If CONFIG_RWSEM_SPIN_ON_OWNER is defined, rw_semaphore will have an owner
+ * field, so we don't need our own.
+ */
typedef struct {
- int rw_magic;
- char *rw_name;
- struct rw_semaphore rw_sem;
- struct task_struct *rw_owner; /* holder of the write lock */
+ struct rw_semaphore rw_rwlock; /* underlying Linux rw-semaphore, reached through SEM() */
+#ifndef CONFIG_RWSEM_SPIN_ON_OWNER
+ kthread_t *rw_owner; /* task recorded as write holder; NULL when cleared */
+#endif
+#ifdef CONFIG_LOCKDEP
+ krw_type_t rw_type; /* type given to rw_init(); checked for RW_NOLOCKDEP */
+#endif /* CONFIG_LOCKDEP */
} krwlock_t;
-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
-struct rwsem_waiter {
- struct list_head list;
- struct task_struct *task;
- unsigned int flags;
-#define RWSEM_WAITING_FOR_READ 0x00000001
-#define RWSEM_WAITING_FOR_WRITE 0x00000002
-};
+/* Yield a pointer to the rw_semaphore embedded in the krwlock_t that rwp points to. */
+#define SEM(rwp) (&(rwp)->rw_rwlock)
-/*
- * wake a single writer
- */
-static inline struct rw_semaphore *
-__rwsem_wake_one_writer_locked(struct rw_semaphore *sem)
+/*
+ * Record the calling task (current) as the write owner of rwp.  When
+ * CONFIG_RWSEM_SPIN_ON_OWNER is defined the body is empty.
+ */
+static inline void
+spl_rw_set_owner(krwlock_t *rwp)
{
- struct rwsem_waiter *waiter;
- struct task_struct *tsk;
-
- sem->activity = -1;
-
- waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
- list_del(&waiter->list);
-
- tsk = waiter->task;
- smp_mb();
- waiter->task = NULL;
- wake_up_process(tsk);
- put_task_struct(tsk);
- return sem;
-}
-
/*
- * release a read lock on the semaphore
+ * If CONFIG_RWSEM_SPIN_ON_OWNER is defined, down_write, up_write,
+ * downgrade_write and __init_rwsem will set/clear owner for us.
*/
-static void fastcall
-__up_read_locked(struct rw_semaphore *sem)
-{
- if (--sem->activity == 0 && !list_empty(&sem->wait_list))
- sem = __rwsem_wake_one_writer_locked(sem);
+#ifndef CONFIG_RWSEM_SPIN_ON_OWNER
+ rwp->rw_owner = current;
+#endif
}
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-static int fastcall
-__down_write_trylock_locked(struct rw_semaphore *sem)
+/*
+ * Reset the recorded write owner of rwp to NULL.  When
+ * CONFIG_RWSEM_SPIN_ON_OWNER is defined the body is empty.
+ */
+static inline void
+spl_rw_clear_owner(krwlock_t *rwp)
{
- int ret = 0;
-
- if (sem->activity == 0 && list_empty(&sem->wait_list)) {
- /* granted */
- sem->activity = -1;
- ret = 1;
- }
-
- return ret;
-}
+#ifndef CONFIG_RWSEM_SPIN_ON_OWNER
+ rwp->rw_owner = NULL;
#endif
-
-extern int __rw_read_held(krwlock_t *rwlp);
-extern int __rw_write_held(krwlock_t *rwlp);
-extern int __rw_lock_held(krwlock_t *rwlp);
-
-static __inline__ void
-rw_init(krwlock_t *rwlp, char *name, krw_type_t type, void *arg)
-{
- ASSERT(type == RW_DEFAULT); /* XXX no irq handler use */
- ASSERT(arg == NULL); /* XXX no irq handler use */
-
- rwlp->rw_magic = RW_MAGIC;
- rwlp->rw_owner = NULL; /* no one holds the write lock yet */
- init_rwsem(&rwlp->rw_sem);
- rwlp->rw_name = NULL;
-
- if (name) {
- rwlp->rw_name = kmalloc(strlen(name) + 1, GFP_KERNEL);
- if (rwlp->rw_name)
- strcpy(rwlp->rw_name, name);
- }
}
-static __inline__ void
-rw_destroy(krwlock_t *rwlp)
+/*
+ * Return the task recorded as the write owner of rwp.
+ *
+ * Without CONFIG_RWSEM_SPIN_ON_OWNER this is the rw_owner field kept in
+ * krwlock_t; with it, the owner field of the embedded rw_semaphore is
+ * returned instead.
+ */
+static inline kthread_t *
+rw_owner(krwlock_t *rwp)
{
- ASSERT(rwlp);
- ASSERT(rwlp->rw_magic == RW_MAGIC);
- ASSERT(rwlp->rw_owner == NULL);
- spin_lock(&rwlp->rw_sem.wait_lock);
- ASSERT(list_empty(&rwlp->rw_sem.wait_list));
- spin_unlock(&rwlp->rw_sem.wait_lock);
-
- if (rwlp->rw_name)
- kfree(rwlp->rw_name);
-
- memset(rwlp, RW_POISON, sizeof(krwlock_t));
+#ifndef CONFIG_RWSEM_SPIN_ON_OWNER
+	/* The rw_semaphore has no owner field here; use our own copy. */
+	return (rwp->rw_owner);
+#else
+	/* The rw_semaphore carries the owner itself. */
+	return (SEM(rwp)->owner);
+#endif
}
-/* Return 0 if the lock could not be obtained without blocking.
- */
-static __inline__ int
-rw_tryenter(krwlock_t *rwlp, krw_t rw)
+#ifdef CONFIG_LOCKDEP
+/*
+ * Store the lock type in rwp (CONFIG_LOCKDEP builds only).  The stored
+ * value is compared against RW_NOLOCKDEP by the lockdep on/off helpers.
+ */
+static inline void
+spl_rw_set_type(krwlock_t *rwp, krw_type_t type)
{
- int result;
-
- ASSERT(rwlp);
- ASSERT(rwlp->rw_magic == RW_MAGIC);
-
- switch (rw) {
- /* these functions return 1 if success, 0 if contention */
- case RW_READER:
- /* Here the Solaris code would return 0
- * if there were any write waiters. Specifically
- * thinking about the case where readers may have
- * the lock and we would also allow this thread
- * to grab the read lock with a writer waiting in the
- * queue. This doesn't seem like a correctness
- * issue, so just call down_read_trylock()
- * for the test. We may have to revisit this if
- * it becomes an issue */
- result = down_read_trylock(&rwlp->rw_sem);
- break;
- case RW_WRITER:
- result = down_write_trylock(&rwlp->rw_sem);
- if (result) {
- /* there better not be anyone else
- * holding the write lock here */
- ASSERT(rwlp->rw_owner == NULL);
- rwlp->rw_owner = current;
- }
- break;
- default:
- SBUG();
- }
-
- return result;
+ rwp->rw_type = type;
}
-
-static __inline__ void
-rw_enter(krwlock_t *rwlp, krw_t rw)
+/*
+ * Call lockdep_off() when rwp is non-NULL and was initialised with type
+ * RW_NOLOCKDEP; otherwise do nothing.  Paired with
+ * spl_rw_lockdep_on_maybe() around each semaphore operation.
+ *
+ * This is a real function, not a macro, so its lines carry no
+ * line-continuation backslashes.
+ */
+static inline void
+spl_rw_lockdep_off_maybe(krwlock_t *rwp)
+{
+	if (rwp && rwp->rw_type == RW_NOLOCKDEP)
+		lockdep_off();
+}
+/*
+ * Call lockdep_on() when rwp is non-NULL and was initialised with type
+ * RW_NOLOCKDEP; otherwise do nothing.  Undoes the effect of
+ * spl_rw_lockdep_off_maybe() once the semaphore operation is done.
+ *
+ * This is a real function, not a macro, so its lines carry no
+ * line-continuation backslashes.
+ */
+static inline void
+spl_rw_lockdep_on_maybe(krwlock_t *rwp)
+{
+	if (rwp && rwp->rw_type == RW_NOLOCKDEP)
+		lockdep_on();
+}
+#else /* CONFIG_LOCKDEP */
+/*
+ * Without CONFIG_LOCKDEP krwlock_t has no rw_type field, so the type and
+ * lockdep helpers expand to nothing and their arguments are not evaluated.
+ */
+#define spl_rw_set_type(rwp, type)
+#define spl_rw_lockdep_off_maybe(rwp)
+#define spl_rw_lockdep_on_maybe(rwp)
+#endif /* CONFIG_LOCKDEP */
+
+/*
+ * Return non-zero when rwp looks read-held: the underlying semaphore is
+ * locked and no write owner is recorded.  Return 0 otherwise.
+ */
+static inline int
+RW_READ_HELD(krwlock_t *rwp)
{
- ASSERT(rwlp);
- ASSERT(rwlp->rw_magic == RW_MAGIC);
-
- switch (rw) {
- case RW_READER:
- /* Here the Solaris code would block
- * if there were any write waiters. Specifically
- * thinking about the case where readers may have
- * the lock and we would also allow this thread
- * to grab the read lock with a writer waiting in the
- * queue. This doesn't seem like a correctness
- * issue, so just call down_read()
- * for the test. We may have to revisit this if
- * it becomes an issue */
- down_read(&rwlp->rw_sem);
- break;
- case RW_WRITER:
- down_write(&rwlp->rw_sem);
-
- /* there better not be anyone else
- * holding the write lock here */
- ASSERT(rwlp->rw_owner == NULL);
- rwlp->rw_owner = current;
- break;
- default:
- SBUG();
- }
+	/* An unlocked semaphore cannot be read-held. */
+	if (!spl_rwsem_is_locked(SEM(rwp)))
+		return (0);
+
+	/* Locked with no recorded writer: the holders are readers. */
+	return (rw_owner(rwp) == NULL);
}
-static __inline__ void
-rw_exit(krwlock_t *rwlp)
+/*
+ * Return non-zero when the calling task (current) is the recorded write
+ * owner of rwp, 0 otherwise.
+ */
+static inline int
+RW_WRITE_HELD(krwlock_t *rwp)
{
- ASSERT(rwlp);
- ASSERT(rwlp->rw_magic == RW_MAGIC);
-
- /* rw_owner is held by current
- * thread iff it is a writer */
- if (rwlp->rw_owner == current) {
- rwlp->rw_owner = NULL;
- up_write(&rwlp->rw_sem);
- } else {
- up_read(&rwlp->rw_sem);
- }
+	return (current == rw_owner(rwp));
}
-static __inline__ void
-rw_downgrade(krwlock_t *rwlp)
+/*
+ * Return the result of spl_rwsem_is_locked() on the semaphore embedded
+ * in rwp, i.e. whether it is currently locked in any mode.
+ */
+static inline int
+RW_LOCK_HELD(krwlock_t *rwp)
{
- ASSERT(rwlp);
- ASSERT(rwlp->rw_magic == RW_MAGIC);
- ASSERT(rwlp->rw_owner == current);
-
- rwlp->rw_owner = NULL;
- downgrade_write(&rwlp->rw_sem);
+	return (spl_rwsem_is_locked(SEM(rwp)));
}
-/* Return 0 if unable to perform the upgrade.
- * Might be wise to fix the caller
- * to acquire the write lock first?
+/*
+ * The following functions must be #defines and not static inline functions.
+ * This ensures that the native Linux semaphore functions (down/up)
+ * will be correctly located in the user's code, which is important
+ * for the built-in kernel lock analysis tools.
*/
-static __inline__ int
-rw_tryupgrade(krwlock_t *rwlp)
-{
- int result = 0;
-
- ASSERT(rwlp);
- ASSERT(rwlp->rw_magic == RW_MAGIC);
-
- spin_lock(&rwlp->rw_sem.wait_lock);
-
- /* Check if there is anyone waiting for the
- * lock. If there is, then we know we should
- * not try to upgrade the lock */
- if (!list_empty(&rwlp->rw_sem.wait_list)) {
- spin_unlock(&rwlp->rw_sem.wait_lock);
- return 0;
- }
-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
- /* Note that activity is protected by
- * the wait_lock. Don't try to upgrade
- * if there are multiple readers currently
- * holding the lock */
- if (rwlp->rw_sem.activity > 1) {
-#else
- /* Don't try to upgrade
- * if there are multiple readers currently
- * holding the lock */
- if ((rwlp->rw_sem.count & RWSEM_ACTIVE_MASK) > 1) {
+/*
+ * Initialise the rwlock that rwp points to.
+ *
+ *   rwp  - krwlock_t pointer; its source text (#rwp) becomes the name
+ *          passed to __init_rwsem()
+ *   name - not used by this implementation
+ *   type - must be RW_DEFAULT or RW_NOLOCKDEP (asserted)
+ *   arg  - not used by this implementation
+ *
+ * Each expansion site gets its own static lock_class_key.  The recorded
+ * owner is cleared and, in CONFIG_LOCKDEP builds, the type is stored.
+ * The type argument is parenthesised so that compound expressions such as
+ * `cond ? A : B` are compared as a whole.
+ * NOTE: rwp and type are evaluated more than once.
+ */
+#define rw_init(rwp, name, type, arg) \
+({ \
+	static struct lock_class_key __key; \
+	ASSERT((type) == RW_DEFAULT || (type) == RW_NOLOCKDEP); \
+	\
+	__init_rwsem(SEM(rwp), #rwp, &__key); \
+	spl_rw_clear_owner(rwp); \
+	spl_rw_set_type(rwp, type); \
+})
+
+/*
+ * Tear down an rwlock.  Nothing is released; the macro only verifies
+ * that the lock is not held in any mode at this point.
+ */
+#define rw_destroy(rwp) \
+({ \
+ VERIFY(!RW_LOCK_HELD(rwp)); \
+})
+
+/*
+ * Attempt to take rwp in mode rw without blocking.
+ *
+ * RW_READER uses down_read_trylock(); RW_WRITER uses
+ * down_write_trylock() and, on success, records the caller as owner.
+ * Any other mode trips VERIFY(0).  The macro evaluates to the trylock
+ * result: non-zero if the lock was obtained, 0 otherwise.
+ * NOTE: rwp is evaluated more than once.
+ */
+#define rw_tryenter(rwp, rw) \
+({ \
+	int _rc_ = 0; \
+	\
+	spl_rw_lockdep_off_maybe(rwp); \
+	switch (rw) { \
+	case RW_READER: \
+		_rc_ = down_read_trylock(SEM(rwp)); \
+		break; \
+	case RW_WRITER: \
+		_rc_ = down_write_trylock(SEM(rwp)); \
+		if (_rc_) \
+			spl_rw_set_owner(rwp); \
+		break; \
+	default: \
+		VERIFY(0); \
+	} \
+	spl_rw_lockdep_on_maybe(rwp); \
+	_rc_; \
+})
+
+/*
+ * Take rwp in mode rw.  RW_READER calls down_read(); RW_WRITER calls
+ * down_write() and then records the caller as owner.  Any other mode
+ * trips VERIFY(0).
+ * NOTE: rwp is evaluated more than once.
+ */
+#define rw_enter(rwp, rw) \
+({ \
+ spl_rw_lockdep_off_maybe(rwp); \
+ switch (rw) { \
+ case RW_READER: \
+ down_read(SEM(rwp)); \
+ break; \
+ case RW_WRITER: \
+ down_write(SEM(rwp)); \
+ spl_rw_set_owner(rwp); \
+ break; \
+ default: \
+ VERIFY(0); \
+ } \
+ spl_rw_lockdep_on_maybe(rwp); \
+})
+
+/*
+ * Release rwp.  If the caller is the recorded write owner, the owner is
+ * cleared before up_write(); otherwise the lock is asserted to be
+ * read-held and up_read() is called.
+ * NOTE: rwp is evaluated more than once.
+ */
+#define rw_exit(rwp) \
+({ \
+ spl_rw_lockdep_off_maybe(rwp); \
+ if (RW_WRITE_HELD(rwp)) { \
+ spl_rw_clear_owner(rwp); \
+ up_write(SEM(rwp)); \
+ } else { \
+ ASSERT(RW_READ_HELD(rwp)); \
+ up_read(SEM(rwp)); \
+ } \
+ spl_rw_lockdep_on_maybe(rwp); \
+})
+
+/*
+ * Downgrade a write hold on rwp to a read hold: the recorded owner is
+ * cleared first, then downgrade_write() is called on the semaphore.
+ * The macro does not itself check that the caller holds the write lock.
+ * NOTE: rwp is evaluated more than once.
+ */
+#define rw_downgrade(rwp) \
+({ \
+ spl_rw_lockdep_off_maybe(rwp); \
+ spl_rw_clear_owner(rwp); \
+ downgrade_write(SEM(rwp)); \
+ spl_rw_lockdep_on_maybe(rwp); \
+})
+
+#if defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+#error spinlock rwsem should not have spin on owner
#endif
- spin_unlock(&rwlp->rw_sem.wait_lock);
- return 0;
- }
-
-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
- /* Here it should be safe to drop the
- * read lock and reacquire it for writing since
- * we know there are no waiters */
- __up_read_locked(&rwlp->rw_sem);
+/*
+ * For the generic implementations of rw-semaphores the following is
+ * true. If your semaphore implementation internally represents the
+ * semaphore state differently then special case handling is required.
+ * - if activity/count is 0 then there are no active readers or writers
+ * - if activity/count is +ve then that is the number of active readers
+ * - if activity/count is -1 then there is one active writer
+ */
- /* returns 1 if success, 0 if contention */
- result = __down_write_trylock_locked(&rwlp->rw_sem);
+/*
+ * Helpers defined outside this header.  rw_tryupgrade() calls both while
+ * holding the semaphore's wait_lock.
+ * NOTE(review): presumably neither takes wait_lock itself -- confirm
+ * against their definitions.
+ */
+extern void __up_read_locked(struct rw_semaphore *);
+extern int __down_write_trylock_locked(struct rw_semaphore *);
+
+/*
+ * Try to turn a read hold on rwp into a write hold without blocking
+ * (CONFIG_RWSEM_GENERIC_SPINLOCK implementation).
+ *
+ * With the semaphore's wait_lock taken via spl_rwsem_lock_irqsave(), the
+ * upgrade is attempted only when the wait list is empty and the activity
+ * count is exactly 1, i.e. there is a single active reader (assumed to be
+ * the caller).  The read hold is then dropped and the write lock taken
+ * under the same wait_lock, so the write trylock is verified to succeed,
+ * and the caller is recorded as owner through spl_rw_set_owner().
+ *
+ * Evaluates to non-zero if the upgrade happened, 0 if it was not attempted.
+ * NOTE: rwp is evaluated more than once.
+ */
+#define rw_tryupgrade(rwp) \
+({ \
+	unsigned long _flags_; \
+	int _rc_ = 0; \
+	\
+	spl_rw_lockdep_off_maybe(rwp); \
+	spl_rwsem_lock_irqsave(&SEM(rwp)->wait_lock, _flags_); \
+	if ((list_empty(&SEM(rwp)->wait_list)) && \
+	    (SEM(rwp)->activity == 1)) { \
+		__up_read_locked(SEM(rwp)); \
+		_rc_ = __down_write_trylock_locked(SEM(rwp)); \
+		VERIFY(_rc_); \
+		spl_rw_set_owner(rwp); \
+	} \
+	spl_rwsem_unlock_irqrestore(&SEM(rwp)->wait_lock, _flags_); \
+	spl_rw_lockdep_on_maybe(rwp); \
+	_rc_; \
+})
#else
- /* Here it should be safe to drop the
- * read lock and reacquire it for writing since
- * we know there are no waiters */
- up_read(&rwlp->rw_sem);
-
- /* returns 1 if success, 0 if contention */
- result = down_write_trylock(&rwlp->rw_sem);
+/*
+ * rw_tryupgrade() can be implemented correctly but for each supported
+ * arch we will need a custom implementation. For the x86 implementation
+ * it looks like a custom cmpxchg() to atomically check and promote the
+ * rwsem would be safe. For now that's not worth the trouble so in this
+ * case rw_tryupgrade() has just been disabled.
+ *
+ * The disabled form always evaluates to 0 (upgrade not performed) and
+ * does not evaluate its argument.
+ */
+#define rw_tryupgrade(rwp) ({ 0; })
#endif
- /* Check if upgrade failed. Should not ever happen
- * if we got to this point */
- ASSERT(result);
- ASSERT(rwlp->rw_owner == NULL);
- rwlp->rw_owner = current;
- spin_unlock(&rwlp->rw_sem.wait_lock);
- return 1;
-}
-
-static __inline__ kthread_t *
-rw_owner(krwlock_t *rwlp)
-{
- ASSERT(rwlp);
- ASSERT(rwlp->rw_magic == RW_MAGIC);
-
- return rwlp->rw_owner;
-}
-
-#ifdef __cplusplus
-}
-#endif
+/*
+ * Defined outside this header.
+ * NOTE(review): presumably the rwlock subsystem setup/teardown hooks,
+ * with spl_rw_init() returning 0 on success -- confirm against the
+ * definitions.
+ */
+int spl_rw_init(void);
+void spl_rw_fini(void);
-#endif /* _SPL_RWLOCK_H */
+#endif /* _SPL_RWLOCK_H */