*/
#include "qemu/osdep.h"
-#include "qemu-common.h"
#include "qemu/rcu.h"
#include "qemu/atomic.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
+#include "qemu/lockable.h"
#if defined(CONFIG_MALLOC_TRIM)
#include <malloc.h>
#endif
unsigned long rcu_gp_ctr = RCU_GP_LOCKED;
QemuEvent rcu_gp_event;
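+/* Non-zero while drain_call_rcu() is in flight; wait_for_readers() checks
+ * it to fire the force_rcu notifiers of readers that have not yet left
+ * their critical section. */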
+static int in_drain_call_rcu;
static QemuMutex rcu_registry_lock;
static QemuMutex rcu_sync_lock;
{
unsigned long v;
- v = atomic_read(ctr);
+ v = qatomic_read(ctr);
return v && (v != rcu_gp_ctr);
}
/* Written to only by each individual reader. Read by both the reader and the
* writers.
*/
-__thread struct rcu_reader_data rcu_reader;
+QEMU_DEFINE_CO_TLS(struct rcu_reader_data, rcu_reader)
/* Protected by rcu_registry_lock. */
typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
*/
qemu_event_reset(&rcu_gp_event);
- /* Instead of using atomic_mb_set for index->waiting, and
- * atomic_mb_read for index->ctr, memory barriers are placed
- * manually since writes to different threads are independent.
- * qemu_event_reset has acquire semantics, so no memory barrier
- * is needed here.
- */
QLIST_FOREACH(index, &registry, node) {
- atomic_set(&index->waiting, true);
+ qatomic_set(&index->waiting, true);
}
- /* Here, order the stores to index->waiting before the
- * loads of index->ctr.
+ /* Here, order the stores to index->waiting before the loads of
+ * index->ctr. Pairs with smp_mb_placeholder() in rcu_read_unlock(),
+ * ensuring that the loads of index->ctr are sequentially consistent.
+ *
+ * If this is the last iteration, this barrier also prevents
+ * frees from seeping upwards, and orders the two wait phases
+ * on architectures with 32-bit longs; see synchronize_rcu().
*/
- smp_mb();
+ smp_mb_global();
QLIST_FOREACH_SAFE(index, &registry, node, tmp) {
if (!rcu_gp_ongoing(&index->ctr)) {
QLIST_REMOVE(index, node);
QLIST_INSERT_HEAD(&qsreaders, index, node);
- /* No need for mb_set here, worst of all we
+ /* No need for memory barriers here, worst of all we
* get some extra futex wakeups.
*/
- atomic_set(&index->waiting, false);
+ qatomic_set(&index->waiting, false);
+ } else if (qatomic_read(&in_drain_call_rcu)) {
+ notifier_list_notify(&index->force_rcu, NULL);
}
}
void synchronize_rcu(void)
{
- qemu_mutex_lock(&rcu_sync_lock);
- qemu_mutex_lock(&rcu_registry_lock);
+ QEMU_LOCK_GUARD(&rcu_sync_lock);
+ /* Write RCU-protected pointers before reading p_rcu_reader->ctr.
+ * Pairs with smp_mb_placeholder() in rcu_read_lock().
+ *
+ * Also orders write to RCU-protected pointers before
+ * write to rcu_gp_ctr.
+ */
+ smp_mb_global();
+
+ QEMU_LOCK_GUARD(&rcu_registry_lock);
if (!QLIST_EMPTY(&registry)) {
- /* In either case, the atomic_mb_set below blocks stores that free
- * old RCU-protected pointers.
- */
if (sizeof(rcu_gp_ctr) < 8) {
/* For architectures with 32-bit longs, a two-subphases algorithm
* ensures we do not encounter overflow bugs.
*
* Switch parity: 0 -> 1, 1 -> 0.
*/
- atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+ qatomic_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
wait_for_readers();
- atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+ qatomic_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
} else {
/* Increment current grace period. */
- atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
+ qatomic_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
}
wait_for_readers();
}
-
- qemu_mutex_unlock(&rcu_registry_lock);
- qemu_mutex_unlock(&rcu_sync_lock);
}
struct rcu_head **old_tail;
node->next = NULL;
- old_tail = atomic_xchg(&tail, &node->next);
- atomic_mb_set(old_tail, node);
+
+ /*
+ * Make this node the tail of the list. The node will be
+ * used by further enqueue operations, but it will not
+ * be dequeued yet...
+ */
+ old_tail = qatomic_xchg(&tail, &node->next);
+
+ /*
+ * ... until it is pointed to from another item in the list.
+ * In the meantime, try_dequeue() will find a NULL next pointer
+ * and loop.
+ *
+ * Synchronizes with qatomic_load_acquire() in try_dequeue().
+ */
+ qatomic_store_release(old_tail, node);
}
static struct rcu_head *try_dequeue(void)
struct rcu_head *node, *next;
retry:
- /* Test for an empty list, which we do not expect. Note that for
+ /* Head is only written by this thread, so no need for barriers. */
+ node = head;
+
+ /*
+ * If the head node has NULL in its next pointer, the value is
+ * wrong and we need to wait until its enqueuer finishes the update.
+ */
+ next = qatomic_load_acquire(&node->next);
+ if (!next) {
+ return NULL;
+ }
+
+ /*
+ * Test for an empty list, which we do not expect. Note that for
* the consumer head and tail are always consistent. The head
* is consistent because only the consumer reads/writes it.
* The tail, because it is the first step in the enqueuing.
* It is only the next pointers that might be inconsistent.
*/
- if (head == &dummy && atomic_mb_read(&tail) == &dummy.next) {
+ if (head == &dummy && qatomic_read(&tail) == &dummy.next) {
abort();
}
- /* If the head node has NULL in its next pointer, the value is
- * wrong and we need to wait until its enqueuer finishes the update.
- */
- node = head;
- next = atomic_mb_read(&head->next);
- if (!next) {
- return NULL;
- }
-
- /* Since we are the sole consumer, and we excluded the empty case
+ /*
+ * Since we are the sole consumer, and we excluded the empty case
* above, the queue will always have at least two nodes: the
* dummy node, and the one being removed. So we do not need to update
* the tail pointer.
for (;;) {
int tries = 0;
- int n = atomic_read(&rcu_call_count);
+ int n = qatomic_read(&rcu_call_count);
/* Heuristically wait for a decent number of callbacks to pile up.
* Fetch rcu_call_count now, we only must process elements that were
g_usleep(10000);
if (n == 0) {
qemu_event_reset(&rcu_call_ready_event);
- n = atomic_read(&rcu_call_count);
+ n = qatomic_read(&rcu_call_count);
if (n == 0) {
#if defined(CONFIG_MALLOC_TRIM)
malloc_trim(4 * 1024 * 1024);
qemu_event_wait(&rcu_call_ready_event);
}
}
- n = atomic_read(&rcu_call_count);
+ n = qatomic_read(&rcu_call_count);
}
- atomic_sub(&rcu_call_count, n);
+ qatomic_sub(&rcu_call_count, n);
synchronize_rcu();
qemu_mutex_lock_iothread();
while (n > 0) {
{
node->func = func;
enqueue(node);
- atomic_inc(&rcu_call_count);
+ qatomic_inc(&rcu_call_count);
qemu_event_set(&rcu_call_ready_event);
}
+
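+/* drain_call_rcu() queues one of these as a regular RCU callback; when
+ * drain_rcu_callback() fires, every callback enqueued on this thread
+ * before the drain has already run. */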
+struct rcu_drain {
+ struct rcu_head rcu;
+ QemuEvent drain_complete_event;
+};
+
+static void drain_rcu_callback(struct rcu_head *node)
+{
+ struct rcu_drain *event = (struct rcu_drain *)node;
+ qemu_event_set(&event->drain_complete_event);
+}
+
+/*
+ * This function ensures that all pending RCU callbacks
+ * registered on the current thread have finished executing.
+ *
+ * It drops the big QEMU lock during the wait to allow the RCU
+ * thread to process the callbacks.
+ */
+void drain_call_rcu(void)
+{
+ struct rcu_drain rcu_drain;
+ bool locked = qemu_mutex_iothread_locked();
+
+ memset(&rcu_drain, 0, sizeof(struct rcu_drain));
+ qemu_event_init(&rcu_drain.drain_complete_event, false);
+
+ if (locked) {
+ qemu_mutex_unlock_iothread();
+ }
+
+ /*
+ * RCU callbacks are invoked in the same order in which they are
+ * registered, so we can be sure that when 'drain_rcu_callback' is
+ * called, all RCU callbacks that were registered on this thread prior
+ * to calling this function have completed.
+ *
+ * Note that since there is only one global queue of RCU callbacks,
+ * we also end up waiting for most of the RCU callbacks that were
+ * registered on other threads, but this is a side effect that should
+ * not be relied upon.
+ */
+
+ qatomic_inc(&in_drain_call_rcu);
+ call_rcu1(&rcu_drain.rcu, drain_rcu_callback);
+ qemu_event_wait(&rcu_drain.drain_complete_event);
+ qatomic_dec(&in_drain_call_rcu);
+
+ if (locked) {
+ qemu_mutex_lock_iothread();
+ }
+}
+
void rcu_register_thread(void)
{
- assert(rcu_reader.ctr == 0);
+ assert(get_ptr_rcu_reader()->ctr == 0);
qemu_mutex_lock(&rcu_registry_lock);
- QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
+ QLIST_INSERT_HEAD(&registry, get_ptr_rcu_reader(), node);
qemu_mutex_unlock(&rcu_registry_lock);
}
void rcu_unregister_thread(void)
{
qemu_mutex_lock(&rcu_registry_lock);
- QLIST_REMOVE(&rcu_reader, node);
+ QLIST_REMOVE(get_ptr_rcu_reader(), node);
+ qemu_mutex_unlock(&rcu_registry_lock);
+}
+
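+/* Attach @n to the calling thread's reader state; wait_for_readers()
+ * notifies it while a drain_call_rcu() is pending and the reader is
+ * still inside a critical section. */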
+void rcu_add_force_rcu_notifier(Notifier *n)
+{
+ qemu_mutex_lock(&rcu_registry_lock);
+ notifier_list_add(&get_ptr_rcu_reader()->force_rcu, n);
+ qemu_mutex_unlock(&rcu_registry_lock);
+}
+
+void rcu_remove_force_rcu_notifier(Notifier *n)
+{
+ qemu_mutex_lock(&rcu_registry_lock);
+ notifier_remove(n);
qemu_mutex_unlock(&rcu_registry_lock);
}
static void __attribute__((__constructor__)) rcu_init(void)
{
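+ /* One-time setup for the smp_mb_global() barriers used above. */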
+ smp_mb_global_init();
#ifdef CONFIG_POSIX
pthread_atfork(rcu_init_lock, rcu_init_unlock, rcu_init_child);
#endif