From 9dd866204c76fab850d5939018e7d5aa79b1df3c Mon Sep 17 00:00:00 2001
From: =?utf8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
Date: Wed, 30 Sep 2020 13:21:31 +0200
Subject: [PATCH] pmxcfs: protect CPG operations with mutex
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

cpg_mcast_joined (and transitively, cpg_join/leave) are not thread-safe.
pmxcfs triggers such operations via FUSE and CPG dispatch callbacks,
which are running in concurrent threads.

accordingly, we need to protect these operations with a mutex, otherwise
they might return CS_OK without actually doing what they were supposed
to do (which in turn can lead to the dfsm taking a wrong turn and
getting stuck in a supposedly short-lived state, blocking access via
FUSE and getting whole clusters fenced).

huge thanks to Alexandre Derumier for providing the initial bug report
and quite a lot of test runs while debugging this issue.

Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 data/src/dfsm.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/data/src/dfsm.c b/data/src/dfsm.c
index 172d877..17a3ba4 100644
--- a/data/src/dfsm.c
+++ b/data/src/dfsm.c
@@ -107,6 +107,7 @@ struct dfsm {
 	cpg_callbacks_t *cpg_callbacks;
 	dfsm_callbacks_t *dfsm_callbacks;
 	cpg_handle_t cpg_handle;
+	GMutex cpg_mutex;
 	struct cpg_name cpg_group_name;
 	uint32_t nodeid;
 	uint32_t pid;
@@ -204,7 +205,9 @@ dfsm_send_message_full(
 	cs_error_t result;
 	int retries = 0;
 loop:
+	g_mutex_lock (&dfsm->cpg_mutex);
 	result = cpg_mcast_joined(dfsm->cpg_handle, CPG_TYPE_AGREED, iov, len);
+	g_mutex_unlock (&dfsm->cpg_mutex);
 	if (retry && result == CS_ERR_TRY_AGAIN) {
 		nanosleep(&tvreq, NULL);
 		++retries;
@@ -1250,7 +1253,9 @@ dfsm_new(
 
 	if (!(dfsm->msg_queue = g_sequence_new(NULL))) 
 		goto err;
-		
+
+	g_mutex_init(&dfsm->cpg_mutex);
+
 	dfsm->log_domain = log_domain;
 	dfsm->data = data;
 	dfsm->mode = DFSM_MODE_START;
@@ -1424,7 +1429,9 @@ dfsm_join(dfsm_t *dfsm)
 	struct timespec tvreq = { .tv_sec = 0, .tv_nsec = 100000000 };
 	int retries = 0;
 loop:
+	g_mutex_lock (&dfsm->cpg_mutex);
 	result = cpg_join(dfsm->cpg_handle, &dfsm->cpg_group_name); 
+	g_mutex_unlock (&dfsm->cpg_mutex);
 	if (result == CS_ERR_TRY_AGAIN) {
 		nanosleep(&tvreq, NULL);
 		++retries;
@@ -1453,7 +1460,9 @@ dfsm_leave (dfsm_t *dfsm)
 	struct timespec tvreq = { .tv_sec = 0, .tv_nsec = 100000000 };
 	int retries = 0;
 loop:
+	g_mutex_lock (&dfsm->cpg_mutex);
 	result = cpg_leave(dfsm->cpg_handle, &dfsm->cpg_group_name);
+	g_mutex_unlock (&dfsm->cpg_mutex);
 	if (result == CS_ERR_TRY_AGAIN) {
 		nanosleep(&tvreq, NULL);
 		++retries;
@@ -1509,6 +1518,8 @@ dfsm_destroy(dfsm_t *dfsm)
 	g_mutex_clear (&dfsm->sync_mutex);
 
 	g_cond_clear (&dfsm->sync_cond);
+
+	g_mutex_clear (&dfsm->cpg_mutex);
  
 	if (dfsm->results)
 		g_hash_table_destroy(dfsm->results);
-- 
2.39.2