+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Jan Friesse <jfriesse@redhat.com>
-Date: Tue, 24 Apr 2018 17:44:48 +0200
-Subject: [PATCH] cpg: Inform clients about left nodes during pause
-
-Patch tries to fix incorrect behaviour during following test-case:
-- 3 nodes
-- Node 1 is paused
-- Node 2 and 3 detects node 1 as failed and informs CPG clients
-- Node 1 is unpaused
-- Node 1 clients are informed about new membership, but not about Node 1
- being paused, so from Node 1 point-of-view, Node 2 and 3 failure
-
-Solution is to:
-- Remove downlist master choose and always choose local node downlist.
- For Node 1 in example above, downlist contains Node 2 and 3.
-- Keep code which informs clients about left nodes
-- Use joinlist as a authoritative source of nodes/clients which exists
- in membership
-
-This patch doesn't break backwards compatibility.
-
-I've walked thru all the patches which changed behavior of cpg to ensure
-patch does not break CPG behavior. Most important were:
-- 058f50314cd20abe67f5e8fb3c029a63b0e10cdc - Base. Code was significantly
- changed to handle double free by split group_info into two structures
- cpg_pd (local node clients) and process_info (all clients). Joinlist
- was
-- 97c28ea756cdf59316b2f609103122cc678329bd - This patch removed
- confchg_fn and made CPG sync correct
-- feff0e8542463773207a3b2c1f6004afba1f58d5 - I've tested described
- behavior without any issues
-- 6bbbfcb6b4af72cf35ab9fdb4412fa6c6bdacc12 - Added idea of using
- heuristics to choose same downlist on all nodes. Sadly this idea
- was beginning of the problems described in
- 040fda8872a4a20340d73fa1c240b86afb2489f8,
- ac1d79ea7c14997353427e962865781d0836d9fa,
- 559d4083ed8355fe83f275e53b9c8f52a91694b2,
- 02c5dffa5bb8579c223006fa1587de9ba7409a3d,
- 64d0e5ace025cc929e42896c5d6beb3ef75b8244 and
- b55f32fe2e1538db33a1ec584b67744c724328c6
-- 02c5dffa5bb8579c223006fa1587de9ba7409a3d - Made joinlist as
- authoritative source of nodes/clients but left downlist_master_choose
- as a source of information about left nodes
-
-Long story made short. This patch basically reverts
-idea of using heuristics to choose same downlist on all nodes.
-
-Signed-off-by: Jan Friesse <jfriesse@redhat.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- exec/cpg.c | 164 ++++-------------------------------------------------
- 1 file changed, 11 insertions(+), 153 deletions(-)
-
-diff --git a/exec/cpg.c b/exec/cpg.c
-index 78ac1e9e..b851cba3 100644
---- a/exec/cpg.c
-+++ b/exec/cpg.c
-@@ -139,13 +139,6 @@ enum cpg_sync_state {
- CPGSYNC_JOINLIST
- };
-
--enum cpg_downlist_state_e {
-- CPG_DOWNLIST_NONE,
-- CPG_DOWNLIST_WAITING_FOR_MESSAGES,
-- CPG_DOWNLIST_APPLYING,
--};
--static enum cpg_downlist_state_e downlist_state;
--static struct list_head downlist_messages_head;
- static struct list_head joinlist_messages_head;
-
- struct cpg_pd {
-@@ -295,9 +288,7 @@ static int cpg_exec_send_downlist(void);
-
- static int cpg_exec_send_joinlist(void);
-
--static void downlist_messages_delete (void);
--
--static void downlist_master_choose_and_send (void);
-+static void downlist_inform_clients (void);
-
- static void joinlist_inform_clients (void);
-
-@@ -499,14 +490,6 @@ struct req_exec_cpg_downlist {
- mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8)));
- };
-
--struct downlist_msg {
-- mar_uint32_t sender_nodeid;
-- mar_uint32_t old_members __attribute__((aligned(8)));
-- mar_uint32_t left_nodes __attribute__((aligned(8)));
-- mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8)));
-- struct list_head list;
--};
--
- struct joinlist_msg {
- mar_uint32_t sender_nodeid;
- uint32_t pid;
-@@ -566,8 +549,6 @@ static void cpg_sync_init (
- last_sync_ring_id.nodeid = ring_id->rep.nodeid;
- last_sync_ring_id.seq = ring_id->seq;
-
-- downlist_state = CPG_DOWNLIST_WAITING_FOR_MESSAGES;
--
- entries = 0;
- /*
- * Determine list of nodeids for downlist message
-@@ -611,14 +592,10 @@ static void cpg_sync_activate (void)
- my_member_list_entries * sizeof (unsigned int));
- my_old_member_list_entries = my_member_list_entries;
-
-- if (downlist_state == CPG_DOWNLIST_WAITING_FOR_MESSAGES) {
-- downlist_master_choose_and_send ();
-- }
-+ downlist_inform_clients ();
-
- joinlist_inform_clients ();
-
-- downlist_messages_delete ();
-- downlist_state = CPG_DOWNLIST_NONE;
- joinlist_messages_delete ();
-
- notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list);
-@@ -626,8 +603,7 @@ static void cpg_sync_activate (void)
-
- static void cpg_sync_abort (void)
- {
-- downlist_state = CPG_DOWNLIST_NONE;
-- downlist_messages_delete ();
-+
- joinlist_messages_delete ();
- }
-
-@@ -800,76 +776,17 @@ static int notify_lib_joinlist(
- return CS_OK;
- }
-
--static void downlist_log(const char *msg, struct downlist_msg* dl)
-+static void downlist_log(const char *msg, struct req_exec_cpg_downlist *dl)
- {
- log_printf (LOG_DEBUG,
-- "%s: sender %s; members(old:%d left:%d)",
-+ "%s: members(old:%d left:%d)",
- msg,
-- api->totem_ifaces_print(dl->sender_nodeid),
- dl->old_members,
- dl->left_nodes);
- }
-
--static struct downlist_msg* downlist_master_choose (void)
-+static void downlist_inform_clients (void)
- {
-- struct downlist_msg *cmp;
-- struct downlist_msg *best = NULL;
-- struct list_head *iter;
-- uint32_t cmp_members;
-- uint32_t best_members;
-- uint32_t i;
-- int ignore_msg;
--
-- for (iter = downlist_messages_head.next;
-- iter != &downlist_messages_head;
-- iter = iter->next) {
--
-- cmp = list_entry(iter, struct downlist_msg, list);
-- downlist_log("comparing", cmp);
--
-- ignore_msg = 0;
-- for (i = 0; i < cmp->left_nodes; i++) {
-- if (cmp->nodeids[i] == api->totem_nodeid_get()) {
-- log_printf (LOG_DEBUG, "Ignoring this entry because I'm in the left list\n");
--
-- ignore_msg = 1;
-- break;
-- }
-- }
--
-- if (ignore_msg) {
-- continue ;
-- }
--
-- if (best == NULL) {
-- best = cmp;
-- continue;
-- }
--
-- best_members = best->old_members - best->left_nodes;
-- cmp_members = cmp->old_members - cmp->left_nodes;
--
-- if (cmp_members > best_members) {
-- best = cmp;
-- } else if (cmp_members == best_members) {
-- if (cmp->old_members > best->old_members) {
-- best = cmp;
-- } else if (cmp->old_members == best->old_members) {
-- if (cmp->sender_nodeid < best->sender_nodeid) {
-- best = cmp;
-- }
-- }
-- }
-- }
--
-- assert (best != NULL);
--
-- return best;
--}
--
--static void downlist_master_choose_and_send (void)
--{
-- struct downlist_msg *stored_msg;
- struct list_head *iter;
- struct process_info *left_pi;
- qb_map_t *group_map;
-@@ -884,14 +801,7 @@ static void downlist_master_choose_and_send (void)
- qb_map_iter_t *miter;
- int i, size;
-
-- downlist_state = CPG_DOWNLIST_APPLYING;
--
-- stored_msg = downlist_master_choose ();
-- if (!stored_msg) {
-- log_printf (LOGSYS_LEVEL_DEBUG, "NO chosen downlist");
-- return;
-- }
-- downlist_log("chosen downlist", stored_msg);
-+ downlist_log("my downlist", &g_req_exec_cpg_downlist);
-
- group_map = qb_skiplist_create();
-
-@@ -905,9 +815,9 @@ static void downlist_master_choose_and_send (void)
- iter = iter->next;
-
- left_pi = NULL;
-- for (i = 0; i < stored_msg->left_nodes; i++) {
-+ for (i = 0; i < g_req_exec_cpg_downlist.left_nodes; i++) {
-
-- if (pi->nodeid == stored_msg->nodeids[i]) {
-+ if (pi->nodeid == g_req_exec_cpg_downlist.nodeids[i]) {
- left_pi = pi;
- break;
- }
-@@ -1039,23 +949,6 @@ static void joinlist_inform_clients (void)
- joinlist_remove_zombie_pi_entries ();
- }
-
--static void downlist_messages_delete (void)
--{
-- struct downlist_msg *stored_msg;
-- struct list_head *iter, *iter_next;
--
-- for (iter = downlist_messages_head.next;
-- iter != &downlist_messages_head;
-- iter = iter_next) {
--
-- iter_next = iter->next;
--
-- stored_msg = list_entry(iter, struct downlist_msg, list);
-- list_del (&stored_msg->list);
-- free (stored_msg);
-- }
--}
--
- static void joinlist_messages_delete (void)
- {
- struct joinlist_msg *stored_msg;
-@@ -1076,7 +969,6 @@ static void joinlist_messages_delete (void)
-
- static char *cpg_exec_init_fn (struct corosync_api_v1 *corosync_api)
- {
-- list_init (&downlist_messages_head);
- list_init (&joinlist_messages_head);
- api = corosync_api;
- return (NULL);
-@@ -1338,43 +1230,9 @@ static void message_handler_req_exec_cpg_downlist(
- unsigned int nodeid)
- {
- const struct req_exec_cpg_downlist *req_exec_cpg_downlist = message;
-- int i;
-- struct list_head *iter;
-- struct downlist_msg *stored_msg;
-- int found;
--
-- if (downlist_state != CPG_DOWNLIST_WAITING_FOR_MESSAGES) {
-- log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received in state %d",
-- req_exec_cpg_downlist->left_nodes, downlist_state);
-- return;
-- }
--
-- stored_msg = malloc (sizeof (struct downlist_msg));
-- stored_msg->sender_nodeid = nodeid;
-- stored_msg->old_members = req_exec_cpg_downlist->old_members;
-- stored_msg->left_nodes = req_exec_cpg_downlist->left_nodes;
-- memcpy (stored_msg->nodeids, req_exec_cpg_downlist->nodeids,
-- req_exec_cpg_downlist->left_nodes * sizeof (mar_uint32_t));
-- list_init (&stored_msg->list);
-- list_add (&stored_msg->list, &downlist_messages_head);
--
-- for (i = 0; i < my_member_list_entries; i++) {
-- found = 0;
-- for (iter = downlist_messages_head.next;
-- iter != &downlist_messages_head;
-- iter = iter->next) {
--
-- stored_msg = list_entry(iter, struct downlist_msg, list);
-- if (my_member_list[i] == stored_msg->sender_nodeid) {
-- found = 1;
-- }
-- }
-- if (!found) {
-- return;
-- }
-- }
-
-- downlist_master_choose_and_send ();
-+ log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received",
-+ req_exec_cpg_downlist->left_nodes);
- }
-
-