]> git.proxmox.com Git - corosync-pve.git/blob - patches/0010-cpg-Inform-clients-about-left-nodes-during-pause.patch
2e745c79bce4b9b1f9ae2571ef0fb6e330312a40
[corosync-pve.git] / patches / 0010-cpg-Inform-clients-about-left-nodes-during-pause.patch
1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Jan Friesse <jfriesse@redhat.com>
3 Date: Tue, 24 Apr 2018 17:44:48 +0200
4 Subject: [PATCH] cpg: Inform clients about left nodes during pause
5
6 This patch tries to fix incorrect behaviour in the following test case:
7 - 3 nodes
8 - Node 1 is paused
9 - Nodes 2 and 3 detect node 1 as failed and inform CPG clients
10 - Node 1 is unpaused
11 - Node 1 clients are informed about the new membership, but not about Node 1
12 having been paused, i.e. that from Node 1's point of view Nodes 2 and 3 failed
13
14 Solution is to:
15 - Remove the downlist master selection (downlist_master_choose) and always use the local node's downlist.
16 For Node 1 in example above, downlist contains Node 2 and 3.
17 - Keep code which informs clients about left nodes
18 - Use joinlist as the authoritative source of nodes/clients which exist
19 in the membership
20
21 This patch doesn't break backwards compatibility.
22
23 I've walked through all the patches which changed behavior of cpg to ensure
24 patch does not break CPG behavior. Most important were:
25 - 058f50314cd20abe67f5e8fb3c029a63b0e10cdc - Base. Code was significantly
26 changed to handle double free by splitting group_info into two structures
27 cpg_pd (local node clients) and process_info (all clients). Joinlist
28 was
29 - 97c28ea756cdf59316b2f609103122cc678329bd - This patch removed
30 confchg_fn and made CPG sync correct
31 - feff0e8542463773207a3b2c1f6004afba1f58d5 - I've tested described
32 behavior without any issues
33 - 6bbbfcb6b4af72cf35ab9fdb4412fa6c6bdacc12 - Added idea of using
34 heuristics to choose the same downlist on all nodes. Sadly this idea
35 was the beginning of the problems described in
36 040fda8872a4a20340d73fa1c240b86afb2489f8,
37 ac1d79ea7c14997353427e962865781d0836d9fa,
38 559d4083ed8355fe83f275e53b9c8f52a91694b2,
39 02c5dffa5bb8579c223006fa1587de9ba7409a3d,
40 64d0e5ace025cc929e42896c5d6beb3ef75b8244 and
41 b55f32fe2e1538db33a1ec584b67744c724328c6
42 - 02c5dffa5bb8579c223006fa1587de9ba7409a3d - Made joinlist the
43 authoritative source of nodes/clients but left downlist_master_choose
44 as the source of information about left nodes
45
46 Long story short: this patch basically reverts the
47 idea of using heuristics to choose the same downlist on all nodes.
48
49 Signed-off-by: Jan Friesse <jfriesse@redhat.com>
50 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
51 ---
52 exec/cpg.c | 164 +++++--------------------------------------------------------
53 1 file changed, 11 insertions(+), 153 deletions(-)
54
55 diff --git a/exec/cpg.c b/exec/cpg.c
56 index 78ac1e9e..b851cba3 100644
57 --- a/exec/cpg.c
58 +++ b/exec/cpg.c
59 @@ -139,13 +139,6 @@ enum cpg_sync_state {
60 CPGSYNC_JOINLIST
61 };
62
63 -enum cpg_downlist_state_e {
64 - CPG_DOWNLIST_NONE,
65 - CPG_DOWNLIST_WAITING_FOR_MESSAGES,
66 - CPG_DOWNLIST_APPLYING,
67 -};
68 -static enum cpg_downlist_state_e downlist_state;
69 -static struct list_head downlist_messages_head;
70 static struct list_head joinlist_messages_head;
71
72 struct cpg_pd {
73 @@ -295,9 +288,7 @@ static int cpg_exec_send_downlist(void);
74
75 static int cpg_exec_send_joinlist(void);
76
77 -static void downlist_messages_delete (void);
78 -
79 -static void downlist_master_choose_and_send (void);
80 +static void downlist_inform_clients (void);
81
82 static void joinlist_inform_clients (void);
83
84 @@ -499,14 +490,6 @@ struct req_exec_cpg_downlist {
85 mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8)));
86 };
87
88 -struct downlist_msg {
89 - mar_uint32_t sender_nodeid;
90 - mar_uint32_t old_members __attribute__((aligned(8)));
91 - mar_uint32_t left_nodes __attribute__((aligned(8)));
92 - mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8)));
93 - struct list_head list;
94 -};
95 -
96 struct joinlist_msg {
97 mar_uint32_t sender_nodeid;
98 uint32_t pid;
99 @@ -566,8 +549,6 @@ static void cpg_sync_init (
100 last_sync_ring_id.nodeid = ring_id->rep.nodeid;
101 last_sync_ring_id.seq = ring_id->seq;
102
103 - downlist_state = CPG_DOWNLIST_WAITING_FOR_MESSAGES;
104 -
105 entries = 0;
106 /*
107 * Determine list of nodeids for downlist message
108 @@ -611,14 +592,10 @@ static void cpg_sync_activate (void)
109 my_member_list_entries * sizeof (unsigned int));
110 my_old_member_list_entries = my_member_list_entries;
111
112 - if (downlist_state == CPG_DOWNLIST_WAITING_FOR_MESSAGES) {
113 - downlist_master_choose_and_send ();
114 - }
115 + downlist_inform_clients ();
116
117 joinlist_inform_clients ();
118
119 - downlist_messages_delete ();
120 - downlist_state = CPG_DOWNLIST_NONE;
121 joinlist_messages_delete ();
122
123 notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list);
124 @@ -626,8 +603,7 @@ static void cpg_sync_activate (void)
125
126 static void cpg_sync_abort (void)
127 {
128 - downlist_state = CPG_DOWNLIST_NONE;
129 - downlist_messages_delete ();
130 +
131 joinlist_messages_delete ();
132 }
133
134 @@ -800,76 +776,17 @@ static int notify_lib_joinlist(
135 return CS_OK;
136 }
137
138 -static void downlist_log(const char *msg, struct downlist_msg* dl)
139 +static void downlist_log(const char *msg, struct req_exec_cpg_downlist *dl)
140 {
141 log_printf (LOG_DEBUG,
142 - "%s: sender %s; members(old:%d left:%d)",
143 + "%s: members(old:%d left:%d)",
144 msg,
145 - api->totem_ifaces_print(dl->sender_nodeid),
146 dl->old_members,
147 dl->left_nodes);
148 }
149
150 -static struct downlist_msg* downlist_master_choose (void)
151 +static void downlist_inform_clients (void)
152 {
153 - struct downlist_msg *cmp;
154 - struct downlist_msg *best = NULL;
155 - struct list_head *iter;
156 - uint32_t cmp_members;
157 - uint32_t best_members;
158 - uint32_t i;
159 - int ignore_msg;
160 -
161 - for (iter = downlist_messages_head.next;
162 - iter != &downlist_messages_head;
163 - iter = iter->next) {
164 -
165 - cmp = list_entry(iter, struct downlist_msg, list);
166 - downlist_log("comparing", cmp);
167 -
168 - ignore_msg = 0;
169 - for (i = 0; i < cmp->left_nodes; i++) {
170 - if (cmp->nodeids[i] == api->totem_nodeid_get()) {
171 - log_printf (LOG_DEBUG, "Ignoring this entry because I'm in the left list\n");
172 -
173 - ignore_msg = 1;
174 - break;
175 - }
176 - }
177 -
178 - if (ignore_msg) {
179 - continue ;
180 - }
181 -
182 - if (best == NULL) {
183 - best = cmp;
184 - continue;
185 - }
186 -
187 - best_members = best->old_members - best->left_nodes;
188 - cmp_members = cmp->old_members - cmp->left_nodes;
189 -
190 - if (cmp_members > best_members) {
191 - best = cmp;
192 - } else if (cmp_members == best_members) {
193 - if (cmp->old_members > best->old_members) {
194 - best = cmp;
195 - } else if (cmp->old_members == best->old_members) {
196 - if (cmp->sender_nodeid < best->sender_nodeid) {
197 - best = cmp;
198 - }
199 - }
200 - }
201 - }
202 -
203 - assert (best != NULL);
204 -
205 - return best;
206 -}
207 -
208 -static void downlist_master_choose_and_send (void)
209 -{
210 - struct downlist_msg *stored_msg;
211 struct list_head *iter;
212 struct process_info *left_pi;
213 qb_map_t *group_map;
214 @@ -884,14 +801,7 @@ static void downlist_master_choose_and_send (void)
215 qb_map_iter_t *miter;
216 int i, size;
217
218 - downlist_state = CPG_DOWNLIST_APPLYING;
219 -
220 - stored_msg = downlist_master_choose ();
221 - if (!stored_msg) {
222 - log_printf (LOGSYS_LEVEL_DEBUG, "NO chosen downlist");
223 - return;
224 - }
225 - downlist_log("chosen downlist", stored_msg);
226 + downlist_log("my downlist", &g_req_exec_cpg_downlist);
227
228 group_map = qb_skiplist_create();
229
230 @@ -905,9 +815,9 @@ static void downlist_master_choose_and_send (void)
231 iter = iter->next;
232
233 left_pi = NULL;
234 - for (i = 0; i < stored_msg->left_nodes; i++) {
235 + for (i = 0; i < g_req_exec_cpg_downlist.left_nodes; i++) {
236
237 - if (pi->nodeid == stored_msg->nodeids[i]) {
238 + if (pi->nodeid == g_req_exec_cpg_downlist.nodeids[i]) {
239 left_pi = pi;
240 break;
241 }
242 @@ -1039,23 +949,6 @@ static void joinlist_inform_clients (void)
243 joinlist_remove_zombie_pi_entries ();
244 }
245
246 -static void downlist_messages_delete (void)
247 -{
248 - struct downlist_msg *stored_msg;
249 - struct list_head *iter, *iter_next;
250 -
251 - for (iter = downlist_messages_head.next;
252 - iter != &downlist_messages_head;
253 - iter = iter_next) {
254 -
255 - iter_next = iter->next;
256 -
257 - stored_msg = list_entry(iter, struct downlist_msg, list);
258 - list_del (&stored_msg->list);
259 - free (stored_msg);
260 - }
261 -}
262 -
263 static void joinlist_messages_delete (void)
264 {
265 struct joinlist_msg *stored_msg;
266 @@ -1076,7 +969,6 @@ static void joinlist_messages_delete (void)
267
268 static char *cpg_exec_init_fn (struct corosync_api_v1 *corosync_api)
269 {
270 - list_init (&downlist_messages_head);
271 list_init (&joinlist_messages_head);
272 api = corosync_api;
273 return (NULL);
274 @@ -1338,43 +1230,9 @@ static void message_handler_req_exec_cpg_downlist(
275 unsigned int nodeid)
276 {
277 const struct req_exec_cpg_downlist *req_exec_cpg_downlist = message;
278 - int i;
279 - struct list_head *iter;
280 - struct downlist_msg *stored_msg;
281 - int found;
282
283 - if (downlist_state != CPG_DOWNLIST_WAITING_FOR_MESSAGES) {
284 - log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received in state %d",
285 - req_exec_cpg_downlist->left_nodes, downlist_state);
286 - return;
287 - }
288 -
289 - stored_msg = malloc (sizeof (struct downlist_msg));
290 - stored_msg->sender_nodeid = nodeid;
291 - stored_msg->old_members = req_exec_cpg_downlist->old_members;
292 - stored_msg->left_nodes = req_exec_cpg_downlist->left_nodes;
293 - memcpy (stored_msg->nodeids, req_exec_cpg_downlist->nodeids,
294 - req_exec_cpg_downlist->left_nodes * sizeof (mar_uint32_t));
295 - list_init (&stored_msg->list);
296 - list_add (&stored_msg->list, &downlist_messages_head);
297 -
298 - for (i = 0; i < my_member_list_entries; i++) {
299 - found = 0;
300 - for (iter = downlist_messages_head.next;
301 - iter != &downlist_messages_head;
302 - iter = iter->next) {
303 -
304 - stored_msg = list_entry(iter, struct downlist_msg, list);
305 - if (my_member_list[i] == stored_msg->sender_nodeid) {
306 - found = 1;
307 - }
308 - }
309 - if (!found) {
310 - return;
311 - }
312 - }
313 -
314 - downlist_master_choose_and_send ();
315 + log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received",
316 + req_exec_cpg_downlist->left_nodes);
317 }
318
319
320 --
321 2.14.2
322