]>
Commit | Line | Data |
---|---|---|
6a2849ec TL |
1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
2 | From: Jan Friesse <jfriesse@redhat.com> | |
3 | Date: Tue, 24 Apr 2018 17:44:48 +0200 | |
4 | Subject: [PATCH] cpg: Inform clients about left nodes during pause | |
5 | ||
6 | Patch tries to fix incorrect behaviour during the following test-case: | |
7 | - 3 nodes | |
8 | - Node 1 is paused | |
9 | - Nodes 2 and 3 detect node 1 as failed and inform CPG clients | |
10 | - Node 1 is unpaused | |
11 | - Node 1 clients are informed about the new membership, but not about Node 1 | |
12 | being paused, i.e. from Node 1's point of view, the failure of Nodes 2 and 3 | |
13 | ||
14 | Solution is to: | |
15 | - Remove the downlist master selection and always use the local node's downlist. | |
16 | For Node 1 in example above, downlist contains Node 2 and 3. | |
17 | - Keep code which informs clients about left nodes | |
18 | - Use joinlist as an authoritative source of nodes/clients which exist | |
19 | in membership | |
20 | ||
21 | This patch doesn't break backwards compatibility. | |
22 | ||
23 | I've walked through all the patches which changed the behavior of cpg to ensure | |
24 | the patch does not break CPG behavior. The most important were: | |
25 | - 058f50314cd20abe67f5e8fb3c029a63b0e10cdc - Base. Code was significantly | |
26 | changed to handle double free by splitting group_info into two structures | |
27 | cpg_pd (local node clients) and process_info (all clients). Joinlist | |
28 | was | |
29 | - 97c28ea756cdf59316b2f609103122cc678329bd - This patch removed | |
30 | confchg_fn and made CPG sync correct | |
31 | - feff0e8542463773207a3b2c1f6004afba1f58d5 - I've tested described | |
32 | behavior without any issues | |
33 | - 6bbbfcb6b4af72cf35ab9fdb4412fa6c6bdacc12 - Added idea of using | |
34 | heuristics to choose same downlist on all nodes. Sadly this idea | |
35 | was the beginning of the problems described in | |
36 | 040fda8872a4a20340d73fa1c240b86afb2489f8, | |
37 | ac1d79ea7c14997353427e962865781d0836d9fa, | |
38 | 559d4083ed8355fe83f275e53b9c8f52a91694b2, | |
39 | 02c5dffa5bb8579c223006fa1587de9ba7409a3d, | |
40 | 64d0e5ace025cc929e42896c5d6beb3ef75b8244 and | |
41 | b55f32fe2e1538db33a1ec584b67744c724328c6 | |
42 | - 02c5dffa5bb8579c223006fa1587de9ba7409a3d - Made joinlist the | |
43 | authoritative source of nodes/clients but left downlist_master_choose | |
44 | as a source of information about left nodes | |
45 | ||
46 | Long story short: this patch basically reverts | |
47 | the idea of using heuristics to choose the same downlist on all nodes. | |
48 | ||
49 | Signed-off-by: Jan Friesse <jfriesse@redhat.com> | |
50 | Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com> | |
51 | --- | |
e77cc04e | 52 | exec/cpg.c | 164 ++++------------------------------------------------- |
6a2849ec TL |
53 | 1 file changed, 11 insertions(+), 153 deletions(-) |
54 | ||
55 | diff --git a/exec/cpg.c b/exec/cpg.c | |
56 | index 78ac1e9e..b851cba3 100644 | |
57 | --- a/exec/cpg.c | |
58 | +++ b/exec/cpg.c | |
59 | @@ -139,13 +139,6 @@ enum cpg_sync_state { | |
60 | CPGSYNC_JOINLIST | |
61 | }; | |
62 | ||
63 | -enum cpg_downlist_state_e { | |
64 | - CPG_DOWNLIST_NONE, | |
65 | - CPG_DOWNLIST_WAITING_FOR_MESSAGES, | |
66 | - CPG_DOWNLIST_APPLYING, | |
67 | -}; | |
68 | -static enum cpg_downlist_state_e downlist_state; | |
69 | -static struct list_head downlist_messages_head; | |
70 | static struct list_head joinlist_messages_head; | |
71 | ||
72 | struct cpg_pd { | |
73 | @@ -295,9 +288,7 @@ static int cpg_exec_send_downlist(void); | |
74 | ||
75 | static int cpg_exec_send_joinlist(void); | |
76 | ||
77 | -static void downlist_messages_delete (void); | |
78 | - | |
79 | -static void downlist_master_choose_and_send (void); | |
80 | +static void downlist_inform_clients (void); | |
81 | ||
82 | static void joinlist_inform_clients (void); | |
83 | ||
84 | @@ -499,14 +490,6 @@ struct req_exec_cpg_downlist { | |
85 | mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); | |
86 | }; | |
87 | ||
88 | -struct downlist_msg { | |
89 | - mar_uint32_t sender_nodeid; | |
90 | - mar_uint32_t old_members __attribute__((aligned(8))); | |
91 | - mar_uint32_t left_nodes __attribute__((aligned(8))); | |
92 | - mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); | |
93 | - struct list_head list; | |
94 | -}; | |
95 | - | |
96 | struct joinlist_msg { | |
97 | mar_uint32_t sender_nodeid; | |
98 | uint32_t pid; | |
99 | @@ -566,8 +549,6 @@ static void cpg_sync_init ( | |
100 | last_sync_ring_id.nodeid = ring_id->rep.nodeid; | |
101 | last_sync_ring_id.seq = ring_id->seq; | |
102 | ||
103 | - downlist_state = CPG_DOWNLIST_WAITING_FOR_MESSAGES; | |
104 | - | |
105 | entries = 0; | |
106 | /* | |
107 | * Determine list of nodeids for downlist message | |
108 | @@ -611,14 +592,10 @@ static void cpg_sync_activate (void) | |
109 | my_member_list_entries * sizeof (unsigned int)); | |
110 | my_old_member_list_entries = my_member_list_entries; | |
111 | ||
112 | - if (downlist_state == CPG_DOWNLIST_WAITING_FOR_MESSAGES) { | |
113 | - downlist_master_choose_and_send (); | |
114 | - } | |
115 | + downlist_inform_clients (); | |
116 | ||
117 | joinlist_inform_clients (); | |
118 | ||
119 | - downlist_messages_delete (); | |
120 | - downlist_state = CPG_DOWNLIST_NONE; | |
121 | joinlist_messages_delete (); | |
122 | ||
123 | notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list); | |
124 | @@ -626,8 +603,7 @@ static void cpg_sync_activate (void) | |
125 | ||
126 | static void cpg_sync_abort (void) | |
127 | { | |
128 | - downlist_state = CPG_DOWNLIST_NONE; | |
129 | - downlist_messages_delete (); | |
130 | + | |
131 | joinlist_messages_delete (); | |
132 | } | |
133 | ||
134 | @@ -800,76 +776,17 @@ static int notify_lib_joinlist( | |
135 | return CS_OK; | |
136 | } | |
137 | ||
138 | -static void downlist_log(const char *msg, struct downlist_msg* dl) | |
139 | +static void downlist_log(const char *msg, struct req_exec_cpg_downlist *dl) | |
140 | { | |
141 | log_printf (LOG_DEBUG, | |
142 | - "%s: sender %s; members(old:%d left:%d)", | |
143 | + "%s: members(old:%d left:%d)", | |
144 | msg, | |
145 | - api->totem_ifaces_print(dl->sender_nodeid), | |
146 | dl->old_members, | |
147 | dl->left_nodes); | |
148 | } | |
149 | ||
150 | -static struct downlist_msg* downlist_master_choose (void) | |
151 | +static void downlist_inform_clients (void) | |
152 | { | |
153 | - struct downlist_msg *cmp; | |
154 | - struct downlist_msg *best = NULL; | |
155 | - struct list_head *iter; | |
156 | - uint32_t cmp_members; | |
157 | - uint32_t best_members; | |
158 | - uint32_t i; | |
159 | - int ignore_msg; | |
160 | - | |
161 | - for (iter = downlist_messages_head.next; | |
162 | - iter != &downlist_messages_head; | |
163 | - iter = iter->next) { | |
164 | - | |
165 | - cmp = list_entry(iter, struct downlist_msg, list); | |
166 | - downlist_log("comparing", cmp); | |
167 | - | |
168 | - ignore_msg = 0; | |
169 | - for (i = 0; i < cmp->left_nodes; i++) { | |
170 | - if (cmp->nodeids[i] == api->totem_nodeid_get()) { | |
171 | - log_printf (LOG_DEBUG, "Ignoring this entry because I'm in the left list\n"); | |
172 | - | |
173 | - ignore_msg = 1; | |
174 | - break; | |
175 | - } | |
176 | - } | |
177 | - | |
178 | - if (ignore_msg) { | |
179 | - continue ; | |
180 | - } | |
181 | - | |
182 | - if (best == NULL) { | |
183 | - best = cmp; | |
184 | - continue; | |
185 | - } | |
186 | - | |
187 | - best_members = best->old_members - best->left_nodes; | |
188 | - cmp_members = cmp->old_members - cmp->left_nodes; | |
189 | - | |
190 | - if (cmp_members > best_members) { | |
191 | - best = cmp; | |
192 | - } else if (cmp_members == best_members) { | |
193 | - if (cmp->old_members > best->old_members) { | |
194 | - best = cmp; | |
195 | - } else if (cmp->old_members == best->old_members) { | |
196 | - if (cmp->sender_nodeid < best->sender_nodeid) { | |
197 | - best = cmp; | |
198 | - } | |
199 | - } | |
200 | - } | |
201 | - } | |
202 | - | |
203 | - assert (best != NULL); | |
204 | - | |
205 | - return best; | |
206 | -} | |
207 | - | |
208 | -static void downlist_master_choose_and_send (void) | |
209 | -{ | |
210 | - struct downlist_msg *stored_msg; | |
211 | struct list_head *iter; | |
212 | struct process_info *left_pi; | |
213 | qb_map_t *group_map; | |
214 | @@ -884,14 +801,7 @@ static void downlist_master_choose_and_send (void) | |
215 | qb_map_iter_t *miter; | |
216 | int i, size; | |
217 | ||
218 | - downlist_state = CPG_DOWNLIST_APPLYING; | |
219 | - | |
220 | - stored_msg = downlist_master_choose (); | |
221 | - if (!stored_msg) { | |
222 | - log_printf (LOGSYS_LEVEL_DEBUG, "NO chosen downlist"); | |
223 | - return; | |
224 | - } | |
225 | - downlist_log("chosen downlist", stored_msg); | |
226 | + downlist_log("my downlist", &g_req_exec_cpg_downlist); | |
227 | ||
228 | group_map = qb_skiplist_create(); | |
229 | ||
230 | @@ -905,9 +815,9 @@ static void downlist_master_choose_and_send (void) | |
231 | iter = iter->next; | |
232 | ||
233 | left_pi = NULL; | |
234 | - for (i = 0; i < stored_msg->left_nodes; i++) { | |
235 | + for (i = 0; i < g_req_exec_cpg_downlist.left_nodes; i++) { | |
236 | ||
237 | - if (pi->nodeid == stored_msg->nodeids[i]) { | |
238 | + if (pi->nodeid == g_req_exec_cpg_downlist.nodeids[i]) { | |
239 | left_pi = pi; | |
240 | break; | |
241 | } | |
242 | @@ -1039,23 +949,6 @@ static void joinlist_inform_clients (void) | |
243 | joinlist_remove_zombie_pi_entries (); | |
244 | } | |
245 | ||
246 | -static void downlist_messages_delete (void) | |
247 | -{ | |
248 | - struct downlist_msg *stored_msg; | |
249 | - struct list_head *iter, *iter_next; | |
250 | - | |
251 | - for (iter = downlist_messages_head.next; | |
252 | - iter != &downlist_messages_head; | |
253 | - iter = iter_next) { | |
254 | - | |
255 | - iter_next = iter->next; | |
256 | - | |
257 | - stored_msg = list_entry(iter, struct downlist_msg, list); | |
258 | - list_del (&stored_msg->list); | |
259 | - free (stored_msg); | |
260 | - } | |
261 | -} | |
262 | - | |
263 | static void joinlist_messages_delete (void) | |
264 | { | |
265 | struct joinlist_msg *stored_msg; | |
266 | @@ -1076,7 +969,6 @@ static void joinlist_messages_delete (void) | |
267 | ||
268 | static char *cpg_exec_init_fn (struct corosync_api_v1 *corosync_api) | |
269 | { | |
270 | - list_init (&downlist_messages_head); | |
271 | list_init (&joinlist_messages_head); | |
272 | api = corosync_api; | |
273 | return (NULL); | |
274 | @@ -1338,43 +1230,9 @@ static void message_handler_req_exec_cpg_downlist( | |
275 | unsigned int nodeid) | |
276 | { | |
277 | const struct req_exec_cpg_downlist *req_exec_cpg_downlist = message; | |
278 | - int i; | |
279 | - struct list_head *iter; | |
280 | - struct downlist_msg *stored_msg; | |
281 | - int found; | |
e77cc04e | 282 | - |
6a2849ec TL |
283 | - if (downlist_state != CPG_DOWNLIST_WAITING_FOR_MESSAGES) { |
284 | - log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received in state %d", | |
285 | - req_exec_cpg_downlist->left_nodes, downlist_state); | |
286 | - return; | |
287 | - } | |
288 | - | |
289 | - stored_msg = malloc (sizeof (struct downlist_msg)); | |
290 | - stored_msg->sender_nodeid = nodeid; | |
291 | - stored_msg->old_members = req_exec_cpg_downlist->old_members; | |
292 | - stored_msg->left_nodes = req_exec_cpg_downlist->left_nodes; | |
293 | - memcpy (stored_msg->nodeids, req_exec_cpg_downlist->nodeids, | |
294 | - req_exec_cpg_downlist->left_nodes * sizeof (mar_uint32_t)); | |
295 | - list_init (&stored_msg->list); | |
296 | - list_add (&stored_msg->list, &downlist_messages_head); | |
297 | - | |
298 | - for (i = 0; i < my_member_list_entries; i++) { | |
299 | - found = 0; | |
300 | - for (iter = downlist_messages_head.next; | |
301 | - iter != &downlist_messages_head; | |
302 | - iter = iter->next) { | |
303 | - | |
304 | - stored_msg = list_entry(iter, struct downlist_msg, list); | |
305 | - if (my_member_list[i] == stored_msg->sender_nodeid) { | |
306 | - found = 1; | |
307 | - } | |
308 | - } | |
309 | - if (!found) { | |
310 | - return; | |
311 | - } | |
312 | - } | |
e77cc04e | 313 | |
6a2849ec TL |
314 | - downlist_master_choose_and_send (); |
315 | + log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received", | |
316 | + req_exec_cpg_downlist->left_nodes); | |
317 | } | |
318 | ||
319 |