git.proxmox.com Git - mirror_corosync-qdevice.git/blob - qdevices/qnetd-algo-ffsplit.c
qnetd: Use log-common for nodelist debug dump
[mirror_corosync-qdevice.git] / qdevices / qnetd-algo-ffsplit.c
1 /*
2 * Copyright (c) 2015-2019 Red Hat, Inc.
3 *
4 * All rights reserved.
5 *
6 * Author: Jan Friesse (jfriesse@redhat.com)
7 *
8 * This software licensed under BSD license, the text of which follows:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are met:
12 *
13 * - Redistributions of source code must retain the above copyright notice,
14 * this list of conditions and the following disclaimer.
15 * - Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 * - Neither the name of the Red Hat, Inc. nor the names of its
19 * contributors may be used to endorse or promote products derived from this
20 * software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32 * THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 #include <sys/types.h>
36
37 #include <string.h>
38
39 #include "log.h"
40 #include "log-common.h"
41 #include "qnetd-algo-ffsplit.h"
42 #include "qnetd-log-debug.h"
43 #include "qnetd-cluster-list.h"
44 #include "qnetd-cluster.h"
45 #include "qnetd-client-send.h"
46
/*
 * Cluster-wide phase of the ffsplit vote distribution.
 */
enum qnetd_algo_ffsplit_cluster_state {
	/* No vote distribution is in progress */
	QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE,
	/* An evaluation started; membership has not been found stable yet */
	QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_STABLE_MEMBERSHIP,
	/* NACK votes were sent and their replies are awaited */
	QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS,
	/* ACK votes were sent and their replies are awaited */
	QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS,
};
53
54 struct qnetd_algo_ffsplit_cluster_data {
55 enum qnetd_algo_ffsplit_cluster_state cluster_state;
56 const struct node_list *quorate_partition_node_list;
57 };
58
/*
 * Per-client phase of the ffsplit vote distribution.
 */
enum qnetd_algo_ffsplit_client_state {
	/* Client is not scheduled to receive a vote */
	QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE,
	/* Client is scheduled to receive (or has not yet confirmed) a NACK */
	QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK,
	/* Client is scheduled to receive (or has not yet confirmed) an ACK */
	QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK,
};
64
65 struct qnetd_algo_ffsplit_client_data {
66 enum qnetd_algo_ffsplit_client_state client_state;
67 uint32_t vote_info_expected_seq_num;
68 };
69
70 enum tlv_reply_error_code
71 qnetd_algo_ffsplit_client_init(struct qnetd_client *client)
72 {
73 struct qnetd_algo_ffsplit_cluster_data *cluster_data;
74 struct qnetd_algo_ffsplit_client_data *client_data;
75
76 if (qnetd_cluster_size(client->cluster) == 1) {
77 cluster_data = malloc(sizeof(*cluster_data));
78 if (cluster_data == NULL) {
79 log(LOG_ERR, "ffsplit: Can't initialize cluster data for client %s",
80 client->addr_str);
81
82 return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR);
83 }
84 memset(cluster_data, 0, sizeof(*cluster_data));
85 cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
86 cluster_data->quorate_partition_node_list = NULL;
87
88 client->cluster->algorithm_data = cluster_data;
89 }
90
91 client_data = malloc(sizeof(*client_data));
92 if (client_data == NULL) {
93 log(LOG_ERR, "ffsplit: Can't initialize node data for client %s",
94 client->addr_str);
95
96 return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR);
97 }
98 memset(client_data, 0, sizeof(*client_data));
99 client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE;
100 client->algorithm_data = client_data;
101
102 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
103 }
104
105 static int
106 qnetd_algo_ffsplit_is_preferred_partition(const struct qnetd_client *client,
107 const struct node_list *config_node_list, const struct node_list *membership_node_list)
108 {
109 uint32_t preferred_node_id;
110 struct node_list_entry *node_entry;
111 int case_processed;
112
113 preferred_node_id = 0;
114 case_processed = 0;
115
116 switch (client->tie_breaker.mode) {
117 case TLV_TIE_BREAKER_MODE_LOWEST:
118 node_entry = TAILQ_FIRST(config_node_list);
119
120 preferred_node_id = node_entry->node_id;
121
122 TAILQ_FOREACH(node_entry, config_node_list, entries) {
123 if (node_entry->node_id < preferred_node_id) {
124 preferred_node_id = node_entry->node_id;
125 }
126 }
127 case_processed = 1;
128 break;
129 case TLV_TIE_BREAKER_MODE_HIGHEST:
130 node_entry = TAILQ_FIRST(config_node_list);
131
132 preferred_node_id = node_entry->node_id;
133
134 TAILQ_FOREACH(node_entry, config_node_list, entries) {
135 if (node_entry->node_id > preferred_node_id) {
136 preferred_node_id = node_entry->node_id;
137 }
138 }
139 case_processed = 1;
140 break;
141 case TLV_TIE_BREAKER_MODE_NODE_ID:
142 preferred_node_id = client->tie_breaker.node_id;
143 case_processed = 1;
144 break;
145 }
146
147 if (!case_processed) {
148 log(LOG_CRIT, "qnetd_algo_ffsplit_is_preferred_partition unprocessed "
149 "tie_breaker.mode");
150 exit(1);
151 }
152
153 return (node_list_find_node_id(membership_node_list, preferred_node_id) != NULL);
154 }
155
156 static int
157 qnetd_algo_ffsplit_is_membership_stable(const struct qnetd_client *client, int client_leaving,
158 const struct tlv_ring_id *ring_id, const struct node_list *config_node_list,
159 const struct node_list *membership_node_list)
160 {
161 const struct qnetd_client *iter_client1, *iter_client2;
162 const struct node_list *config_node_list1, *config_node_list2;
163 const struct node_list *membership_node_list1, *membership_node_list2;
164 const struct node_list_entry *iter_node1, *iter_node2;
165 const struct node_list_entry *iter_node3, *iter_node4;
166 const struct tlv_ring_id *ring_id1, *ring_id2;
167
168 /*
169 * Test if all active clients share same config list.
170 */
171 TAILQ_FOREACH(iter_client1, &client->cluster->client_list, cluster_entries) {
172 TAILQ_FOREACH(iter_client2, &client->cluster->client_list, cluster_entries) {
173 if (iter_client1 == iter_client2) {
174 continue;
175 }
176
177 if (iter_client1->node_id == client->node_id) {
178 if (client_leaving) {
179 continue;
180 }
181
182 config_node_list1 = config_node_list;
183 } else {
184 config_node_list1 = &iter_client1->configuration_node_list;
185 }
186
187 if (iter_client2->node_id == client->node_id) {
188 if (client_leaving) {
189 continue;
190 }
191
192 config_node_list2 = config_node_list;
193 } else {
194 config_node_list2 = &iter_client2->configuration_node_list;
195 }
196
197 /*
198 * Walk thru all node ids in given config node list...
199 */
200 TAILQ_FOREACH(iter_node1, config_node_list1, entries) {
201 /*
202 * ... and try to find given node id in other list
203 */
204 iter_node2 = node_list_find_node_id(config_node_list2, iter_node1->node_id);
205
206 if (iter_node2 == NULL) {
207 /*
208 * Node with iter_node1->node_id was not found in
209 * config_node_list2 -> lists doesn't match
210 */
211 return (0);
212 }
213 }
214 }
215 }
216
217 /*
218 * Test if same partitions share same ring ids and membership node list
219 */
220 TAILQ_FOREACH(iter_client1, &client->cluster->client_list, cluster_entries) {
221 if (iter_client1->node_id == client->node_id) {
222 if (client_leaving) {
223 continue;
224 }
225
226 membership_node_list1 = membership_node_list;
227 ring_id1 = ring_id;
228 } else {
229 membership_node_list1 = &iter_client1->last_membership_node_list;
230 ring_id1 = &iter_client1->last_ring_id;
231 }
232
233 /*
234 * Walk thru all memberships nodes
235 */
236 TAILQ_FOREACH(iter_node1, membership_node_list1, entries) {
237 /*
238 * try to find client with given node id
239 */
240 iter_client2 = qnetd_cluster_find_client_by_node_id(client->cluster,
241 iter_node1->node_id);
242 if (iter_client2 == NULL) {
243 /*
244 * Client with given id is not connected
245 */
246 continue;
247 }
248
249 if (iter_client2->node_id == client->node_id) {
250 if (client_leaving) {
251 continue;
252 }
253
254 membership_node_list2 = membership_node_list;
255 ring_id2 = ring_id;
256 } else {
257 membership_node_list2 = &iter_client2->last_membership_node_list;
258 ring_id2 = &iter_client2->last_ring_id;
259 }
260
261 /*
262 * Compare ring ids
263 */
264 if (!tlv_ring_id_eq(ring_id1, ring_id2)) {
265 return (0);
266 }
267
268 /*
269 * Now compare that membership node list equals, so walk thru all
270 * members ...
271 */
272 TAILQ_FOREACH(iter_node3, membership_node_list1, entries) {
273 /*
274 * ... and try to find given node id in other membership node list
275 */
276 iter_node4 = node_list_find_node_id(membership_node_list2, iter_node3->node_id);
277
278 if (iter_node4 == NULL) {
279 /*
280 * Node with iter_node3->node_id was not found in
281 * membership_node_list2 -> lists doesn't match
282 */
283 return (0);
284 }
285 }
286 }
287 }
288
289 return (1);
290 }
291
292 static void
293 qnetd_algo_ffsplit_get_active_clients_in_partition_stats(const struct qnetd_client *client,
294 const struct node_list *client_membership_node_list, enum tlv_heuristics client_heuristics,
295 size_t *no_clients, size_t *no_heuristics_pass, size_t *no_heuristics_fail)
296 {
297 const struct node_list_entry *iter_node;
298 const struct qnetd_client *iter_client;
299 enum tlv_heuristics iter_heuristics;
300
301 *no_clients = 0;
302 *no_heuristics_pass = 0;
303 *no_heuristics_fail = 0;
304
305 if (client == NULL || client_membership_node_list == NULL) {
306 return ;
307 }
308
309 TAILQ_FOREACH(iter_node, client_membership_node_list, entries) {
310 iter_client = qnetd_cluster_find_client_by_node_id(client->cluster,
311 iter_node->node_id);
312 if (iter_client != NULL) {
313 (*no_clients)++;
314
315 if (iter_client == client) {
316 iter_heuristics = client_heuristics;
317 } else {
318 iter_heuristics = iter_client->last_heuristics;
319 }
320
321 if (iter_heuristics == TLV_HEURISTICS_PASS) {
322 (*no_heuristics_pass)++;
323 } else if (iter_heuristics == TLV_HEURISTICS_FAIL) {
324 (*no_heuristics_fail)++;
325 }
326 }
327 }
328 }
329
330 /*
331 * Compares two partitions. Return 1 if client1, config_node_list1, membership_node_list1 is
332 * "better" than client2, config_node_list2, membership_node_list2
333 */
334 static int
335 qnetd_algo_ffsplit_partition_cmp(const struct qnetd_client *client1,
336 const struct node_list *config_node_list1, const struct node_list *membership_node_list1,
337 enum tlv_heuristics heuristics_1,
338 const struct qnetd_client *client2,
339 const struct node_list *config_node_list2, const struct node_list *membership_node_list2,
340 enum tlv_heuristics heuristics_2)
341 {
342 size_t part1_active_clients, part2_active_clients;
343 size_t part1_no_heuristics_pass, part2_no_heuristics_pass;
344 size_t part1_no_heuristics_fail, part2_no_heuristics_fail;
345 size_t part1_score, part2_score;
346
347 int res;
348
349 res = -1;
350
351 if (node_list_size(config_node_list1) % 2 != 0) {
352 /*
353 * Odd clusters never split into 50:50.
354 */
355 if (node_list_size(membership_node_list1) > node_list_size(config_node_list1) / 2) {
356 res = 1; goto exit_res;
357 } else {
358 res = 0; goto exit_res;
359 }
360 } else {
361 if (node_list_size(membership_node_list1) > node_list_size(config_node_list1) / 2) {
362 res = 1; goto exit_res;
363 } else if (node_list_size(membership_node_list1) < node_list_size(config_node_list1) / 2) {
364 res = 0; goto exit_res;
365 }
366
367 /*
368 * 50:50 split
369 */
370
371 /*
372 * Check how many active clients are in partitions and heuristics results
373 */
374 qnetd_algo_ffsplit_get_active_clients_in_partition_stats(client1,
375 membership_node_list1, heuristics_1, &part1_active_clients,
376 &part1_no_heuristics_pass, &part1_no_heuristics_fail);
377 qnetd_algo_ffsplit_get_active_clients_in_partition_stats(client2,
378 membership_node_list2, heuristics_2, &part2_active_clients,
379 &part2_no_heuristics_pass, &part2_no_heuristics_fail);
380
381 /*
382 * Partition can contain clients with one of 4 states:
383 * 1. Not-connected to qnetd (D)
384 * 2. Disabled heuristics (U)
385 * 3. Enabled heuristics with pass result (P)
386 * 4. Enabled heuristics with fail result (F)
387 *
388 * The question is, what partition should get vote is kind of hard with
389 * so much states. Following simple "score" seems to be good enough, but may
390 * be suboptimal in some cases. As and example let's say there are
391 * 2 partitions with 4 nodes each. Partition 1 looks like PDDD and partition 2 looks
392 * like FUUU. Partition 1 score is 1 + (1 - 0), partition 2 score is 4 + (0 - 1).
393 * Partition 2 wins eventho there is one processor with failed heuristics.
394 */
395 part1_score = part1_active_clients + (part1_no_heuristics_pass - part1_no_heuristics_fail);
396 part2_score = part2_active_clients + (part2_no_heuristics_pass - part2_no_heuristics_fail);
397
398 if (part1_score > part2_score) {
399 res = 1; goto exit_res;
400 } else if (part1_score < part2_score) {
401 res = 0; goto exit_res;
402 }
403
404 if (part1_active_clients > part2_active_clients) {
405 res = 1; goto exit_res;
406 } else if (part1_active_clients < part2_active_clients) {
407 res = 0; goto exit_res;
408 }
409
410 /*
411 * Number of active clients in both partitions equals. Use tie-breaker.
412 */
413
414 if (qnetd_algo_ffsplit_is_preferred_partition(client1, config_node_list1,
415 membership_node_list1)) {
416 res = 1; goto exit_res;
417 } else {
418 res = 0; goto exit_res;
419 }
420 }
421
422 exit_res:
423 if (res == -1) {
424 log(LOG_CRIT, "qnetd_algo_ffsplit_partition_cmp unhandled case");
425 exit(1);
426 /* NOTREACHED */
427 }
428
429 return (res);
430 }
431
432 /*
433 * Select best partition for given client->cluster.
434 * If there is no partition which could become quorate, NULL is returned
435 */
436 static const struct node_list *
437 qnetd_algo_ffsplit_select_partition(const struct qnetd_client *client, int client_leaving,
438 const struct node_list *config_node_list, const struct node_list *membership_node_list,
439 enum tlv_heuristics client_heuristics)
440 {
441 const struct qnetd_client *iter_client;
442 const struct qnetd_client *best_client;
443 const struct node_list *best_config_node_list, *best_membership_node_list;
444 const struct node_list *iter_config_node_list, *iter_membership_node_list;
445 enum tlv_heuristics iter_heuristics, best_heuristics;
446
447 best_client = NULL;
448 best_config_node_list = best_membership_node_list = NULL;
449 best_heuristics = TLV_HEURISTICS_UNDEFINED;
450
451 /*
452 * Get highest score
453 */
454 TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
455 if (iter_client->node_id == client->node_id) {
456 if (client_leaving) {
457 continue;
458 }
459
460 iter_config_node_list = config_node_list;
461 iter_membership_node_list = membership_node_list;
462 iter_heuristics = client_heuristics;
463 } else {
464 iter_config_node_list = &iter_client->configuration_node_list;
465 iter_membership_node_list = &iter_client->last_membership_node_list;
466 iter_heuristics = iter_client->last_heuristics;
467 }
468
469 if (qnetd_algo_ffsplit_partition_cmp(iter_client, iter_config_node_list,
470 iter_membership_node_list, iter_heuristics, best_client, best_config_node_list,
471 best_membership_node_list, best_heuristics) > 0) {
472 best_client = iter_client;
473 best_config_node_list = iter_config_node_list;
474 best_membership_node_list = iter_membership_node_list;
475 best_heuristics = iter_heuristics;
476 }
477 }
478
479 return (best_membership_node_list);
480 }
481
482 /*
483 * Update state of all nodes to match quorate_partition_node_list
484 */
485 static void
486 qnetd_algo_ffsplit_update_nodes_state(struct qnetd_client *client, int client_leaving,
487 const struct node_list *quorate_partition_node_list)
488 {
489 const struct qnetd_client *iter_client;
490 struct qnetd_algo_ffsplit_client_data *iter_client_data;
491
492 TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
493 iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data;
494
495 if (iter_client->node_id == client->node_id && client_leaving) {
496 iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE;
497
498 continue;
499 }
500
501 if (quorate_partition_node_list == NULL ||
502 node_list_find_node_id(quorate_partition_node_list, iter_client->node_id) == NULL) {
503 iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK;
504 } else {
505 iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK;
506 }
507 }
508 }
509
510 /*
511 * Send vote info. If client_leaving is set, client is ignored. if send_acks
512 * is set, only ACK votes are sent (nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK state),
513 * otherwise only NACK votes are sent (nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK state)
514 *
515 * Returns number of send votes
516 */
517 static size_t
518 qnetd_algo_ffsplit_send_votes(struct qnetd_client *client, int client_leaving,
519 const struct tlv_ring_id *ring_id, int send_acks)
520 {
521 size_t sent_votes;
522 struct qnetd_client *iter_client;
523 struct qnetd_algo_ffsplit_client_data *iter_client_data;
524 const struct tlv_ring_id *ring_id_to_send;
525 enum tlv_vote vote_to_send;
526
527 sent_votes = 0;
528
529 TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
530 if (iter_client->node_id == client->node_id) {
531 if (client_leaving) {
532 continue;
533 }
534
535 ring_id_to_send = ring_id;
536 } else {
537 ring_id_to_send = &iter_client->last_ring_id;
538 }
539
540 iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data;
541 vote_to_send = TLV_VOTE_UNDEFINED;
542
543 if (send_acks) {
544 if (iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK) {
545 vote_to_send = TLV_VOTE_ACK;
546 }
547 } else {
548 if (iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK) {
549 vote_to_send = TLV_VOTE_NACK;
550 }
551 }
552
553 if (vote_to_send != TLV_VOTE_UNDEFINED) {
554 iter_client_data->vote_info_expected_seq_num++;
555 sent_votes++;
556
557 if (qnetd_client_send_vote_info(iter_client,
558 iter_client_data->vote_info_expected_seq_num, ring_id_to_send,
559 vote_to_send) == -1) {
560 client->schedule_disconnect = 1;
561 }
562 }
563 }
564
565 return (sent_votes);
566 }
567
568 /*
569 * Return number of clients in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK state if sending_acks is
570 * set or number of nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK state if sending_acks is
571 * not set
572 */
573 static size_t
574 qnetd_algo_ffsplit_no_clients_in_sending_state(struct qnetd_client *client, int sending_acks)
575 {
576 size_t no_clients;
577 struct qnetd_client *iter_client;
578 struct qnetd_algo_ffsplit_client_data *iter_client_data;
579
580 no_clients = 0;
581
582 TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
583 iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data;
584
585 if (sending_acks &&
586 iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK) {
587 no_clients++;
588 }
589
590 if (!sending_acks &&
591 iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK) {
592 no_clients++;
593 }
594 }
595
596 return (no_clients);
597 }
598
599 static enum tlv_vote
600 qnetd_algo_ffsplit_do(struct qnetd_client *client, int client_leaving,
601 const struct tlv_ring_id *ring_id, const struct node_list *config_node_list,
602 const struct node_list *membership_node_list, enum tlv_heuristics client_heuristics)
603 {
604 struct qnetd_algo_ffsplit_cluster_data *cluster_data;
605 const struct node_list *quorate_partition_node_list;
606
607 cluster_data = (struct qnetd_algo_ffsplit_cluster_data *)client->cluster->algorithm_data;
608
609 cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_STABLE_MEMBERSHIP;
610
611 if (!qnetd_algo_ffsplit_is_membership_stable(client, client_leaving,
612 ring_id, config_node_list, membership_node_list)) {
613 /*
614 * Wait until membership is stable
615 */
616 log(LOG_DEBUG, "ffsplit: Membership for cluster %s is not yet stable", client->cluster_name);
617
618 return (TLV_VOTE_WAIT_FOR_REPLY);
619 }
620
621 log(LOG_DEBUG, "ffsplit: Membership for cluster %s is now stable", client->cluster_name);
622
623 quorate_partition_node_list = qnetd_algo_ffsplit_select_partition(client, client_leaving,
624 config_node_list, membership_node_list, client_heuristics);
625 cluster_data->quorate_partition_node_list = quorate_partition_node_list;
626
627 if (quorate_partition_node_list == NULL) {
628 log(LOG_DEBUG, "ffsplit: No quorate partition was selected");
629 } else {
630 log(LOG_DEBUG, "ffsplit: Quorate partition selected");
631 log_common_debug_dump_node_list(quorate_partition_node_list);
632 }
633
634 qnetd_algo_ffsplit_update_nodes_state(client, client_leaving, quorate_partition_node_list);
635
636 cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS;
637
638 if (qnetd_algo_ffsplit_send_votes(client, client_leaving, ring_id, 0) == 0) {
639 log(LOG_DEBUG, "ffsplit: No client gets NACK");
640 /*
641 * No one gets nack -> send acks
642 */
643 cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS;
644
645 if (qnetd_algo_ffsplit_send_votes(client, client_leaving, ring_id, 1) == 0) {
646 log(LOG_DEBUG, "ffsplit: No client gets ACK");
647 /*
648 * No one gets acks -> finished
649 */
650 cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
651 }
652 }
653
654 return (TLV_VOTE_NO_CHANGE);
655 }
656
657 enum tlv_reply_error_code
658 qnetd_algo_ffsplit_config_node_list_received(struct qnetd_client *client,
659 uint32_t msg_seq_num, int config_version_set, uint64_t config_version,
660 const struct node_list *nodes, int initial, enum tlv_vote *result_vote)
661 {
662
663 if (node_list_size(nodes) == 0) {
664 /*
665 * Empty node list shouldn't happen
666 */
667 log(LOG_ERR, "ffsplit: Received empty config node list for client %s",
668 client->addr_str);
669
670 return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST);
671 }
672
673 if (node_list_find_node_id(nodes, client->node_id) == NULL) {
674 /*
675 * Current node is not in node list
676 */
677 log(LOG_ERR, "ffsplit: Received config node list without client %s",
678 client->addr_str);
679
680 return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST);
681 }
682
683 if (initial || node_list_size(&client->last_membership_node_list) == 0) {
684 /*
685 * Initial node list -> membership is going to be send by client
686 */
687 *result_vote = TLV_VOTE_ASK_LATER;
688 } else {
689 *result_vote = qnetd_algo_ffsplit_do(client, 0, &client->last_ring_id,
690 nodes, &client->last_membership_node_list, client->last_heuristics);
691 }
692
693 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
694 }
695
696 /*
697 * Called after client sent membership node list.
698 * All client fields are already set. Nodes is actual node list.
699 * msg_seq_num is 32-bit number set by client. If client sent config file version,
700 * config_version_set is set to 1 and config_version contains valid config file version.
701 * ring_id and quorate are copied from client votequorum callback.
702 *
703 * Function has to return result_vote. This can be one of ack/nack, ask_later (client
704 * should ask later for a vote) or wait_for_reply (client should wait for reply).
705 *
706 * Return TLV_REPLY_ERROR_CODE_NO_ERROR on success, different TLV_REPLY_ERROR_CODE_*
707 * on failure (error is send back to client)
708 */
709
710 enum tlv_reply_error_code
711 qnetd_algo_ffsplit_membership_node_list_received(struct qnetd_client *client,
712 uint32_t msg_seq_num, const struct tlv_ring_id *ring_id,
713 const struct node_list *nodes, enum tlv_heuristics heuristics, enum tlv_vote *result_vote)
714 {
715
716 if (node_list_size(nodes) == 0) {
717 /*
718 * Empty node list shouldn't happen
719 */
720 log(LOG_ERR, "ffsplit: Received empty membership node list for client %s",
721 client->addr_str);
722
723 return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST);
724 }
725
726 if (node_list_find_node_id(nodes, client->node_id) == NULL) {
727 /*
728 * Current node is not in node list
729 */
730 log(LOG_ERR, "ffsplit: Received membership node list without client %s",
731 client->addr_str);
732
733 return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST);
734 }
735
736 if (node_list_size(&client->configuration_node_list) == 0) {
737 /*
738 * Config node list not received -> it's going to be sent later
739 */
740 *result_vote = TLV_VOTE_ASK_LATER;
741 } else {
742 *result_vote = qnetd_algo_ffsplit_do(client, 0, ring_id,
743 &client->configuration_node_list, nodes, heuristics);
744 }
745
746 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
747 }
748
749 enum tlv_reply_error_code
750 qnetd_algo_ffsplit_quorum_node_list_received(struct qnetd_client *client,
751 uint32_t msg_seq_num, enum tlv_quorate quorate, const struct node_list *nodes,
752 enum tlv_vote *result_vote)
753 {
754
755 /*
756 * Quorum node list is informative -> no change
757 */
758 *result_vote = TLV_VOTE_NO_CHANGE;
759
760 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
761 }
762
763 void
764 qnetd_algo_ffsplit_client_disconnect(struct qnetd_client *client, int server_going_down)
765 {
766
767 (void)qnetd_algo_ffsplit_do(client, 1, &client->last_ring_id,
768 &client->configuration_node_list, &client->last_membership_node_list,
769 client->last_heuristics);
770
771 free(client->algorithm_data);
772
773 if (qnetd_cluster_size(client->cluster) == 1) {
774 /*
775 * Last client in the cluster
776 */
777 free(client->cluster->algorithm_data);
778 }
779 }
780
781 enum tlv_reply_error_code
782 qnetd_algo_ffsplit_ask_for_vote_received(struct qnetd_client *client, uint32_t msg_seq_num,
783 enum tlv_vote *result_vote)
784 {
785
786 /*
787 * Ask for vote is not supported in current algorithm
788 */
789 return (TLV_REPLY_ERROR_CODE_UNSUPPORTED_DECISION_ALGORITHM_MESSAGE);
790 }
791
792 enum tlv_reply_error_code
793 qnetd_algo_ffsplit_vote_info_reply_received(struct qnetd_client *client, uint32_t msg_seq_num)
794 {
795 struct qnetd_algo_ffsplit_cluster_data *cluster_data;
796 struct qnetd_algo_ffsplit_client_data *client_data;
797
798 cluster_data = (struct qnetd_algo_ffsplit_cluster_data *)client->cluster->algorithm_data;
799 client_data = (struct qnetd_algo_ffsplit_client_data *)client->algorithm_data;
800
801 if (client_data->vote_info_expected_seq_num != msg_seq_num) {
802 log(LOG_DEBUG, "ffsplit: Received old vote info reply from client %s",
803 client->addr_str);
804
805 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
806 }
807
808 client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE;
809
810 if (cluster_data->cluster_state != QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS &&
811 cluster_data->cluster_state != QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS) {
812 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
813 }
814
815 if (cluster_data->cluster_state == QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS) {
816 if (qnetd_algo_ffsplit_no_clients_in_sending_state(client, 0) == 0) {
817 log(LOG_DEBUG, "ffsplit: All NACK votes sent for cluster %s",
818 client->cluster_name);
819
820 cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS;
821
822 if (qnetd_algo_ffsplit_send_votes(client, 0, &client->last_ring_id, 1) == 0) {
823 log(LOG_DEBUG, "ffsplit: No client gets ACK");
824 /*
825 * No one gets acks -> finished
826 */
827 cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
828 }
829 }
830 } else {
831 if (qnetd_algo_ffsplit_no_clients_in_sending_state(client, 1) == 0) {
832 log(LOG_DEBUG, "ffsplit: All ACK votes sent for cluster %s",
833 client->cluster_name);
834
835 cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
836 }
837 }
838
839 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
840 }
841
842 enum tlv_reply_error_code
843 qnetd_algo_ffsplit_heuristics_change_received(struct qnetd_client *client, uint32_t msg_seq_num,
844 enum tlv_heuristics heuristics, enum tlv_vote *result_vote)
845 {
846
847 if (node_list_size(&client->configuration_node_list) == 0 ||
848 node_list_size(&client->last_membership_node_list) == 0) {
849 /*
850 * Config or membership node list not received -> it's going to be sent later
851 */
852 *result_vote = TLV_VOTE_ASK_LATER;
853 } else {
854 *result_vote = qnetd_algo_ffsplit_do(client, 0, &client->last_ring_id,
855 &client->configuration_node_list, &client->last_membership_node_list,
856 heuristics);
857 }
858
859 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
860 }
861
862 enum tlv_reply_error_code
863 qnetd_algo_ffsplit_timer_callback(struct qnetd_client *client, int *reschedule_timer,
864 int *send_vote, enum tlv_vote *result_vote)
865 {
866
867 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
868 }
869
870 static struct qnetd_algorithm qnetd_algo_ffsplit = {
871 .init = qnetd_algo_ffsplit_client_init,
872 .config_node_list_received = qnetd_algo_ffsplit_config_node_list_received,
873 .membership_node_list_received = qnetd_algo_ffsplit_membership_node_list_received,
874 .quorum_node_list_received = qnetd_algo_ffsplit_quorum_node_list_received,
875 .client_disconnect = qnetd_algo_ffsplit_client_disconnect,
876 .ask_for_vote_received = qnetd_algo_ffsplit_ask_for_vote_received,
877 .vote_info_reply_received = qnetd_algo_ffsplit_vote_info_reply_received,
878 .heuristics_change_received = qnetd_algo_ffsplit_heuristics_change_received,
879 .timer_callback = qnetd_algo_ffsplit_timer_callback,
880 };
881
882 enum tlv_reply_error_code qnetd_algo_ffsplit_register()
883 {
884
885 return (qnetd_algorithm_register(TLV_DECISION_ALGORITHM_TYPE_FFSPLIT, &qnetd_algo_ffsplit));
886 }