/*
 * Copyright (c) 2015-2020 Red Hat, Inc.
 *
 * Author: Jan Friesse (jfriesse@redhat.com)
 *
 * This software licensed under BSD license, the text of which follows:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 * - Neither the name of the Red Hat, Inc. nor the names of its
 *   contributors may be used to endorse or promote products derived from this
 *   software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/types.h>

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "log-common.h"
#include "qnetd-algo-ffsplit.h"
#include "qnetd-log-debug.h"
#include "qnetd-cluster-list.h"
#include "qnetd-cluster.h"
#include "qnetd-client-send.h"
/*
 * Per-cluster progress of the ffsplit algorithm. The enumerator order is
 * kept exactly as in the original definition.
 */
enum qnetd_algo_ffsplit_cluster_state {
	/* Idle: set on cluster init and once no more votes are pending */
	QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE,
	/* Set at the start of every algorithm run, before the stability check */
	QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_STABLE_MEMBERSHIP,
	/* NACK votes are being delivered */
	QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS,
	/* ACK votes are being delivered */
	QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS,
};
54 struct qnetd_algo_ffsplit_cluster_data
{
55 enum qnetd_algo_ffsplit_cluster_state cluster_state
;
56 const struct node_list
*quorate_partition_node_list
;
/*
 * Per-client vote delivery state. The enumerator order is kept exactly as
 * in the original definition.
 */
enum qnetd_algo_ffsplit_client_state {
	/* No vote is pending for the client */
	QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE,
	/* Client is outside the selected partition and is due a NACK */
	QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK,
	/* Client is inside the selected partition and is due an ACK */
	QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK,
};
65 struct qnetd_algo_ffsplit_client_data
{
66 enum qnetd_algo_ffsplit_client_state client_state
;
67 uint32_t vote_info_expected_seq_num
;
70 enum tlv_reply_error_code
71 qnetd_algo_ffsplit_client_init(struct qnetd_client
*client
)
73 struct qnetd_algo_ffsplit_cluster_data
*cluster_data
;
74 struct qnetd_algo_ffsplit_client_data
*client_data
;
76 if (qnetd_cluster_size(client
->cluster
) == 1) {
77 cluster_data
= malloc(sizeof(*cluster_data
));
78 if (cluster_data
== NULL
) {
79 log(LOG_ERR
, "ffsplit: Can't initialize cluster data for client %s",
82 return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR
);
84 memset(cluster_data
, 0, sizeof(*cluster_data
));
85 cluster_data
->cluster_state
= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE
;
86 cluster_data
->quorate_partition_node_list
= NULL
;
88 client
->cluster
->algorithm_data
= cluster_data
;
91 client_data
= malloc(sizeof(*client_data
));
92 if (client_data
== NULL
) {
93 log(LOG_ERR
, "ffsplit: Can't initialize node data for client %s",
96 return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR
);
98 memset(client_data
, 0, sizeof(*client_data
));
99 client_data
->client_state
= QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE
;
100 client
->algorithm_data
= client_data
;
102 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
106 qnetd_algo_ffsplit_is_preferred_partition(const struct qnetd_client
*client
,
107 const struct node_list
*config_node_list
, const struct node_list
*membership_node_list
)
109 uint32_t preferred_node_id
;
110 struct node_list_entry
*node_entry
;
113 preferred_node_id
= 0;
116 switch (client
->tie_breaker
.mode
) {
117 case TLV_TIE_BREAKER_MODE_LOWEST
:
118 node_entry
= TAILQ_FIRST(config_node_list
);
120 preferred_node_id
= node_entry
->node_id
;
122 TAILQ_FOREACH(node_entry
, config_node_list
, entries
) {
123 if (node_entry
->node_id
< preferred_node_id
) {
124 preferred_node_id
= node_entry
->node_id
;
129 case TLV_TIE_BREAKER_MODE_HIGHEST
:
130 node_entry
= TAILQ_FIRST(config_node_list
);
132 preferred_node_id
= node_entry
->node_id
;
134 TAILQ_FOREACH(node_entry
, config_node_list
, entries
) {
135 if (node_entry
->node_id
> preferred_node_id
) {
136 preferred_node_id
= node_entry
->node_id
;
141 case TLV_TIE_BREAKER_MODE_NODE_ID
:
142 preferred_node_id
= client
->tie_breaker
.node_id
;
147 if (!case_processed
) {
148 log(LOG_CRIT
, "qnetd_algo_ffsplit_is_preferred_partition unprocessed "
153 return (node_list_find_node_id(membership_node_list
, preferred_node_id
) != NULL
);
157 qnetd_algo_ffsplit_is_membership_stable(const struct qnetd_client
*client
, int client_leaving
,
158 const struct tlv_ring_id
*ring_id
, const struct node_list
*config_node_list
,
159 const struct node_list
*membership_node_list
)
161 const struct qnetd_client
*iter_client1
, *iter_client2
;
162 const struct node_list
*config_node_list1
, *config_node_list2
;
163 const struct node_list
*membership_node_list1
, *membership_node_list2
;
164 const struct node_list_entry
*iter_node1
, *iter_node2
;
165 const struct node_list_entry
*iter_node3
, *iter_node4
;
166 const struct tlv_ring_id
*ring_id1
, *ring_id2
;
169 * Test if all active clients share same config list.
171 TAILQ_FOREACH(iter_client1
, &client
->cluster
->client_list
, cluster_entries
) {
172 TAILQ_FOREACH(iter_client2
, &client
->cluster
->client_list
, cluster_entries
) {
173 if (iter_client1
== iter_client2
) {
177 if (iter_client1
->node_id
== client
->node_id
) {
178 if (client_leaving
) {
182 config_node_list1
= config_node_list
;
184 config_node_list1
= &iter_client1
->configuration_node_list
;
187 if (iter_client2
->node_id
== client
->node_id
) {
188 if (client_leaving
) {
192 config_node_list2
= config_node_list
;
194 config_node_list2
= &iter_client2
->configuration_node_list
;
198 * Walk thru all node ids in given config node list...
200 TAILQ_FOREACH(iter_node1
, config_node_list1
, entries
) {
202 * ... and try to find given node id in other list
204 iter_node2
= node_list_find_node_id(config_node_list2
, iter_node1
->node_id
);
206 if (iter_node2
== NULL
) {
208 * Node with iter_node1->node_id was not found in
209 * config_node_list2 -> lists doesn't match
218 * Test if same partitions share same ring ids and membership node list
220 TAILQ_FOREACH(iter_client1
, &client
->cluster
->client_list
, cluster_entries
) {
221 if (iter_client1
->node_id
== client
->node_id
) {
222 if (client_leaving
) {
226 membership_node_list1
= membership_node_list
;
229 membership_node_list1
= &iter_client1
->last_membership_node_list
;
230 ring_id1
= &iter_client1
->last_ring_id
;
234 * Walk thru all memberships nodes
236 TAILQ_FOREACH(iter_node1
, membership_node_list1
, entries
) {
238 * try to find client with given node id
240 iter_client2
= qnetd_cluster_find_client_by_node_id(client
->cluster
,
241 iter_node1
->node_id
);
242 if (iter_client2
== NULL
) {
244 * Client with given id is not connected
249 if (iter_client2
->node_id
== client
->node_id
) {
250 if (client_leaving
) {
254 membership_node_list2
= membership_node_list
;
257 membership_node_list2
= &iter_client2
->last_membership_node_list
;
258 ring_id2
= &iter_client2
->last_ring_id
;
264 if (!tlv_ring_id_eq(ring_id1
, ring_id2
)) {
269 * Now compare that membership node list equals, so walk thru all
272 TAILQ_FOREACH(iter_node3
, membership_node_list1
, entries
) {
274 * ... and try to find given node id in other membership node list
276 iter_node4
= node_list_find_node_id(membership_node_list2
, iter_node3
->node_id
);
278 if (iter_node4
== NULL
) {
280 * Node with iter_node3->node_id was not found in
281 * membership_node_list2 -> lists doesn't match
293 qnetd_algo_ffsplit_get_active_clients_in_partition_stats(const struct qnetd_client
*client
,
294 const struct node_list
*client_membership_node_list
, enum tlv_heuristics client_heuristics
,
295 size_t *no_clients
, size_t *no_heuristics_pass
, size_t *no_heuristics_fail
)
297 const struct node_list_entry
*iter_node
;
298 const struct qnetd_client
*iter_client
;
299 enum tlv_heuristics iter_heuristics
;
302 *no_heuristics_pass
= 0;
303 *no_heuristics_fail
= 0;
305 if (client
== NULL
|| client_membership_node_list
== NULL
) {
309 TAILQ_FOREACH(iter_node
, client_membership_node_list
, entries
) {
310 iter_client
= qnetd_cluster_find_client_by_node_id(client
->cluster
,
312 if (iter_client
!= NULL
) {
315 if (iter_client
== client
) {
316 iter_heuristics
= client_heuristics
;
318 iter_heuristics
= iter_client
->last_heuristics
;
321 if (iter_heuristics
== TLV_HEURISTICS_PASS
) {
322 (*no_heuristics_pass
)++;
323 } else if (iter_heuristics
== TLV_HEURISTICS_FAIL
) {
324 (*no_heuristics_fail
)++;
331 * Compares two partitions. Return 1 if client1, config_node_list1, membership_node_list1 is
332 * "better" than client2, config_node_list2, membership_node_list2
335 qnetd_algo_ffsplit_partition_cmp(const struct qnetd_client
*client1
,
336 const struct node_list
*config_node_list1
, const struct node_list
*membership_node_list1
,
337 enum tlv_heuristics heuristics_1
,
338 const struct qnetd_client
*client2
,
339 const struct node_list
*config_node_list2
, const struct node_list
*membership_node_list2
,
340 enum tlv_heuristics heuristics_2
)
342 size_t part1_active_clients
, part2_active_clients
;
343 size_t part1_no_heuristics_pass
, part2_no_heuristics_pass
;
344 size_t part1_no_heuristics_fail
, part2_no_heuristics_fail
;
345 size_t part1_score
, part2_score
;
351 if (node_list_size(config_node_list1
) % 2 != 0) {
353 * Odd clusters never split into 50:50.
355 if (node_list_size(membership_node_list1
) > node_list_size(config_node_list1
) / 2) {
356 res
= 1; goto exit_res
;
358 res
= 0; goto exit_res
;
361 if (node_list_size(membership_node_list1
) > node_list_size(config_node_list1
) / 2) {
362 res
= 1; goto exit_res
;
363 } else if (node_list_size(membership_node_list1
) < node_list_size(config_node_list1
) / 2) {
364 res
= 0; goto exit_res
;
372 * Check how many active clients are in partitions and heuristics results
374 qnetd_algo_ffsplit_get_active_clients_in_partition_stats(client1
,
375 membership_node_list1
, heuristics_1
, &part1_active_clients
,
376 &part1_no_heuristics_pass
, &part1_no_heuristics_fail
);
377 qnetd_algo_ffsplit_get_active_clients_in_partition_stats(client2
,
378 membership_node_list2
, heuristics_2
, &part2_active_clients
,
379 &part2_no_heuristics_pass
, &part2_no_heuristics_fail
);
382 * Partition can contain clients with one of 4 states:
383 * 1. Not-connected to qnetd (D)
384 * 2. Disabled heuristics (U)
385 * 3. Enabled heuristics with pass result (P)
386 * 4. Enabled heuristics with fail result (F)
388 * The question is, what partition should get vote is kind of hard with
389 * so much states. Following simple "score" seems to be good enough, but may
390 * be suboptimal in some cases. As and example let's say there are
391 * 2 partitions with 4 nodes each. Partition 1 looks like PDDD and partition 2 looks
392 * like FUUU. Partition 1 score is 1 + (1 - 0), partition 2 score is 4 + (0 - 1).
393 * Partition 2 wins eventho there is one processor with failed heuristics.
395 part1_score
= part1_active_clients
+ (part1_no_heuristics_pass
- part1_no_heuristics_fail
);
396 part2_score
= part2_active_clients
+ (part2_no_heuristics_pass
- part2_no_heuristics_fail
);
398 if (part1_score
> part2_score
) {
399 res
= 1; goto exit_res
;
400 } else if (part1_score
< part2_score
) {
401 res
= 0; goto exit_res
;
404 if (part1_active_clients
> part2_active_clients
) {
405 res
= 1; goto exit_res
;
406 } else if (part1_active_clients
< part2_active_clients
) {
407 res
= 0; goto exit_res
;
411 * Number of active clients in both partitions equals. Use tie-breaker.
414 if (qnetd_algo_ffsplit_is_preferred_partition(client1
, config_node_list1
,
415 membership_node_list1
)) {
416 res
= 1; goto exit_res
;
418 res
= 0; goto exit_res
;
424 log(LOG_CRIT
, "qnetd_algo_ffsplit_partition_cmp unhandled case");
433 * Select best partition for given client->cluster.
434 * If there is no partition which could become quorate, NULL is returned
436 static const struct node_list
*
437 qnetd_algo_ffsplit_select_partition(const struct qnetd_client
*client
, int client_leaving
,
438 const struct node_list
*config_node_list
, const struct node_list
*membership_node_list
,
439 enum tlv_heuristics client_heuristics
)
441 const struct qnetd_client
*iter_client
;
442 const struct qnetd_client
*best_client
;
443 const struct node_list
*best_config_node_list
, *best_membership_node_list
;
444 const struct node_list
*iter_config_node_list
, *iter_membership_node_list
;
445 enum tlv_heuristics iter_heuristics
, best_heuristics
;
448 best_config_node_list
= best_membership_node_list
= NULL
;
449 best_heuristics
= TLV_HEURISTICS_UNDEFINED
;
454 TAILQ_FOREACH(iter_client
, &client
->cluster
->client_list
, cluster_entries
) {
455 if (iter_client
->node_id
== client
->node_id
) {
456 if (client_leaving
) {
460 iter_config_node_list
= config_node_list
;
461 iter_membership_node_list
= membership_node_list
;
462 iter_heuristics
= client_heuristics
;
464 iter_config_node_list
= &iter_client
->configuration_node_list
;
465 iter_membership_node_list
= &iter_client
->last_membership_node_list
;
466 iter_heuristics
= iter_client
->last_heuristics
;
469 if (qnetd_algo_ffsplit_partition_cmp(iter_client
, iter_config_node_list
,
470 iter_membership_node_list
, iter_heuristics
, best_client
, best_config_node_list
,
471 best_membership_node_list
, best_heuristics
) > 0) {
472 best_client
= iter_client
;
473 best_config_node_list
= iter_config_node_list
;
474 best_membership_node_list
= iter_membership_node_list
;
475 best_heuristics
= iter_heuristics
;
479 return (best_membership_node_list
);
483 * Update state of all nodes to match quorate_partition_node_list
486 qnetd_algo_ffsplit_update_nodes_state(struct qnetd_client
*client
, int client_leaving
,
487 const struct node_list
*quorate_partition_node_list
)
489 const struct qnetd_client
*iter_client
;
490 struct qnetd_algo_ffsplit_client_data
*iter_client_data
;
492 TAILQ_FOREACH(iter_client
, &client
->cluster
->client_list
, cluster_entries
) {
493 iter_client_data
= (struct qnetd_algo_ffsplit_client_data
*)iter_client
->algorithm_data
;
495 if (iter_client
->node_id
== client
->node_id
&& client_leaving
) {
496 iter_client_data
->client_state
= QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE
;
501 if (quorate_partition_node_list
== NULL
||
502 node_list_find_node_id(quorate_partition_node_list
, iter_client
->node_id
) == NULL
) {
503 iter_client_data
->client_state
= QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK
;
505 iter_client_data
->client_state
= QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK
;
511 * Send vote info. If client_leaving is set, client is ignored. if send_acks
512 * is set, only ACK votes are sent (nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK state),
513 * otherwise only NACK votes are sent (nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK state)
515 * Returns number of send votes
518 qnetd_algo_ffsplit_send_votes(struct qnetd_client
*client
, int client_leaving
,
519 const struct tlv_ring_id
*ring_id
, int send_acks
)
522 struct qnetd_client
*iter_client
;
523 struct qnetd_algo_ffsplit_client_data
*iter_client_data
;
524 const struct tlv_ring_id
*ring_id_to_send
;
525 enum tlv_vote vote_to_send
;
529 TAILQ_FOREACH(iter_client
, &client
->cluster
->client_list
, cluster_entries
) {
530 if (iter_client
->node_id
== client
->node_id
) {
531 if (client_leaving
) {
535 ring_id_to_send
= ring_id
;
537 ring_id_to_send
= &iter_client
->last_ring_id
;
540 iter_client_data
= (struct qnetd_algo_ffsplit_client_data
*)iter_client
->algorithm_data
;
541 vote_to_send
= TLV_VOTE_UNDEFINED
;
544 if (iter_client_data
->client_state
== QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK
) {
545 vote_to_send
= TLV_VOTE_ACK
;
548 if (iter_client_data
->client_state
== QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK
) {
549 vote_to_send
= TLV_VOTE_NACK
;
553 if (vote_to_send
!= TLV_VOTE_UNDEFINED
) {
554 iter_client_data
->vote_info_expected_seq_num
++;
557 if (qnetd_client_send_vote_info(iter_client
,
558 iter_client_data
->vote_info_expected_seq_num
, ring_id_to_send
,
559 vote_to_send
) == -1) {
560 client
->schedule_disconnect
= 1;
569 * Return number of clients in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK state if sending_acks is
570 * set or number of nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK state if sending_acks is
574 qnetd_algo_ffsplit_no_clients_in_sending_state(struct qnetd_client
*client
, int sending_acks
)
577 struct qnetd_client
*iter_client
;
578 struct qnetd_algo_ffsplit_client_data
*iter_client_data
;
582 TAILQ_FOREACH(iter_client
, &client
->cluster
->client_list
, cluster_entries
) {
583 iter_client_data
= (struct qnetd_algo_ffsplit_client_data
*)iter_client
->algorithm_data
;
586 iter_client_data
->client_state
== QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK
) {
591 iter_client_data
->client_state
== QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK
) {
600 qnetd_algo_ffsplit_do(struct qnetd_client
*client
, int client_leaving
,
601 const struct tlv_ring_id
*ring_id
, const struct node_list
*config_node_list
,
602 const struct node_list
*membership_node_list
, enum tlv_heuristics client_heuristics
)
604 struct qnetd_algo_ffsplit_cluster_data
*cluster_data
;
605 const struct node_list
*quorate_partition_node_list
;
607 cluster_data
= (struct qnetd_algo_ffsplit_cluster_data
*)client
->cluster
->algorithm_data
;
609 cluster_data
->cluster_state
= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_STABLE_MEMBERSHIP
;
611 if (!qnetd_algo_ffsplit_is_membership_stable(client
, client_leaving
,
612 ring_id
, config_node_list
, membership_node_list
)) {
614 * Wait until membership is stable
616 log(LOG_DEBUG
, "ffsplit: Membership for cluster %s is not yet stable", client
->cluster_name
);
618 return (TLV_VOTE_WAIT_FOR_REPLY
);
621 log(LOG_DEBUG
, "ffsplit: Membership for cluster %s is now stable", client
->cluster_name
);
623 quorate_partition_node_list
= qnetd_algo_ffsplit_select_partition(client
, client_leaving
,
624 config_node_list
, membership_node_list
, client_heuristics
);
625 cluster_data
->quorate_partition_node_list
= quorate_partition_node_list
;
627 if (quorate_partition_node_list
== NULL
) {
628 log(LOG_DEBUG
, "ffsplit: No quorate partition was selected");
630 log(LOG_DEBUG
, "ffsplit: Quorate partition selected");
631 log_common_debug_dump_node_list(quorate_partition_node_list
);
634 qnetd_algo_ffsplit_update_nodes_state(client
, client_leaving
, quorate_partition_node_list
);
636 cluster_data
->cluster_state
= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS
;
638 if (qnetd_algo_ffsplit_send_votes(client
, client_leaving
, ring_id
, 0) == 0) {
639 log(LOG_DEBUG
, "ffsplit: No client gets NACK");
641 * No one gets nack -> send acks
643 cluster_data
->cluster_state
= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS
;
645 if (qnetd_algo_ffsplit_send_votes(client
, client_leaving
, ring_id
, 1) == 0) {
646 log(LOG_DEBUG
, "ffsplit: No client gets ACK");
648 * No one gets acks -> finished
650 cluster_data
->cluster_state
= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE
;
654 return (TLV_VOTE_NO_CHANGE
);
657 enum tlv_reply_error_code
658 qnetd_algo_ffsplit_config_node_list_received(struct qnetd_client
*client
,
659 uint32_t msg_seq_num
, int config_version_set
, uint64_t config_version
,
660 const struct node_list
*nodes
, int initial
, enum tlv_vote
*result_vote
)
663 if (node_list_size(nodes
) == 0) {
665 * Empty node list shouldn't happen
667 log(LOG_ERR
, "ffsplit: Received empty config node list for client %s",
670 return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST
);
673 if (node_list_find_node_id(nodes
, client
->node_id
) == NULL
) {
675 * Current node is not in node list
677 log(LOG_ERR
, "ffsplit: Received config node list without client %s",
680 return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST
);
683 if (initial
|| node_list_size(&client
->last_membership_node_list
) == 0) {
685 * Initial node list -> membership is going to be send by client
687 *result_vote
= TLV_VOTE_ASK_LATER
;
689 *result_vote
= qnetd_algo_ffsplit_do(client
, 0, &client
->last_ring_id
,
690 nodes
, &client
->last_membership_node_list
, client
->last_heuristics
);
693 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
697 * Called after client sent membership node list.
698 * All client fields are already set. Nodes is actual node list.
699 * msg_seq_num is 32-bit number set by client. If client sent config file version,
700 * config_version_set is set to 1 and config_version contains valid config file version.
701 * ring_id and quorate are copied from client votequorum callback.
703 * Function has to return result_vote. This can be one of ack/nack, ask_later (client
704 * should ask later for a vote) or wait_for_reply (client should wait for reply).
706 * Return TLV_REPLY_ERROR_CODE_NO_ERROR on success, different TLV_REPLY_ERROR_CODE_*
707 * on failure (error is send back to client)
710 enum tlv_reply_error_code
711 qnetd_algo_ffsplit_membership_node_list_received(struct qnetd_client
*client
,
712 uint32_t msg_seq_num
, const struct tlv_ring_id
*ring_id
,
713 const struct node_list
*nodes
, enum tlv_heuristics heuristics
, enum tlv_vote
*result_vote
)
716 if (node_list_size(nodes
) == 0) {
718 * Empty node list shouldn't happen
720 log(LOG_ERR
, "ffsplit: Received empty membership node list for client %s",
723 return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST
);
726 if (node_list_find_node_id(nodes
, client
->node_id
) == NULL
) {
728 * Current node is not in node list
730 log(LOG_ERR
, "ffsplit: Received membership node list without client %s",
733 return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST
);
736 if (node_list_size(&client
->configuration_node_list
) == 0) {
738 * Config node list not received -> it's going to be sent later
740 *result_vote
= TLV_VOTE_ASK_LATER
;
742 *result_vote
= qnetd_algo_ffsplit_do(client
, 0, ring_id
,
743 &client
->configuration_node_list
, nodes
, heuristics
);
746 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
749 enum tlv_reply_error_code
750 qnetd_algo_ffsplit_quorum_node_list_received(struct qnetd_client
*client
,
751 uint32_t msg_seq_num
, enum tlv_quorate quorate
, const struct node_list
*nodes
,
752 enum tlv_vote
*result_vote
)
756 * Quorum node list is informative -> no change
758 *result_vote
= TLV_VOTE_NO_CHANGE
;
760 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
764 qnetd_algo_ffsplit_client_disconnect(struct qnetd_client
*client
, int server_going_down
)
767 if (!server_going_down
) {
768 (void)qnetd_algo_ffsplit_do(client
, 1, &client
->last_ring_id
,
769 &client
->configuration_node_list
, &client
->last_membership_node_list
,
770 client
->last_heuristics
);
773 free(client
->algorithm_data
);
775 if (qnetd_cluster_size(client
->cluster
) == 1) {
777 * Last client in the cluster
779 free(client
->cluster
->algorithm_data
);
783 enum tlv_reply_error_code
784 qnetd_algo_ffsplit_ask_for_vote_received(struct qnetd_client
*client
, uint32_t msg_seq_num
,
785 enum tlv_vote
*result_vote
)
789 * Ask for vote is not supported in current algorithm
791 return (TLV_REPLY_ERROR_CODE_UNSUPPORTED_DECISION_ALGORITHM_MESSAGE
);
794 enum tlv_reply_error_code
795 qnetd_algo_ffsplit_vote_info_reply_received(struct qnetd_client
*client
, uint32_t msg_seq_num
)
797 struct qnetd_algo_ffsplit_cluster_data
*cluster_data
;
798 struct qnetd_algo_ffsplit_client_data
*client_data
;
800 cluster_data
= (struct qnetd_algo_ffsplit_cluster_data
*)client
->cluster
->algorithm_data
;
801 client_data
= (struct qnetd_algo_ffsplit_client_data
*)client
->algorithm_data
;
803 if (client_data
->vote_info_expected_seq_num
!= msg_seq_num
) {
804 log(LOG_DEBUG
, "ffsplit: Received old vote info reply from client %s",
807 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
810 client_data
->client_state
= QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE
;
812 if (cluster_data
->cluster_state
!= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS
&&
813 cluster_data
->cluster_state
!= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS
) {
814 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
817 if (cluster_data
->cluster_state
== QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS
) {
818 if (qnetd_algo_ffsplit_no_clients_in_sending_state(client
, 0) == 0) {
819 log(LOG_DEBUG
, "ffsplit: All NACK votes sent for cluster %s",
820 client
->cluster_name
);
822 cluster_data
->cluster_state
= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS
;
824 if (qnetd_algo_ffsplit_send_votes(client
, 0, &client
->last_ring_id
, 1) == 0) {
825 log(LOG_DEBUG
, "ffsplit: No client gets ACK");
827 * No one gets acks -> finished
829 cluster_data
->cluster_state
= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE
;
833 if (qnetd_algo_ffsplit_no_clients_in_sending_state(client
, 1) == 0) {
834 log(LOG_DEBUG
, "ffsplit: All ACK votes sent for cluster %s",
835 client
->cluster_name
);
837 cluster_data
->cluster_state
= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE
;
841 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
844 enum tlv_reply_error_code
845 qnetd_algo_ffsplit_heuristics_change_received(struct qnetd_client
*client
, uint32_t msg_seq_num
,
846 enum tlv_heuristics heuristics
, enum tlv_vote
*result_vote
)
849 if (node_list_size(&client
->configuration_node_list
) == 0 ||
850 node_list_size(&client
->last_membership_node_list
) == 0) {
852 * Config or membership node list not received -> it's going to be sent later
854 *result_vote
= TLV_VOTE_ASK_LATER
;
856 *result_vote
= qnetd_algo_ffsplit_do(client
, 0, &client
->last_ring_id
,
857 &client
->configuration_node_list
, &client
->last_membership_node_list
,
861 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
864 enum tlv_reply_error_code
865 qnetd_algo_ffsplit_timer_callback(struct qnetd_client
*client
, int *reschedule_timer
,
866 int *send_vote
, enum tlv_vote
*result_vote
)
869 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
872 static struct qnetd_algorithm qnetd_algo_ffsplit
= {
873 .init
= qnetd_algo_ffsplit_client_init
,
874 .config_node_list_received
= qnetd_algo_ffsplit_config_node_list_received
,
875 .membership_node_list_received
= qnetd_algo_ffsplit_membership_node_list_received
,
876 .quorum_node_list_received
= qnetd_algo_ffsplit_quorum_node_list_received
,
877 .client_disconnect
= qnetd_algo_ffsplit_client_disconnect
,
878 .ask_for_vote_received
= qnetd_algo_ffsplit_ask_for_vote_received
,
879 .vote_info_reply_received
= qnetd_algo_ffsplit_vote_info_reply_received
,
880 .heuristics_change_received
= qnetd_algo_ffsplit_heuristics_change_received
,
881 .timer_callback
= qnetd_algo_ffsplit_timer_callback
,
884 enum tlv_reply_error_code
qnetd_algo_ffsplit_register()
887 return (qnetd_algorithm_register(TLV_DECISION_ALGORITHM_TYPE_FFSPLIT
, &qnetd_algo_ffsplit
));