2 * Copyright (c) 2015-2019 Red Hat, Inc.
6 * Author: Jan Friesse (jfriesse@redhat.com)
8 * This software licensed under BSD license, the text of which follows:
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are met:
13 * - Redistributions of source code must retain the above copyright notice,
14 * this list of conditions and the following disclaimer.
15 * - Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 * - Neither the name of the Red Hat, Inc. nor the names of its
19 * contributors may be used to endorse or promote products derived from this
20 * software without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32 * THE POSSIBILITY OF SUCH DAMAGE.
35 #include <sys/types.h>
40 #include "qnetd-algo-ffsplit.h"
41 #include "qnetd-log-debug.h"
42 #include "qnetd-cluster-list.h"
43 #include "qnetd-cluster.h"
44 #include "qnetd-client-send.h"
/*
 * Phase of the cluster-wide vote distribution. The value is kept in
 * struct qnetd_algo_ffsplit_cluster_data and is advanced by
 * qnetd_algo_ffsplit_do and qnetd_algo_ffsplit_vote_info_reply_received.
 */
enum qnetd_algo_ffsplit_cluster_state {
	/* No vote round in progress */
	QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE,
	/* A change arrived, membership has not been found stable yet */
	QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_STABLE_MEMBERSHIP,
	/* NACK votes were sent and not all of them are confirmed */
	QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS,
	/* ACK votes were sent and not all of them are confirmed */
	QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS,
};
53 struct qnetd_algo_ffsplit_cluster_data
{
54 enum qnetd_algo_ffsplit_cluster_state cluster_state
;
55 const struct node_list
*quorate_partition_node_list
;
/*
 * Per-client vote state. SENDING_* is assigned by
 * qnetd_algo_ffsplit_update_nodes_state and the state returns to
 * WAITING_FOR_CHANGE once the client confirms the vote
 * (qnetd_algo_ffsplit_vote_info_reply_received) or leaves.
 */
enum qnetd_algo_ffsplit_client_state {
	/* Nothing is pending for the client */
	QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE,
	/* Client is outside of the selected partition and is to receive NACK */
	QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK,
	/* Client is inside of the selected partition and is to receive ACK */
	QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK,
};
64 struct qnetd_algo_ffsplit_client_data
{
65 enum qnetd_algo_ffsplit_client_state client_state
;
66 uint32_t vote_info_expected_seq_num
;
69 enum tlv_reply_error_code
70 qnetd_algo_ffsplit_client_init(struct qnetd_client
*client
)
72 struct qnetd_algo_ffsplit_cluster_data
*cluster_data
;
73 struct qnetd_algo_ffsplit_client_data
*client_data
;
75 if (qnetd_cluster_size(client
->cluster
) == 1) {
76 cluster_data
= malloc(sizeof(*cluster_data
));
77 if (cluster_data
== NULL
) {
78 log(LOG_ERR
, "ffsplit: Can't initialize cluster data for client %s",
81 return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR
);
83 memset(cluster_data
, 0, sizeof(*cluster_data
));
84 cluster_data
->cluster_state
= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE
;
85 cluster_data
->quorate_partition_node_list
= NULL
;
87 client
->cluster
->algorithm_data
= cluster_data
;
90 client_data
= malloc(sizeof(*client_data
));
91 if (client_data
== NULL
) {
92 log(LOG_ERR
, "ffsplit: Can't initialize node data for client %s",
95 return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR
);
97 memset(client_data
, 0, sizeof(*client_data
));
98 client_data
->client_state
= QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE
;
99 client
->algorithm_data
= client_data
;
101 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
105 qnetd_algo_ffsplit_is_preferred_partition(const struct qnetd_client
*client
,
106 const struct node_list
*config_node_list
, const struct node_list
*membership_node_list
)
108 uint32_t preferred_node_id
;
109 struct node_list_entry
*node_entry
;
112 preferred_node_id
= 0;
115 switch (client
->tie_breaker
.mode
) {
116 case TLV_TIE_BREAKER_MODE_LOWEST
:
117 node_entry
= TAILQ_FIRST(config_node_list
);
119 preferred_node_id
= node_entry
->node_id
;
121 TAILQ_FOREACH(node_entry
, config_node_list
, entries
) {
122 if (node_entry
->node_id
< preferred_node_id
) {
123 preferred_node_id
= node_entry
->node_id
;
128 case TLV_TIE_BREAKER_MODE_HIGHEST
:
129 node_entry
= TAILQ_FIRST(config_node_list
);
131 preferred_node_id
= node_entry
->node_id
;
133 TAILQ_FOREACH(node_entry
, config_node_list
, entries
) {
134 if (node_entry
->node_id
> preferred_node_id
) {
135 preferred_node_id
= node_entry
->node_id
;
140 case TLV_TIE_BREAKER_MODE_NODE_ID
:
141 preferred_node_id
= client
->tie_breaker
.node_id
;
146 if (!case_processed
) {
147 log(LOG_CRIT
, "qnetd_algo_ffsplit_is_preferred_partition unprocessed "
152 return (node_list_find_node_id(membership_node_list
, preferred_node_id
) != NULL
);
156 qnetd_algo_ffsplit_is_membership_stable(const struct qnetd_client
*client
, int client_leaving
,
157 const struct tlv_ring_id
*ring_id
, const struct node_list
*config_node_list
,
158 const struct node_list
*membership_node_list
)
160 const struct qnetd_client
*iter_client1
, *iter_client2
;
161 const struct node_list
*config_node_list1
, *config_node_list2
;
162 const struct node_list
*membership_node_list1
, *membership_node_list2
;
163 const struct node_list_entry
*iter_node1
, *iter_node2
;
164 const struct node_list_entry
*iter_node3
, *iter_node4
;
165 const struct tlv_ring_id
*ring_id1
, *ring_id2
;
168 * Test if all active clients share same config list.
170 TAILQ_FOREACH(iter_client1
, &client
->cluster
->client_list
, cluster_entries
) {
171 TAILQ_FOREACH(iter_client2
, &client
->cluster
->client_list
, cluster_entries
) {
172 if (iter_client1
== iter_client2
) {
176 if (iter_client1
->node_id
== client
->node_id
) {
177 if (client_leaving
) {
181 config_node_list1
= config_node_list
;
183 config_node_list1
= &iter_client1
->configuration_node_list
;
186 if (iter_client2
->node_id
== client
->node_id
) {
187 if (client_leaving
) {
191 config_node_list2
= config_node_list
;
193 config_node_list2
= &iter_client2
->configuration_node_list
;
197 * Walk thru all node ids in given config node list...
199 TAILQ_FOREACH(iter_node1
, config_node_list1
, entries
) {
201 * ... and try to find given node id in other list
203 iter_node2
= node_list_find_node_id(config_node_list2
, iter_node1
->node_id
);
205 if (iter_node2
== NULL
) {
207 * Node with iter_node1->node_id was not found in
208 * config_node_list2 -> lists doesn't match
217 * Test if same partitions share same ring ids and membership node list
219 TAILQ_FOREACH(iter_client1
, &client
->cluster
->client_list
, cluster_entries
) {
220 if (iter_client1
->node_id
== client
->node_id
) {
221 if (client_leaving
) {
225 membership_node_list1
= membership_node_list
;
228 membership_node_list1
= &iter_client1
->last_membership_node_list
;
229 ring_id1
= &iter_client1
->last_ring_id
;
233 * Walk thru all memberships nodes
235 TAILQ_FOREACH(iter_node1
, membership_node_list1
, entries
) {
237 * try to find client with given node id
239 iter_client2
= qnetd_cluster_find_client_by_node_id(client
->cluster
,
240 iter_node1
->node_id
);
241 if (iter_client2
== NULL
) {
243 * Client with given id is not connected
248 if (iter_client2
->node_id
== client
->node_id
) {
249 if (client_leaving
) {
253 membership_node_list2
= membership_node_list
;
256 membership_node_list2
= &iter_client2
->last_membership_node_list
;
257 ring_id2
= &iter_client2
->last_ring_id
;
263 if (!tlv_ring_id_eq(ring_id1
, ring_id2
)) {
268 * Now compare that membership node list equals, so walk thru all
271 TAILQ_FOREACH(iter_node3
, membership_node_list1
, entries
) {
273 * ... and try to find given node id in other membership node list
275 iter_node4
= node_list_find_node_id(membership_node_list2
, iter_node3
->node_id
);
277 if (iter_node4
== NULL
) {
279 * Node with iter_node3->node_id was not found in
280 * membership_node_list2 -> lists doesn't match
292 qnetd_algo_ffsplit_get_active_clients_in_partition_stats(const struct qnetd_client
*client
,
293 const struct node_list
*client_membership_node_list
, enum tlv_heuristics client_heuristics
,
294 size_t *no_clients
, size_t *no_heuristics_pass
, size_t *no_heuristics_fail
)
296 const struct node_list_entry
*iter_node
;
297 const struct qnetd_client
*iter_client
;
298 enum tlv_heuristics iter_heuristics
;
301 *no_heuristics_pass
= 0;
302 *no_heuristics_fail
= 0;
304 if (client
== NULL
|| client_membership_node_list
== NULL
) {
308 TAILQ_FOREACH(iter_node
, client_membership_node_list
, entries
) {
309 iter_client
= qnetd_cluster_find_client_by_node_id(client
->cluster
,
311 if (iter_client
!= NULL
) {
314 if (iter_client
== client
) {
315 iter_heuristics
= client_heuristics
;
317 iter_heuristics
= iter_client
->last_heuristics
;
320 if (iter_heuristics
== TLV_HEURISTICS_PASS
) {
321 (*no_heuristics_pass
)++;
322 } else if (iter_heuristics
== TLV_HEURISTICS_FAIL
) {
323 (*no_heuristics_fail
)++;
330 * Compares two partitions. Return 1 if client1, config_node_list1, membership_node_list1 is
331 * "better" than client2, config_node_list2, membership_node_list2
334 qnetd_algo_ffsplit_partition_cmp(const struct qnetd_client
*client1
,
335 const struct node_list
*config_node_list1
, const struct node_list
*membership_node_list1
,
336 enum tlv_heuristics heuristics_1
,
337 const struct qnetd_client
*client2
,
338 const struct node_list
*config_node_list2
, const struct node_list
*membership_node_list2
,
339 enum tlv_heuristics heuristics_2
)
341 size_t part1_active_clients
, part2_active_clients
;
342 size_t part1_no_heuristics_pass
, part2_no_heuristics_pass
;
343 size_t part1_no_heuristics_fail
, part2_no_heuristics_fail
;
344 size_t part1_score
, part2_score
;
350 if (node_list_size(config_node_list1
) % 2 != 0) {
352 * Odd clusters never split into 50:50.
354 if (node_list_size(membership_node_list1
) > node_list_size(config_node_list1
) / 2) {
355 res
= 1; goto exit_res
;
357 res
= 0; goto exit_res
;
360 if (node_list_size(membership_node_list1
) > node_list_size(config_node_list1
) / 2) {
361 res
= 1; goto exit_res
;
362 } else if (node_list_size(membership_node_list1
) < node_list_size(config_node_list1
) / 2) {
363 res
= 0; goto exit_res
;
371 * Check how many active clients are in partitions and heuristics results
373 qnetd_algo_ffsplit_get_active_clients_in_partition_stats(client1
,
374 membership_node_list1
, heuristics_1
, &part1_active_clients
,
375 &part1_no_heuristics_pass
, &part1_no_heuristics_fail
);
376 qnetd_algo_ffsplit_get_active_clients_in_partition_stats(client2
,
377 membership_node_list2
, heuristics_2
, &part2_active_clients
,
378 &part2_no_heuristics_pass
, &part2_no_heuristics_fail
);
381 * Partition can contain clients with one of 4 states:
382 * 1. Not-connected to qnetd (D)
383 * 2. Disabled heuristics (U)
384 * 3. Enabled heuristics with pass result (P)
385 * 4. Enabled heuristics with fail result (F)
387 * The question is, what partition should get vote is kind of hard with
388 * so much states. Following simple "score" seems to be good enough, but may
389 * be suboptimal in some cases. As and example let's say there are
390 * 2 partitions with 4 nodes each. Partition 1 looks like PDDD and partition 2 looks
391 * like FUUU. Partition 1 score is 1 + (1 - 0), partition 2 score is 4 + (0 - 1).
392 * Partition 2 wins eventho there is one processor with failed heuristics.
394 part1_score
= part1_active_clients
+ (part1_no_heuristics_pass
- part1_no_heuristics_fail
);
395 part2_score
= part2_active_clients
+ (part2_no_heuristics_pass
- part2_no_heuristics_fail
);
397 if (part1_score
> part2_score
) {
398 res
= 1; goto exit_res
;
399 } else if (part1_score
< part2_score
) {
400 res
= 0; goto exit_res
;
403 if (part1_active_clients
> part2_active_clients
) {
404 res
= 1; goto exit_res
;
405 } else if (part1_active_clients
< part2_active_clients
) {
406 res
= 0; goto exit_res
;
410 * Number of active clients in both partitions equals. Use tie-breaker.
413 if (qnetd_algo_ffsplit_is_preferred_partition(client1
, config_node_list1
,
414 membership_node_list1
)) {
415 res
= 1; goto exit_res
;
417 res
= 0; goto exit_res
;
423 log(LOG_CRIT
, "qnetd_algo_ffsplit_partition_cmp unhandled case");
432 * Select best partition for given client->cluster.
433 * If there is no partition which could become quorate, NULL is returned
435 static const struct node_list
*
436 qnetd_algo_ffsplit_select_partition(const struct qnetd_client
*client
, int client_leaving
,
437 const struct node_list
*config_node_list
, const struct node_list
*membership_node_list
,
438 enum tlv_heuristics client_heuristics
)
440 const struct qnetd_client
*iter_client
;
441 const struct qnetd_client
*best_client
;
442 const struct node_list
*best_config_node_list
, *best_membership_node_list
;
443 const struct node_list
*iter_config_node_list
, *iter_membership_node_list
;
444 enum tlv_heuristics iter_heuristics
, best_heuristics
;
447 best_config_node_list
= best_membership_node_list
= NULL
;
448 best_heuristics
= TLV_HEURISTICS_UNDEFINED
;
453 TAILQ_FOREACH(iter_client
, &client
->cluster
->client_list
, cluster_entries
) {
454 if (iter_client
->node_id
== client
->node_id
) {
455 if (client_leaving
) {
459 iter_config_node_list
= config_node_list
;
460 iter_membership_node_list
= membership_node_list
;
461 iter_heuristics
= client_heuristics
;
463 iter_config_node_list
= &iter_client
->configuration_node_list
;
464 iter_membership_node_list
= &iter_client
->last_membership_node_list
;
465 iter_heuristics
= iter_client
->last_heuristics
;
468 if (qnetd_algo_ffsplit_partition_cmp(iter_client
, iter_config_node_list
,
469 iter_membership_node_list
, iter_heuristics
, best_client
, best_config_node_list
,
470 best_membership_node_list
, best_heuristics
) > 0) {
471 best_client
= iter_client
;
472 best_config_node_list
= iter_config_node_list
;
473 best_membership_node_list
= iter_membership_node_list
;
474 best_heuristics
= iter_heuristics
;
478 return (best_membership_node_list
);
482 * Update state of all nodes to match quorate_partition_node_list
485 qnetd_algo_ffsplit_update_nodes_state(struct qnetd_client
*client
, int client_leaving
,
486 const struct node_list
*quorate_partition_node_list
)
488 const struct qnetd_client
*iter_client
;
489 struct qnetd_algo_ffsplit_client_data
*iter_client_data
;
491 TAILQ_FOREACH(iter_client
, &client
->cluster
->client_list
, cluster_entries
) {
492 iter_client_data
= (struct qnetd_algo_ffsplit_client_data
*)iter_client
->algorithm_data
;
494 if (iter_client
->node_id
== client
->node_id
&& client_leaving
) {
495 iter_client_data
->client_state
= QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE
;
500 if (quorate_partition_node_list
== NULL
||
501 node_list_find_node_id(quorate_partition_node_list
, iter_client
->node_id
) == NULL
) {
502 iter_client_data
->client_state
= QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK
;
504 iter_client_data
->client_state
= QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK
;
510 * Send vote info. If client_leaving is set, client is ignored. if send_acks
511 * is set, only ACK votes are sent (nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK state),
512 * otherwise only NACK votes are sent (nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK state)
514 * Returns number of send votes
517 qnetd_algo_ffsplit_send_votes(struct qnetd_client
*client
, int client_leaving
,
518 const struct tlv_ring_id
*ring_id
, int send_acks
)
521 struct qnetd_client
*iter_client
;
522 struct qnetd_algo_ffsplit_client_data
*iter_client_data
;
523 const struct tlv_ring_id
*ring_id_to_send
;
524 enum tlv_vote vote_to_send
;
528 TAILQ_FOREACH(iter_client
, &client
->cluster
->client_list
, cluster_entries
) {
529 if (iter_client
->node_id
== client
->node_id
) {
530 if (client_leaving
) {
534 ring_id_to_send
= ring_id
;
536 ring_id_to_send
= &iter_client
->last_ring_id
;
539 iter_client_data
= (struct qnetd_algo_ffsplit_client_data
*)iter_client
->algorithm_data
;
540 vote_to_send
= TLV_VOTE_UNDEFINED
;
543 if (iter_client_data
->client_state
== QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK
) {
544 vote_to_send
= TLV_VOTE_ACK
;
547 if (iter_client_data
->client_state
== QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK
) {
548 vote_to_send
= TLV_VOTE_NACK
;
552 if (vote_to_send
!= TLV_VOTE_UNDEFINED
) {
553 iter_client_data
->vote_info_expected_seq_num
++;
556 if (qnetd_client_send_vote_info(iter_client
,
557 iter_client_data
->vote_info_expected_seq_num
, ring_id_to_send
,
558 vote_to_send
) == -1) {
559 client
->schedule_disconnect
= 1;
568 * Return number of clients in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK state if sending_acks is
569 * set or number of nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK state if sending_acks is
573 qnetd_algo_ffsplit_no_clients_in_sending_state(struct qnetd_client
*client
, int sending_acks
)
576 struct qnetd_client
*iter_client
;
577 struct qnetd_algo_ffsplit_client_data
*iter_client_data
;
581 TAILQ_FOREACH(iter_client
, &client
->cluster
->client_list
, cluster_entries
) {
582 iter_client_data
= (struct qnetd_algo_ffsplit_client_data
*)iter_client
->algorithm_data
;
585 iter_client_data
->client_state
== QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK
) {
590 iter_client_data
->client_state
== QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK
) {
599 qnetd_algo_ffsplit_do(struct qnetd_client
*client
, int client_leaving
,
600 const struct tlv_ring_id
*ring_id
, const struct node_list
*config_node_list
,
601 const struct node_list
*membership_node_list
, enum tlv_heuristics client_heuristics
)
603 struct qnetd_algo_ffsplit_cluster_data
*cluster_data
;
604 const struct node_list
*quorate_partition_node_list
;
606 cluster_data
= (struct qnetd_algo_ffsplit_cluster_data
*)client
->cluster
->algorithm_data
;
608 cluster_data
->cluster_state
= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_STABLE_MEMBERSHIP
;
610 if (!qnetd_algo_ffsplit_is_membership_stable(client
, client_leaving
,
611 ring_id
, config_node_list
, membership_node_list
)) {
613 * Wait until membership is stable
615 log(LOG_DEBUG
, "ffsplit: Membership for cluster %s is not yet stable", client
->cluster_name
);
617 return (TLV_VOTE_WAIT_FOR_REPLY
);
620 log(LOG_DEBUG
, "ffsplit: Membership for cluster %s is now stable", client
->cluster_name
);
622 quorate_partition_node_list
= qnetd_algo_ffsplit_select_partition(client
, client_leaving
,
623 config_node_list
, membership_node_list
, client_heuristics
);
624 cluster_data
->quorate_partition_node_list
= quorate_partition_node_list
;
626 if (quorate_partition_node_list
== NULL
) {
627 log(LOG_DEBUG
, "ffsplit: No quorate partition was selected");
629 log(LOG_DEBUG
, "ffsplit: Quorate partition selected");
630 qnetd_log_debug_dump_node_list(client
, quorate_partition_node_list
);
633 qnetd_algo_ffsplit_update_nodes_state(client
, client_leaving
, quorate_partition_node_list
);
635 cluster_data
->cluster_state
= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS
;
637 if (qnetd_algo_ffsplit_send_votes(client
, client_leaving
, ring_id
, 0) == 0) {
638 log(LOG_DEBUG
, "ffsplit: No client gets NACK");
640 * No one gets nack -> send acks
642 cluster_data
->cluster_state
= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS
;
644 if (qnetd_algo_ffsplit_send_votes(client
, client_leaving
, ring_id
, 1) == 0) {
645 log(LOG_DEBUG
, "ffsplit: No client gets ACK");
647 * No one gets acks -> finished
649 cluster_data
->cluster_state
= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE
;
653 return (TLV_VOTE_NO_CHANGE
);
656 enum tlv_reply_error_code
657 qnetd_algo_ffsplit_config_node_list_received(struct qnetd_client
*client
,
658 uint32_t msg_seq_num
, int config_version_set
, uint64_t config_version
,
659 const struct node_list
*nodes
, int initial
, enum tlv_vote
*result_vote
)
662 if (node_list_size(nodes
) == 0) {
664 * Empty node list shouldn't happen
666 log(LOG_ERR
, "ffsplit: Received empty config node list for client %s",
669 return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST
);
672 if (node_list_find_node_id(nodes
, client
->node_id
) == NULL
) {
674 * Current node is not in node list
676 log(LOG_ERR
, "ffsplit: Received config node list without client %s",
679 return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST
);
682 if (initial
|| node_list_size(&client
->last_membership_node_list
) == 0) {
684 * Initial node list -> membership is going to be send by client
686 *result_vote
= TLV_VOTE_ASK_LATER
;
688 *result_vote
= qnetd_algo_ffsplit_do(client
, 0, &client
->last_ring_id
,
689 nodes
, &client
->last_membership_node_list
, client
->last_heuristics
);
692 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
696 * Called after client sent membership node list.
697 * All client fields are already set. Nodes is actual node list.
698 * msg_seq_num is 32-bit number set by client. If client sent config file version,
699 * config_version_set is set to 1 and config_version contains valid config file version.
700 * ring_id and quorate are copied from client votequorum callback.
702 * Function has to return result_vote. This can be one of ack/nack, ask_later (client
703 * should ask later for a vote) or wait_for_reply (client should wait for reply).
705 * Return TLV_REPLY_ERROR_CODE_NO_ERROR on success, different TLV_REPLY_ERROR_CODE_*
706 * on failure (error is send back to client)
709 enum tlv_reply_error_code
710 qnetd_algo_ffsplit_membership_node_list_received(struct qnetd_client
*client
,
711 uint32_t msg_seq_num
, const struct tlv_ring_id
*ring_id
,
712 const struct node_list
*nodes
, enum tlv_heuristics heuristics
, enum tlv_vote
*result_vote
)
715 if (node_list_size(nodes
) == 0) {
717 * Empty node list shouldn't happen
719 log(LOG_ERR
, "ffsplit: Received empty membership node list for client %s",
722 return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST
);
725 if (node_list_find_node_id(nodes
, client
->node_id
) == NULL
) {
727 * Current node is not in node list
729 log(LOG_ERR
, "ffsplit: Received membership node list without client %s",
732 return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST
);
735 if (node_list_size(&client
->configuration_node_list
) == 0) {
737 * Config node list not received -> it's going to be sent later
739 *result_vote
= TLV_VOTE_ASK_LATER
;
741 *result_vote
= qnetd_algo_ffsplit_do(client
, 0, ring_id
,
742 &client
->configuration_node_list
, nodes
, heuristics
);
745 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
748 enum tlv_reply_error_code
749 qnetd_algo_ffsplit_quorum_node_list_received(struct qnetd_client
*client
,
750 uint32_t msg_seq_num
, enum tlv_quorate quorate
, const struct node_list
*nodes
,
751 enum tlv_vote
*result_vote
)
755 * Quorum node list is informative -> no change
757 *result_vote
= TLV_VOTE_NO_CHANGE
;
759 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
763 qnetd_algo_ffsplit_client_disconnect(struct qnetd_client
*client
, int server_going_down
)
766 (void)qnetd_algo_ffsplit_do(client
, 1, &client
->last_ring_id
,
767 &client
->configuration_node_list
, &client
->last_membership_node_list
,
768 client
->last_heuristics
);
770 free(client
->algorithm_data
);
772 if (qnetd_cluster_size(client
->cluster
) == 1) {
774 * Last client in the cluster
776 free(client
->cluster
->algorithm_data
);
780 enum tlv_reply_error_code
781 qnetd_algo_ffsplit_ask_for_vote_received(struct qnetd_client
*client
, uint32_t msg_seq_num
,
782 enum tlv_vote
*result_vote
)
786 * Ask for vote is not supported in current algorithm
788 return (TLV_REPLY_ERROR_CODE_UNSUPPORTED_DECISION_ALGORITHM_MESSAGE
);
791 enum tlv_reply_error_code
792 qnetd_algo_ffsplit_vote_info_reply_received(struct qnetd_client
*client
, uint32_t msg_seq_num
)
794 struct qnetd_algo_ffsplit_cluster_data
*cluster_data
;
795 struct qnetd_algo_ffsplit_client_data
*client_data
;
797 cluster_data
= (struct qnetd_algo_ffsplit_cluster_data
*)client
->cluster
->algorithm_data
;
798 client_data
= (struct qnetd_algo_ffsplit_client_data
*)client
->algorithm_data
;
800 if (client_data
->vote_info_expected_seq_num
!= msg_seq_num
) {
801 log(LOG_DEBUG
, "ffsplit: Received old vote info reply from client %s",
804 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
807 client_data
->client_state
= QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE
;
809 if (cluster_data
->cluster_state
!= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS
&&
810 cluster_data
->cluster_state
!= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS
) {
811 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
814 if (cluster_data
->cluster_state
== QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS
) {
815 if (qnetd_algo_ffsplit_no_clients_in_sending_state(client
, 0) == 0) {
816 log(LOG_DEBUG
, "ffsplit: All NACK votes sent for cluster %s",
817 client
->cluster_name
);
819 cluster_data
->cluster_state
= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS
;
821 if (qnetd_algo_ffsplit_send_votes(client
, 0, &client
->last_ring_id
, 1) == 0) {
822 log(LOG_DEBUG
, "ffsplit: No client gets ACK");
824 * No one gets acks -> finished
826 cluster_data
->cluster_state
= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE
;
830 if (qnetd_algo_ffsplit_no_clients_in_sending_state(client
, 1) == 0) {
831 log(LOG_DEBUG
, "ffsplit: All ACK votes sent for cluster %s",
832 client
->cluster_name
);
834 cluster_data
->cluster_state
= QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE
;
838 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
841 enum tlv_reply_error_code
842 qnetd_algo_ffsplit_heuristics_change_received(struct qnetd_client
*client
, uint32_t msg_seq_num
,
843 enum tlv_heuristics heuristics
, enum tlv_vote
*result_vote
)
846 if (node_list_size(&client
->configuration_node_list
) == 0 ||
847 node_list_size(&client
->last_membership_node_list
) == 0) {
849 * Config or membership node list not received -> it's going to be sent later
851 *result_vote
= TLV_VOTE_ASK_LATER
;
853 *result_vote
= qnetd_algo_ffsplit_do(client
, 0, &client
->last_ring_id
,
854 &client
->configuration_node_list
, &client
->last_membership_node_list
,
858 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
861 enum tlv_reply_error_code
862 qnetd_algo_ffsplit_timer_callback(struct qnetd_client
*client
, int *reschedule_timer
,
863 int *send_vote
, enum tlv_vote
*result_vote
)
866 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
869 static struct qnetd_algorithm qnetd_algo_ffsplit
= {
870 .init
= qnetd_algo_ffsplit_client_init
,
871 .config_node_list_received
= qnetd_algo_ffsplit_config_node_list_received
,
872 .membership_node_list_received
= qnetd_algo_ffsplit_membership_node_list_received
,
873 .quorum_node_list_received
= qnetd_algo_ffsplit_quorum_node_list_received
,
874 .client_disconnect
= qnetd_algo_ffsplit_client_disconnect
,
875 .ask_for_vote_received
= qnetd_algo_ffsplit_ask_for_vote_received
,
876 .vote_info_reply_received
= qnetd_algo_ffsplit_vote_info_reply_received
,
877 .heuristics_change_received
= qnetd_algo_ffsplit_heuristics_change_received
,
878 .timer_callback
= qnetd_algo_ffsplit_timer_callback
,
881 enum tlv_reply_error_code
qnetd_algo_ffsplit_register()
884 return (qnetd_algorithm_register(TLV_DECISION_ALGORITHM_TYPE_FFSPLIT
, &qnetd_algo_ffsplit
));