]> git.proxmox.com Git - mirror_corosync-qdevice.git/blob - qdevices/qnetd-algo-ffsplit.c
qnetd: Rename qnetd-log.c to log.c
[mirror_corosync-qdevice.git] / qdevices / qnetd-algo-ffsplit.c
1 /*
2 * Copyright (c) 2015-2019 Red Hat, Inc.
3 *
4 * All rights reserved.
5 *
6 * Author: Jan Friesse (jfriesse@redhat.com)
7 *
8 * This software licensed under BSD license, the text of which follows:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are met:
12 *
13 * - Redistributions of source code must retain the above copyright notice,
14 * this list of conditions and the following disclaimer.
15 * - Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 * - Neither the name of the Red Hat, Inc. nor the names of its
19 * contributors may be used to endorse or promote products derived from this
20 * software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32 * THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 #include <sys/types.h>
36
37 #include <string.h>
38
39 #include "log.h"
40 #include "qnetd-algo-ffsplit.h"
41 #include "qnetd-log-debug.h"
42 #include "qnetd-cluster-list.h"
43 #include "qnetd-cluster.h"
44 #include "qnetd-client-send.h"
45
/*
 * Phase of the vote distribution for one cluster.
 */
enum qnetd_algo_ffsplit_cluster_state {
	/* Idle: no vote round is in progress */
	QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE = 0,
	/* A change arrived, membership of connected clients is being checked */
	QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_STABLE_MEMBERSHIP = 1,
	/* NACK votes are being delivered */
	QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS = 2,
	/* ACK votes are being delivered */
	QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS = 3,
};
52
53 struct qnetd_algo_ffsplit_cluster_data {
54 enum qnetd_algo_ffsplit_cluster_state cluster_state;
55 const struct node_list *quorate_partition_node_list;
56 };
57
/*
 * Vote delivery state of a single client.
 */
enum qnetd_algo_ffsplit_client_state {
	/* No vote is pending for the client */
	QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE = 0,
	/* Client is supposed to get NACK vote */
	QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK = 1,
	/* Client is supposed to get ACK vote */
	QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK = 2,
};
63
64 struct qnetd_algo_ffsplit_client_data {
65 enum qnetd_algo_ffsplit_client_state client_state;
66 uint32_t vote_info_expected_seq_num;
67 };
68
69 enum tlv_reply_error_code
70 qnetd_algo_ffsplit_client_init(struct qnetd_client *client)
71 {
72 struct qnetd_algo_ffsplit_cluster_data *cluster_data;
73 struct qnetd_algo_ffsplit_client_data *client_data;
74
75 if (qnetd_cluster_size(client->cluster) == 1) {
76 cluster_data = malloc(sizeof(*cluster_data));
77 if (cluster_data == NULL) {
78 log(LOG_ERR, "ffsplit: Can't initialize cluster data for client %s",
79 client->addr_str);
80
81 return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR);
82 }
83 memset(cluster_data, 0, sizeof(*cluster_data));
84 cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
85 cluster_data->quorate_partition_node_list = NULL;
86
87 client->cluster->algorithm_data = cluster_data;
88 }
89
90 client_data = malloc(sizeof(*client_data));
91 if (client_data == NULL) {
92 log(LOG_ERR, "ffsplit: Can't initialize node data for client %s",
93 client->addr_str);
94
95 return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR);
96 }
97 memset(client_data, 0, sizeof(*client_data));
98 client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE;
99 client->algorithm_data = client_data;
100
101 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
102 }
103
104 static int
105 qnetd_algo_ffsplit_is_preferred_partition(const struct qnetd_client *client,
106 const struct node_list *config_node_list, const struct node_list *membership_node_list)
107 {
108 uint32_t preferred_node_id;
109 struct node_list_entry *node_entry;
110 int case_processed;
111
112 preferred_node_id = 0;
113 case_processed = 0;
114
115 switch (client->tie_breaker.mode) {
116 case TLV_TIE_BREAKER_MODE_LOWEST:
117 node_entry = TAILQ_FIRST(config_node_list);
118
119 preferred_node_id = node_entry->node_id;
120
121 TAILQ_FOREACH(node_entry, config_node_list, entries) {
122 if (node_entry->node_id < preferred_node_id) {
123 preferred_node_id = node_entry->node_id;
124 }
125 }
126 case_processed = 1;
127 break;
128 case TLV_TIE_BREAKER_MODE_HIGHEST:
129 node_entry = TAILQ_FIRST(config_node_list);
130
131 preferred_node_id = node_entry->node_id;
132
133 TAILQ_FOREACH(node_entry, config_node_list, entries) {
134 if (node_entry->node_id > preferred_node_id) {
135 preferred_node_id = node_entry->node_id;
136 }
137 }
138 case_processed = 1;
139 break;
140 case TLV_TIE_BREAKER_MODE_NODE_ID:
141 preferred_node_id = client->tie_breaker.node_id;
142 case_processed = 1;
143 break;
144 }
145
146 if (!case_processed) {
147 log(LOG_CRIT, "qnetd_algo_ffsplit_is_preferred_partition unprocessed "
148 "tie_breaker.mode");
149 exit(1);
150 }
151
152 return (node_list_find_node_id(membership_node_list, preferred_node_id) != NULL);
153 }
154
155 static int
156 qnetd_algo_ffsplit_is_membership_stable(const struct qnetd_client *client, int client_leaving,
157 const struct tlv_ring_id *ring_id, const struct node_list *config_node_list,
158 const struct node_list *membership_node_list)
159 {
160 const struct qnetd_client *iter_client1, *iter_client2;
161 const struct node_list *config_node_list1, *config_node_list2;
162 const struct node_list *membership_node_list1, *membership_node_list2;
163 const struct node_list_entry *iter_node1, *iter_node2;
164 const struct node_list_entry *iter_node3, *iter_node4;
165 const struct tlv_ring_id *ring_id1, *ring_id2;
166
167 /*
168 * Test if all active clients share same config list.
169 */
170 TAILQ_FOREACH(iter_client1, &client->cluster->client_list, cluster_entries) {
171 TAILQ_FOREACH(iter_client2, &client->cluster->client_list, cluster_entries) {
172 if (iter_client1 == iter_client2) {
173 continue;
174 }
175
176 if (iter_client1->node_id == client->node_id) {
177 if (client_leaving) {
178 continue;
179 }
180
181 config_node_list1 = config_node_list;
182 } else {
183 config_node_list1 = &iter_client1->configuration_node_list;
184 }
185
186 if (iter_client2->node_id == client->node_id) {
187 if (client_leaving) {
188 continue;
189 }
190
191 config_node_list2 = config_node_list;
192 } else {
193 config_node_list2 = &iter_client2->configuration_node_list;
194 }
195
196 /*
197 * Walk thru all node ids in given config node list...
198 */
199 TAILQ_FOREACH(iter_node1, config_node_list1, entries) {
200 /*
201 * ... and try to find given node id in other list
202 */
203 iter_node2 = node_list_find_node_id(config_node_list2, iter_node1->node_id);
204
205 if (iter_node2 == NULL) {
206 /*
207 * Node with iter_node1->node_id was not found in
208 * config_node_list2 -> lists doesn't match
209 */
210 return (0);
211 }
212 }
213 }
214 }
215
216 /*
217 * Test if same partitions share same ring ids and membership node list
218 */
219 TAILQ_FOREACH(iter_client1, &client->cluster->client_list, cluster_entries) {
220 if (iter_client1->node_id == client->node_id) {
221 if (client_leaving) {
222 continue;
223 }
224
225 membership_node_list1 = membership_node_list;
226 ring_id1 = ring_id;
227 } else {
228 membership_node_list1 = &iter_client1->last_membership_node_list;
229 ring_id1 = &iter_client1->last_ring_id;
230 }
231
232 /*
233 * Walk thru all memberships nodes
234 */
235 TAILQ_FOREACH(iter_node1, membership_node_list1, entries) {
236 /*
237 * try to find client with given node id
238 */
239 iter_client2 = qnetd_cluster_find_client_by_node_id(client->cluster,
240 iter_node1->node_id);
241 if (iter_client2 == NULL) {
242 /*
243 * Client with given id is not connected
244 */
245 continue;
246 }
247
248 if (iter_client2->node_id == client->node_id) {
249 if (client_leaving) {
250 continue;
251 }
252
253 membership_node_list2 = membership_node_list;
254 ring_id2 = ring_id;
255 } else {
256 membership_node_list2 = &iter_client2->last_membership_node_list;
257 ring_id2 = &iter_client2->last_ring_id;
258 }
259
260 /*
261 * Compare ring ids
262 */
263 if (!tlv_ring_id_eq(ring_id1, ring_id2)) {
264 return (0);
265 }
266
267 /*
268 * Now compare that membership node list equals, so walk thru all
269 * members ...
270 */
271 TAILQ_FOREACH(iter_node3, membership_node_list1, entries) {
272 /*
273 * ... and try to find given node id in other membership node list
274 */
275 iter_node4 = node_list_find_node_id(membership_node_list2, iter_node3->node_id);
276
277 if (iter_node4 == NULL) {
278 /*
279 * Node with iter_node3->node_id was not found in
280 * membership_node_list2 -> lists doesn't match
281 */
282 return (0);
283 }
284 }
285 }
286 }
287
288 return (1);
289 }
290
291 static void
292 qnetd_algo_ffsplit_get_active_clients_in_partition_stats(const struct qnetd_client *client,
293 const struct node_list *client_membership_node_list, enum tlv_heuristics client_heuristics,
294 size_t *no_clients, size_t *no_heuristics_pass, size_t *no_heuristics_fail)
295 {
296 const struct node_list_entry *iter_node;
297 const struct qnetd_client *iter_client;
298 enum tlv_heuristics iter_heuristics;
299
300 *no_clients = 0;
301 *no_heuristics_pass = 0;
302 *no_heuristics_fail = 0;
303
304 if (client == NULL || client_membership_node_list == NULL) {
305 return ;
306 }
307
308 TAILQ_FOREACH(iter_node, client_membership_node_list, entries) {
309 iter_client = qnetd_cluster_find_client_by_node_id(client->cluster,
310 iter_node->node_id);
311 if (iter_client != NULL) {
312 (*no_clients)++;
313
314 if (iter_client == client) {
315 iter_heuristics = client_heuristics;
316 } else {
317 iter_heuristics = iter_client->last_heuristics;
318 }
319
320 if (iter_heuristics == TLV_HEURISTICS_PASS) {
321 (*no_heuristics_pass)++;
322 } else if (iter_heuristics == TLV_HEURISTICS_FAIL) {
323 (*no_heuristics_fail)++;
324 }
325 }
326 }
327 }
328
329 /*
330 * Compares two partitions. Return 1 if client1, config_node_list1, membership_node_list1 is
331 * "better" than client2, config_node_list2, membership_node_list2
332 */
333 static int
334 qnetd_algo_ffsplit_partition_cmp(const struct qnetd_client *client1,
335 const struct node_list *config_node_list1, const struct node_list *membership_node_list1,
336 enum tlv_heuristics heuristics_1,
337 const struct qnetd_client *client2,
338 const struct node_list *config_node_list2, const struct node_list *membership_node_list2,
339 enum tlv_heuristics heuristics_2)
340 {
341 size_t part1_active_clients, part2_active_clients;
342 size_t part1_no_heuristics_pass, part2_no_heuristics_pass;
343 size_t part1_no_heuristics_fail, part2_no_heuristics_fail;
344 size_t part1_score, part2_score;
345
346 int res;
347
348 res = -1;
349
350 if (node_list_size(config_node_list1) % 2 != 0) {
351 /*
352 * Odd clusters never split into 50:50.
353 */
354 if (node_list_size(membership_node_list1) > node_list_size(config_node_list1) / 2) {
355 res = 1; goto exit_res;
356 } else {
357 res = 0; goto exit_res;
358 }
359 } else {
360 if (node_list_size(membership_node_list1) > node_list_size(config_node_list1) / 2) {
361 res = 1; goto exit_res;
362 } else if (node_list_size(membership_node_list1) < node_list_size(config_node_list1) / 2) {
363 res = 0; goto exit_res;
364 }
365
366 /*
367 * 50:50 split
368 */
369
370 /*
371 * Check how many active clients are in partitions and heuristics results
372 */
373 qnetd_algo_ffsplit_get_active_clients_in_partition_stats(client1,
374 membership_node_list1, heuristics_1, &part1_active_clients,
375 &part1_no_heuristics_pass, &part1_no_heuristics_fail);
376 qnetd_algo_ffsplit_get_active_clients_in_partition_stats(client2,
377 membership_node_list2, heuristics_2, &part2_active_clients,
378 &part2_no_heuristics_pass, &part2_no_heuristics_fail);
379
380 /*
381 * Partition can contain clients with one of 4 states:
382 * 1. Not-connected to qnetd (D)
383 * 2. Disabled heuristics (U)
384 * 3. Enabled heuristics with pass result (P)
385 * 4. Enabled heuristics with fail result (F)
386 *
387 * The question is, what partition should get vote is kind of hard with
388 * so much states. Following simple "score" seems to be good enough, but may
389 * be suboptimal in some cases. As and example let's say there are
390 * 2 partitions with 4 nodes each. Partition 1 looks like PDDD and partition 2 looks
391 * like FUUU. Partition 1 score is 1 + (1 - 0), partition 2 score is 4 + (0 - 1).
392 * Partition 2 wins eventho there is one processor with failed heuristics.
393 */
394 part1_score = part1_active_clients + (part1_no_heuristics_pass - part1_no_heuristics_fail);
395 part2_score = part2_active_clients + (part2_no_heuristics_pass - part2_no_heuristics_fail);
396
397 if (part1_score > part2_score) {
398 res = 1; goto exit_res;
399 } else if (part1_score < part2_score) {
400 res = 0; goto exit_res;
401 }
402
403 if (part1_active_clients > part2_active_clients) {
404 res = 1; goto exit_res;
405 } else if (part1_active_clients < part2_active_clients) {
406 res = 0; goto exit_res;
407 }
408
409 /*
410 * Number of active clients in both partitions equals. Use tie-breaker.
411 */
412
413 if (qnetd_algo_ffsplit_is_preferred_partition(client1, config_node_list1,
414 membership_node_list1)) {
415 res = 1; goto exit_res;
416 } else {
417 res = 0; goto exit_res;
418 }
419 }
420
421 exit_res:
422 if (res == -1) {
423 log(LOG_CRIT, "qnetd_algo_ffsplit_partition_cmp unhandled case");
424 exit(1);
425 /* NOTREACHED */
426 }
427
428 return (res);
429 }
430
431 /*
432 * Select best partition for given client->cluster.
433 * If there is no partition which could become quorate, NULL is returned
434 */
435 static const struct node_list *
436 qnetd_algo_ffsplit_select_partition(const struct qnetd_client *client, int client_leaving,
437 const struct node_list *config_node_list, const struct node_list *membership_node_list,
438 enum tlv_heuristics client_heuristics)
439 {
440 const struct qnetd_client *iter_client;
441 const struct qnetd_client *best_client;
442 const struct node_list *best_config_node_list, *best_membership_node_list;
443 const struct node_list *iter_config_node_list, *iter_membership_node_list;
444 enum tlv_heuristics iter_heuristics, best_heuristics;
445
446 best_client = NULL;
447 best_config_node_list = best_membership_node_list = NULL;
448 best_heuristics = TLV_HEURISTICS_UNDEFINED;
449
450 /*
451 * Get highest score
452 */
453 TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
454 if (iter_client->node_id == client->node_id) {
455 if (client_leaving) {
456 continue;
457 }
458
459 iter_config_node_list = config_node_list;
460 iter_membership_node_list = membership_node_list;
461 iter_heuristics = client_heuristics;
462 } else {
463 iter_config_node_list = &iter_client->configuration_node_list;
464 iter_membership_node_list = &iter_client->last_membership_node_list;
465 iter_heuristics = iter_client->last_heuristics;
466 }
467
468 if (qnetd_algo_ffsplit_partition_cmp(iter_client, iter_config_node_list,
469 iter_membership_node_list, iter_heuristics, best_client, best_config_node_list,
470 best_membership_node_list, best_heuristics) > 0) {
471 best_client = iter_client;
472 best_config_node_list = iter_config_node_list;
473 best_membership_node_list = iter_membership_node_list;
474 best_heuristics = iter_heuristics;
475 }
476 }
477
478 return (best_membership_node_list);
479 }
480
481 /*
482 * Update state of all nodes to match quorate_partition_node_list
483 */
484 static void
485 qnetd_algo_ffsplit_update_nodes_state(struct qnetd_client *client, int client_leaving,
486 const struct node_list *quorate_partition_node_list)
487 {
488 const struct qnetd_client *iter_client;
489 struct qnetd_algo_ffsplit_client_data *iter_client_data;
490
491 TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
492 iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data;
493
494 if (iter_client->node_id == client->node_id && client_leaving) {
495 iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE;
496
497 continue;
498 }
499
500 if (quorate_partition_node_list == NULL ||
501 node_list_find_node_id(quorate_partition_node_list, iter_client->node_id) == NULL) {
502 iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK;
503 } else {
504 iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK;
505 }
506 }
507 }
508
509 /*
510 * Send vote info. If client_leaving is set, client is ignored. if send_acks
511 * is set, only ACK votes are sent (nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK state),
512 * otherwise only NACK votes are sent (nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK state)
513 *
514 * Returns number of send votes
515 */
516 static size_t
517 qnetd_algo_ffsplit_send_votes(struct qnetd_client *client, int client_leaving,
518 const struct tlv_ring_id *ring_id, int send_acks)
519 {
520 size_t sent_votes;
521 struct qnetd_client *iter_client;
522 struct qnetd_algo_ffsplit_client_data *iter_client_data;
523 const struct tlv_ring_id *ring_id_to_send;
524 enum tlv_vote vote_to_send;
525
526 sent_votes = 0;
527
528 TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
529 if (iter_client->node_id == client->node_id) {
530 if (client_leaving) {
531 continue;
532 }
533
534 ring_id_to_send = ring_id;
535 } else {
536 ring_id_to_send = &iter_client->last_ring_id;
537 }
538
539 iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data;
540 vote_to_send = TLV_VOTE_UNDEFINED;
541
542 if (send_acks) {
543 if (iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK) {
544 vote_to_send = TLV_VOTE_ACK;
545 }
546 } else {
547 if (iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK) {
548 vote_to_send = TLV_VOTE_NACK;
549 }
550 }
551
552 if (vote_to_send != TLV_VOTE_UNDEFINED) {
553 iter_client_data->vote_info_expected_seq_num++;
554 sent_votes++;
555
556 if (qnetd_client_send_vote_info(iter_client,
557 iter_client_data->vote_info_expected_seq_num, ring_id_to_send,
558 vote_to_send) == -1) {
559 client->schedule_disconnect = 1;
560 }
561 }
562 }
563
564 return (sent_votes);
565 }
566
567 /*
568 * Return number of clients in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK state if sending_acks is
569 * set or number of nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK state if sending_acks is
570 * not set
571 */
572 static size_t
573 qnetd_algo_ffsplit_no_clients_in_sending_state(struct qnetd_client *client, int sending_acks)
574 {
575 size_t no_clients;
576 struct qnetd_client *iter_client;
577 struct qnetd_algo_ffsplit_client_data *iter_client_data;
578
579 no_clients = 0;
580
581 TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
582 iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data;
583
584 if (sending_acks &&
585 iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK) {
586 no_clients++;
587 }
588
589 if (!sending_acks &&
590 iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK) {
591 no_clients++;
592 }
593 }
594
595 return (no_clients);
596 }
597
598 static enum tlv_vote
599 qnetd_algo_ffsplit_do(struct qnetd_client *client, int client_leaving,
600 const struct tlv_ring_id *ring_id, const struct node_list *config_node_list,
601 const struct node_list *membership_node_list, enum tlv_heuristics client_heuristics)
602 {
603 struct qnetd_algo_ffsplit_cluster_data *cluster_data;
604 const struct node_list *quorate_partition_node_list;
605
606 cluster_data = (struct qnetd_algo_ffsplit_cluster_data *)client->cluster->algorithm_data;
607
608 cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_STABLE_MEMBERSHIP;
609
610 if (!qnetd_algo_ffsplit_is_membership_stable(client, client_leaving,
611 ring_id, config_node_list, membership_node_list)) {
612 /*
613 * Wait until membership is stable
614 */
615 log(LOG_DEBUG, "ffsplit: Membership for cluster %s is not yet stable", client->cluster_name);
616
617 return (TLV_VOTE_WAIT_FOR_REPLY);
618 }
619
620 log(LOG_DEBUG, "ffsplit: Membership for cluster %s is now stable", client->cluster_name);
621
622 quorate_partition_node_list = qnetd_algo_ffsplit_select_partition(client, client_leaving,
623 config_node_list, membership_node_list, client_heuristics);
624 cluster_data->quorate_partition_node_list = quorate_partition_node_list;
625
626 if (quorate_partition_node_list == NULL) {
627 log(LOG_DEBUG, "ffsplit: No quorate partition was selected");
628 } else {
629 log(LOG_DEBUG, "ffsplit: Quorate partition selected");
630 qnetd_log_debug_dump_node_list(client, quorate_partition_node_list);
631 }
632
633 qnetd_algo_ffsplit_update_nodes_state(client, client_leaving, quorate_partition_node_list);
634
635 cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS;
636
637 if (qnetd_algo_ffsplit_send_votes(client, client_leaving, ring_id, 0) == 0) {
638 log(LOG_DEBUG, "ffsplit: No client gets NACK");
639 /*
640 * No one gets nack -> send acks
641 */
642 cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS;
643
644 if (qnetd_algo_ffsplit_send_votes(client, client_leaving, ring_id, 1) == 0) {
645 log(LOG_DEBUG, "ffsplit: No client gets ACK");
646 /*
647 * No one gets acks -> finished
648 */
649 cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
650 }
651 }
652
653 return (TLV_VOTE_NO_CHANGE);
654 }
655
656 enum tlv_reply_error_code
657 qnetd_algo_ffsplit_config_node_list_received(struct qnetd_client *client,
658 uint32_t msg_seq_num, int config_version_set, uint64_t config_version,
659 const struct node_list *nodes, int initial, enum tlv_vote *result_vote)
660 {
661
662 if (node_list_size(nodes) == 0) {
663 /*
664 * Empty node list shouldn't happen
665 */
666 log(LOG_ERR, "ffsplit: Received empty config node list for client %s",
667 client->addr_str);
668
669 return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST);
670 }
671
672 if (node_list_find_node_id(nodes, client->node_id) == NULL) {
673 /*
674 * Current node is not in node list
675 */
676 log(LOG_ERR, "ffsplit: Received config node list without client %s",
677 client->addr_str);
678
679 return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST);
680 }
681
682 if (initial || node_list_size(&client->last_membership_node_list) == 0) {
683 /*
684 * Initial node list -> membership is going to be send by client
685 */
686 *result_vote = TLV_VOTE_ASK_LATER;
687 } else {
688 *result_vote = qnetd_algo_ffsplit_do(client, 0, &client->last_ring_id,
689 nodes, &client->last_membership_node_list, client->last_heuristics);
690 }
691
692 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
693 }
694
695 /*
696 * Called after client sent membership node list.
697 * All client fields are already set. Nodes is actual node list.
698 * msg_seq_num is 32-bit number set by client. If client sent config file version,
699 * config_version_set is set to 1 and config_version contains valid config file version.
700 * ring_id and quorate are copied from client votequorum callback.
701 *
702 * Function has to return result_vote. This can be one of ack/nack, ask_later (client
703 * should ask later for a vote) or wait_for_reply (client should wait for reply).
704 *
705 * Return TLV_REPLY_ERROR_CODE_NO_ERROR on success, different TLV_REPLY_ERROR_CODE_*
706 * on failure (error is send back to client)
707 */
708
709 enum tlv_reply_error_code
710 qnetd_algo_ffsplit_membership_node_list_received(struct qnetd_client *client,
711 uint32_t msg_seq_num, const struct tlv_ring_id *ring_id,
712 const struct node_list *nodes, enum tlv_heuristics heuristics, enum tlv_vote *result_vote)
713 {
714
715 if (node_list_size(nodes) == 0) {
716 /*
717 * Empty node list shouldn't happen
718 */
719 log(LOG_ERR, "ffsplit: Received empty membership node list for client %s",
720 client->addr_str);
721
722 return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST);
723 }
724
725 if (node_list_find_node_id(nodes, client->node_id) == NULL) {
726 /*
727 * Current node is not in node list
728 */
729 log(LOG_ERR, "ffsplit: Received membership node list without client %s",
730 client->addr_str);
731
732 return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST);
733 }
734
735 if (node_list_size(&client->configuration_node_list) == 0) {
736 /*
737 * Config node list not received -> it's going to be sent later
738 */
739 *result_vote = TLV_VOTE_ASK_LATER;
740 } else {
741 *result_vote = qnetd_algo_ffsplit_do(client, 0, ring_id,
742 &client->configuration_node_list, nodes, heuristics);
743 }
744
745 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
746 }
747
748 enum tlv_reply_error_code
749 qnetd_algo_ffsplit_quorum_node_list_received(struct qnetd_client *client,
750 uint32_t msg_seq_num, enum tlv_quorate quorate, const struct node_list *nodes,
751 enum tlv_vote *result_vote)
752 {
753
754 /*
755 * Quorum node list is informative -> no change
756 */
757 *result_vote = TLV_VOTE_NO_CHANGE;
758
759 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
760 }
761
762 void
763 qnetd_algo_ffsplit_client_disconnect(struct qnetd_client *client, int server_going_down)
764 {
765
766 (void)qnetd_algo_ffsplit_do(client, 1, &client->last_ring_id,
767 &client->configuration_node_list, &client->last_membership_node_list,
768 client->last_heuristics);
769
770 free(client->algorithm_data);
771
772 if (qnetd_cluster_size(client->cluster) == 1) {
773 /*
774 * Last client in the cluster
775 */
776 free(client->cluster->algorithm_data);
777 }
778 }
779
780 enum tlv_reply_error_code
781 qnetd_algo_ffsplit_ask_for_vote_received(struct qnetd_client *client, uint32_t msg_seq_num,
782 enum tlv_vote *result_vote)
783 {
784
785 /*
786 * Ask for vote is not supported in current algorithm
787 */
788 return (TLV_REPLY_ERROR_CODE_UNSUPPORTED_DECISION_ALGORITHM_MESSAGE);
789 }
790
791 enum tlv_reply_error_code
792 qnetd_algo_ffsplit_vote_info_reply_received(struct qnetd_client *client, uint32_t msg_seq_num)
793 {
794 struct qnetd_algo_ffsplit_cluster_data *cluster_data;
795 struct qnetd_algo_ffsplit_client_data *client_data;
796
797 cluster_data = (struct qnetd_algo_ffsplit_cluster_data *)client->cluster->algorithm_data;
798 client_data = (struct qnetd_algo_ffsplit_client_data *)client->algorithm_data;
799
800 if (client_data->vote_info_expected_seq_num != msg_seq_num) {
801 log(LOG_DEBUG, "ffsplit: Received old vote info reply from client %s",
802 client->addr_str);
803
804 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
805 }
806
807 client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE;
808
809 if (cluster_data->cluster_state != QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS &&
810 cluster_data->cluster_state != QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS) {
811 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
812 }
813
814 if (cluster_data->cluster_state == QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS) {
815 if (qnetd_algo_ffsplit_no_clients_in_sending_state(client, 0) == 0) {
816 log(LOG_DEBUG, "ffsplit: All NACK votes sent for cluster %s",
817 client->cluster_name);
818
819 cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS;
820
821 if (qnetd_algo_ffsplit_send_votes(client, 0, &client->last_ring_id, 1) == 0) {
822 log(LOG_DEBUG, "ffsplit: No client gets ACK");
823 /*
824 * No one gets acks -> finished
825 */
826 cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
827 }
828 }
829 } else {
830 if (qnetd_algo_ffsplit_no_clients_in_sending_state(client, 1) == 0) {
831 log(LOG_DEBUG, "ffsplit: All ACK votes sent for cluster %s",
832 client->cluster_name);
833
834 cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
835 }
836 }
837
838 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
839 }
840
841 enum tlv_reply_error_code
842 qnetd_algo_ffsplit_heuristics_change_received(struct qnetd_client *client, uint32_t msg_seq_num,
843 enum tlv_heuristics heuristics, enum tlv_vote *result_vote)
844 {
845
846 if (node_list_size(&client->configuration_node_list) == 0 ||
847 node_list_size(&client->last_membership_node_list) == 0) {
848 /*
849 * Config or membership node list not received -> it's going to be sent later
850 */
851 *result_vote = TLV_VOTE_ASK_LATER;
852 } else {
853 *result_vote = qnetd_algo_ffsplit_do(client, 0, &client->last_ring_id,
854 &client->configuration_node_list, &client->last_membership_node_list,
855 heuristics);
856 }
857
858 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
859 }
860
861 enum tlv_reply_error_code
862 qnetd_algo_ffsplit_timer_callback(struct qnetd_client *client, int *reschedule_timer,
863 int *send_vote, enum tlv_vote *result_vote)
864 {
865
866 return (TLV_REPLY_ERROR_CODE_NO_ERROR);
867 }
868
869 static struct qnetd_algorithm qnetd_algo_ffsplit = {
870 .init = qnetd_algo_ffsplit_client_init,
871 .config_node_list_received = qnetd_algo_ffsplit_config_node_list_received,
872 .membership_node_list_received = qnetd_algo_ffsplit_membership_node_list_received,
873 .quorum_node_list_received = qnetd_algo_ffsplit_quorum_node_list_received,
874 .client_disconnect = qnetd_algo_ffsplit_client_disconnect,
875 .ask_for_vote_received = qnetd_algo_ffsplit_ask_for_vote_received,
876 .vote_info_reply_received = qnetd_algo_ffsplit_vote_info_reply_received,
877 .heuristics_change_received = qnetd_algo_ffsplit_heuristics_change_received,
878 .timer_callback = qnetd_algo_ffsplit_timer_callback,
879 };
880
881 enum tlv_reply_error_code qnetd_algo_ffsplit_register()
882 {
883
884 return (qnetd_algorithm_register(TLV_DECISION_ALGORITHM_TYPE_FFSPLIT, &qnetd_algo_ffsplit));
885 }