]> git.proxmox.com Git - mirror_corosync-qdevice.git/commitdiff
qnetd: Add support for keep active partition vote
authorJan Friesse <jfriesse@redhat.com>
Tue, 22 Sep 2020 11:31:24 +0000 (13:31 +0200)
committerJan Friesse <jfriesse@redhat.com>
Tue, 22 Sep 2020 11:35:55 +0000 (13:35 +0200)
When a tie happens, prefer the partition containing members of the
previously active (quorate) partition. This is hard-coded
behavior of the LMS algorithm, so this setting affects only the
FFSplit algorithm. By default it is disabled for backwards
compatibility.

This solves a problem with FFSplit where node A (with the lowest id) is
killed, node B gets the vote, and then node A starts up, creates a
single-node membership and gets the vote.

Signed-off-by: Jan Friesse <jfriesse@redhat.com>
man/corosync-qnetd.8
qdevices/qnet-config.h
qdevices/qnetd-advanced-settings.c
qdevices/qnetd-advanced-settings.h
qdevices/qnetd-algo-ffsplit.c
qdevices/qnetd-client-msg-received.c
qdevices/qnetd-client.h

index 3ae5531b69f54876ed0d08c73f4b0fe11f29810b..7a108e854bad9ac83163c9bd3e604fd7a56b908b 100644 (file)
@@ -31,7 +31,7 @@
 .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 .\" * THE POSSIBILITY OF SUCH DAMAGE.
 .\" */
-.TH COROSYNC-QNETD 8 2020-09-15
+.TH COROSYNC-QNETD 8 2020-09-22
 .SH NAME
 corosync-qnetd \- QNet daemon
 .SH SYNOPSIS
@@ -236,6 +236,10 @@ Maximum size of a message received by IPC client. (4096)
 .TP
 .B ipc_max_send_size
 Maximum size of a message sent to an IPC client. (10485760)
+.TP
+.B keep_active_partition_tie_breaker
+When a tie happens, prefer the partition containing members of the previously active (quorate) partition.
+This is hard-coded behavior of the LMS algorithm, so this setting affects only the FFSplit algorithm. (off)
 .SH SEE ALSO
 .BR corosync-qnetd-tool (8)
 .BR corosync-qnetd-certutil (8)
index 3a9df193a6db6c5e4d60dbba2b02d119692554f8..81a479ecf826fa7020f5f167599f3a6202ad0eff 100644 (file)
@@ -85,6 +85,8 @@ extern "C" {
 #define QNETD_DEFAULT_IPC_MAX_SEND_SIZE                        (10*1024*1024)
 #define QNETD_MIN_IPC_RECEIVE_SEND_SIZE                        1024
 
+#define QNETD_DEFAULT_KEEP_ACTIVE_PARTITION_TIE_BREAKER        0
+
 #define QNETD_TOOL_PROGRAM_NAME                                "corosync-qnetd-tool"
 
 #define QDEVICE_NET_DEFAULT_NSS_DB_DIR                 COROSYSCONFDIR "/qdevice/net/nssdb"
index 04519f0a330a94b2e0cc14ae96a92c822763677e..313b3743513b79dacc67d56f60e99f086117f890 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016 Red Hat, Inc.
+ * Copyright (c) 2015-2020 Red Hat, Inc.
  *
  * All rights reserved.
  *
@@ -75,6 +75,8 @@ qnetd_advanced_settings_init(struct qnetd_advanced_settings *settings)
        settings->ipc_max_receive_size = QNETD_DEFAULT_IPC_MAX_RECEIVE_SIZE;
        settings->ipc_max_send_size = QNETD_DEFAULT_IPC_MAX_SEND_SIZE;
 
+       settings->keep_active_partition_tie_breaker = QNETD_DEFAULT_KEEP_ACTIVE_PARTITION_TIE_BREAKER;
+
        return (0);
 }
 
@@ -195,6 +197,12 @@ qnetd_advanced_settings_set(struct qnetd_advanced_settings *settings,
                }
 
                settings->ipc_max_send_size = (size_t)tmpll;
+       } else if (strcasecmp(option, "keep_active_partition_tie_breaker") == 0) {
+               if ((tmpll = utils_parse_bool_str(value)) == -1) {
+                       return (-2);
+               }
+
+               settings->keep_active_partition_tie_breaker = (uint8_t)tmpll;
        } else {
                return (-1);
        }
index b4ad0ae467a7c9cec6d89ebc45057ca4798e6071..835f46f4f7db0eeac8ca2cc9ef9911afb5ee61f2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016 Red Hat, Inc.
+ * Copyright (c) 2015-2020 Red Hat, Inc.
  *
  * All rights reserved.
  *
@@ -56,6 +56,7 @@ struct qnetd_advanced_settings {
        size_t ipc_max_clients;
        size_t ipc_max_send_size;
        size_t ipc_max_receive_size;
+       uint8_t keep_active_partition_tie_breaker;
 };
 
 extern int             qnetd_advanced_settings_init(struct qnetd_advanced_settings *settings);
index d6f01f4fb089291bfc9c53606bb0cc2c4cd4147b..6a39a98d6d2fce8dd09121f023988383c55bdd11 100644 (file)
@@ -53,7 +53,7 @@ enum qnetd_algo_ffsplit_cluster_state {
 
 struct qnetd_algo_ffsplit_cluster_data {
        enum qnetd_algo_ffsplit_cluster_state cluster_state;
-       const struct node_list *quorate_partition_node_list;
+       struct node_list quorate_partition_node_list;
 };
 
 enum qnetd_algo_ffsplit_client_state {
@@ -83,7 +83,7 @@ qnetd_algo_ffsplit_client_init(struct qnetd_client *client)
                }
                memset(cluster_data, 0, sizeof(*cluster_data));
                cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
-               cluster_data->quorate_partition_node_list = NULL;
+               node_list_init(&cluster_data->quorate_partition_node_list);
 
                client->cluster->algorithm_data = cluster_data;
        }
@@ -337,12 +337,17 @@ qnetd_algo_ffsplit_partition_cmp(const struct qnetd_client *client1,
     enum tlv_heuristics heuristics_1,
     const struct qnetd_client *client2,
     const struct node_list *config_node_list2, const struct node_list *membership_node_list2,
-    enum tlv_heuristics heuristics_2)
+    enum tlv_heuristics heuristics_2,
+    const struct node_list *quorate_partition_node_list)
 {
        size_t part1_active_clients, part2_active_clients;
        size_t part1_no_heuristics_pass, part2_no_heuristics_pass;
        size_t part1_no_heuristics_fail, part2_no_heuristics_fail;
        size_t part1_score, part2_score;
+       /* Result of node_list_find_node_id of client 1 node id in quorate_partition_node_list */
+       struct node_list_entry *qpnl_client1;
+       /* Result of node_list_find_node_id of client 2 node id in quorate_partition_node_list */
+       struct node_list_entry *qpnl_client2;
 
        int res;
 
@@ -407,6 +412,26 @@ qnetd_algo_ffsplit_partition_cmp(const struct qnetd_client *client1,
                        res = 0; goto exit_res;
                }
 
+               /*
+                * Use keep active partition tie-breaker if enabled for both clients
+                */
+               if (client1->keep_active_partition_tie_breaker &&
+                   client2->keep_active_partition_tie_breaker) {
+                       qpnl_client1 = node_list_find_node_id(quorate_partition_node_list, client1->node_id);
+                       qpnl_client2 = node_list_find_node_id(quorate_partition_node_list, client2->node_id);
+
+                       /*
+                        * Client 1 is in the quorate partition and client 2 isn't, or vice-versa.
+                        * If neither client is in the quorate partition, or both are, use the
+                        * next tie-breaker
+                        */
+                       if (qpnl_client1 != NULL && qpnl_client2 == NULL) {
+                               res = 1; goto exit_res;
+                       } else if (qpnl_client1 == NULL && qpnl_client2 != NULL) {
+                               res = 0; goto exit_res;
+                       }
+               }
+
                /*
                 * Number of active clients in both partitions equals. Use tie-breaker.
                 */
@@ -436,7 +461,7 @@ exit_res:
 static const struct node_list *
 qnetd_algo_ffsplit_select_partition(const struct qnetd_client *client, int client_leaving,
     const struct node_list *config_node_list, const struct node_list *membership_node_list,
-    enum tlv_heuristics client_heuristics)
+    const struct node_list *quorate_partition_node_list, enum tlv_heuristics client_heuristics)
 {
        const struct qnetd_client *iter_client;
        const struct qnetd_client *best_client;
@@ -468,7 +493,8 @@ qnetd_algo_ffsplit_select_partition(const struct qnetd_client *client, int clien
 
                if (qnetd_algo_ffsplit_partition_cmp(iter_client, iter_config_node_list,
                    iter_membership_node_list, iter_heuristics, best_client, best_config_node_list,
-                   best_membership_node_list, best_heuristics) > 0) {
+                   best_membership_node_list, best_heuristics,
+                   quorate_partition_node_list) > 0) {
                        best_client = iter_client;
                        best_config_node_list = iter_config_node_list;
                        best_membership_node_list = iter_membership_node_list;
@@ -596,10 +622,11 @@ qnetd_algo_ffsplit_no_clients_in_sending_state(struct qnetd_client *client, int
        return (no_clients);
 }
 
-static enum tlv_vote
+static enum tlv_reply_error_code
 qnetd_algo_ffsplit_do(struct qnetd_client *client, int client_leaving,
     const struct tlv_ring_id *ring_id, const struct node_list *config_node_list,
-    const struct node_list *membership_node_list, enum tlv_heuristics client_heuristics)
+    const struct node_list *membership_node_list, enum tlv_heuristics client_heuristics,
+    enum tlv_vote *result_vote)
 {
        struct qnetd_algo_ffsplit_cluster_data *cluster_data;
        const struct node_list *quorate_partition_node_list;
@@ -614,21 +641,31 @@ qnetd_algo_ffsplit_do(struct qnetd_client *client, int client_leaving,
                 * Wait until membership is stable
                 */
                log(LOG_DEBUG, "ffsplit: Membership for cluster %s is not yet stable", client->cluster_name);
+               *result_vote = TLV_VOTE_WAIT_FOR_REPLY;
 
-               return (TLV_VOTE_WAIT_FOR_REPLY);
+               return (TLV_REPLY_ERROR_CODE_NO_ERROR);
        }
 
        log(LOG_DEBUG, "ffsplit: Membership for cluster %s is now stable", client->cluster_name);
 
        quorate_partition_node_list = qnetd_algo_ffsplit_select_partition(client, client_leaving,
-           config_node_list, membership_node_list, client_heuristics);
-       cluster_data->quorate_partition_node_list = quorate_partition_node_list;
+           config_node_list, membership_node_list, &cluster_data->quorate_partition_node_list,
+           client_heuristics);
+
+       node_list_free(&cluster_data->quorate_partition_node_list);
 
        if (quorate_partition_node_list == NULL) {
                log(LOG_DEBUG, "ffsplit: No quorate partition was selected");
        } else {
                log(LOG_DEBUG, "ffsplit: Quorate partition selected");
                log_common_debug_dump_node_list(quorate_partition_node_list);
+
+               if (node_list_clone(&cluster_data->quorate_partition_node_list,
+                   quorate_partition_node_list) != 0) {
+                       log(LOG_ERR, "ffsplit: Can't clone quourate partition node list");
+
+                       return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR);
+               }
        }
 
        qnetd_algo_ffsplit_update_nodes_state(client, client_leaving, quorate_partition_node_list);
@@ -651,7 +688,9 @@ qnetd_algo_ffsplit_do(struct qnetd_client *client, int client_leaving,
                }
        }
 
-       return (TLV_VOTE_NO_CHANGE);
+       *result_vote = TLV_VOTE_NO_CHANGE;
+
+       return (TLV_REPLY_ERROR_CODE_NO_ERROR);
 }
 
 enum tlv_reply_error_code
@@ -659,6 +698,9 @@ qnetd_algo_ffsplit_config_node_list_received(struct qnetd_client *client,
     uint32_t msg_seq_num, int config_version_set, uint64_t config_version,
     const struct node_list *nodes, int initial, enum tlv_vote *result_vote)
 {
+       enum tlv_reply_error_code reply_error_code;
+
+       reply_error_code = TLV_REPLY_ERROR_CODE_NO_ERROR;
 
        if (node_list_size(nodes) == 0) {
                /*
@@ -686,11 +728,12 @@ qnetd_algo_ffsplit_config_node_list_received(struct qnetd_client *client,
                 */
                *result_vote = TLV_VOTE_ASK_LATER;
        } else {
-               *result_vote = qnetd_algo_ffsplit_do(client, 0, &client->last_ring_id,
-                   nodes, &client->last_membership_node_list, client->last_heuristics);
+               reply_error_code = qnetd_algo_ffsplit_do(client, 0, &client->last_ring_id,
+                   nodes, &client->last_membership_node_list, client->last_heuristics,
+                   result_vote);
        }
 
-       return (TLV_REPLY_ERROR_CODE_NO_ERROR);
+       return (reply_error_code);
 }
 
 /*
@@ -712,6 +755,9 @@ qnetd_algo_ffsplit_membership_node_list_received(struct qnetd_client *client,
     uint32_t msg_seq_num, const struct tlv_ring_id *ring_id,
     const struct node_list *nodes, enum tlv_heuristics heuristics, enum tlv_vote *result_vote)
 {
+       enum tlv_reply_error_code reply_error_code;
+
+       reply_error_code = TLV_REPLY_ERROR_CODE_NO_ERROR;
 
        if (node_list_size(nodes) == 0) {
                /*
@@ -739,11 +785,12 @@ qnetd_algo_ffsplit_membership_node_list_received(struct qnetd_client *client,
                 */
                *result_vote = TLV_VOTE_ASK_LATER;
        } else {
-               *result_vote = qnetd_algo_ffsplit_do(client, 0, ring_id,
-                   &client->configuration_node_list, nodes, heuristics);
+               reply_error_code = qnetd_algo_ffsplit_do(client, 0, ring_id,
+                   &client->configuration_node_list, nodes, heuristics,
+                   result_vote);
        }
 
-       return (TLV_REPLY_ERROR_CODE_NO_ERROR);
+       return (reply_error_code);
 }
 
 enum tlv_reply_error_code
@@ -763,11 +810,15 @@ qnetd_algo_ffsplit_quorum_node_list_received(struct qnetd_client *client,
 void
 qnetd_algo_ffsplit_client_disconnect(struct qnetd_client *client, int server_going_down)
 {
+       enum tlv_vote result_vote;
+       struct qnetd_algo_ffsplit_cluster_data *cluster_data;
+
+       cluster_data = (struct qnetd_algo_ffsplit_cluster_data *)client->cluster->algorithm_data;
 
        if (!server_going_down) {
                (void)qnetd_algo_ffsplit_do(client, 1, &client->last_ring_id,
                    &client->configuration_node_list, &client->last_membership_node_list,
-                   client->last_heuristics);
+                   client->last_heuristics, &result_vote);
        }
 
        free(client->algorithm_data);
@@ -776,7 +827,9 @@ qnetd_algo_ffsplit_client_disconnect(struct qnetd_client *client, int server_goi
                /*
                 * Last client in the cluster
                 */
-                free(client->cluster->algorithm_data);
+               node_list_free(&cluster_data->quorate_partition_node_list);
+
+               free(client->cluster->algorithm_data);
        }
 }
 
@@ -845,6 +898,9 @@ enum tlv_reply_error_code
 qnetd_algo_ffsplit_heuristics_change_received(struct qnetd_client *client, uint32_t msg_seq_num,
     enum tlv_heuristics heuristics, enum tlv_vote *result_vote)
 {
+       enum tlv_reply_error_code reply_error_code;
+
+       reply_error_code = TLV_REPLY_ERROR_CODE_NO_ERROR;
 
        if (node_list_size(&client->configuration_node_list) == 0 ||
            node_list_size(&client->last_membership_node_list) == 0) {
@@ -853,12 +909,12 @@ qnetd_algo_ffsplit_heuristics_change_received(struct qnetd_client *client, uint3
                 */
                *result_vote = TLV_VOTE_ASK_LATER;
        } else {
-               *result_vote = qnetd_algo_ffsplit_do(client, 0, &client->last_ring_id,
+               reply_error_code = qnetd_algo_ffsplit_do(client, 0, &client->last_ring_id,
                    &client->configuration_node_list, &client->last_membership_node_list,
-                   heuristics);
+                   heuristics, result_vote);
        }
 
-       return (TLV_REPLY_ERROR_CODE_NO_ERROR);
+       return (reply_error_code);
 }
 
 enum tlv_reply_error_code
index 5bb89a127c7b8d0e80d340e1bcd63baff53ac341..04db240f39dfb72b06c3400051fa54a9bc5ba1af 100644 (file)
@@ -444,6 +444,14 @@ qnetd_client_msg_received_init(struct qnetd_instance *instance, struct qnetd_cli
                client->decision_algorithm = msg->decision_algorithm;
        }
 
+       if (reply_error_code == TLV_REPLY_ERROR_CODE_NO_ERROR) {
+               /*
+                * Preset keep_active_partition_tie_breaker from default config
+                */
+               client->keep_active_partition_tie_breaker =
+                   instance->advanced_settings->keep_active_partition_tie_breaker;
+       }
+
        if (reply_error_code == TLV_REPLY_ERROR_CODE_NO_ERROR) {
                reply_error_code = qnetd_client_msg_received_init_check_new_client(instance,
                    client);
index 4930d71d005c0254ccc41dd0af6b7ad5423cdb33..974802b32044a3fa6a8eecca3c575dba6ecb78ac 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017 Red Hat, Inc.
+ * Copyright (c) 2015-2020 Red Hat, Inc.
  *
  * All rights reserved.
  *
@@ -90,6 +90,7 @@ struct qnetd_client {
        enum tlv_heuristics last_membership_heuristics; /* Passed in membership node list */
        enum tlv_heuristics last_regular_heuristics; /* Passed in heuristics change callback */
        enum tlv_heuristics last_heuristics; /* Latest heuristics both membership and regular */
+       uint8_t keep_active_partition_tie_breaker;
        TAILQ_ENTRY(qnetd_client) entries;
        TAILQ_ENTRY(qnetd_client) cluster_entries;
 };