2 * Copyright (c) 2015-2019 Red Hat, Inc.
6 * Author: Christine Caulfield (ccaulfie@redhat.com)
8 * This software licensed under BSD license, the text of which follows:
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are met:
13 * - Redistributions of source code must retain the above copyright notice,
14 * this list of conditions and the following disclaimer.
15 * - Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 * - Neither the name of the Red Hat, Inc. nor the names of its
19 * contributors may be used to endorse or promote products derived from this
20 * software without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32 * THE POSSIBILITY OF SUCH DAMAGE.
/*
 * This is a simple 'last man standing' algorithm for 2 node clusters.
 *
 * If the node is the only one left in the cluster that can see the
 * qdevice server then we return a vote.
 *
 * If more than one node can see the qdevice server but the nodes can't
 * see each other then we return a vote to the nominated tie_breaker node.
 *
 * If there are more than two nodes, then we don't return a vote;
 * this is not our job.
 */
49 #include <sys/types.h>
55 #include "qnetd-algo-2nodelms.h"
56 #include "qnetd-cluster-list.h"
57 #include "qnetd-algo-utils.h"
60 struct qnetd_algo_2nodelms_info
{
62 enum tlv_vote last_result
;
65 enum tlv_reply_error_code
66 qnetd_algo_2nodelms_client_init(struct qnetd_client
*client
)
68 struct qnetd_algo_2nodelms_info
*info
;
70 info
= malloc(sizeof(struct qnetd_algo_2nodelms_info
));
72 return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR
);
74 client
->algorithm_data
= info
;
75 info
->last_result
= 0;
76 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
80 * Called after client sent configuration node list
81 * All client fields are already set. Nodes is actual node list, initial is used
82 * to distinquish between initial node list and changed node list.
83 * msg_seq_num is 32-bit number set by client. If client sent config file version,
84 * config_version_set is set to 1 and config_version contains valid config file version.
86 * Function has to return result_vote. This can be one of ack/nack, ask_later (client
87 * should ask later for a vote) or wait_for_reply (client should wait for reply).
89 * Return TLV_REPLY_ERROR_CODE_NO_ERROR on success, different TLV_REPLY_ERROR_CODE_*
90 * on failure (error is sent back to client)
92 enum tlv_reply_error_code
93 qnetd_algo_2nodelms_config_node_list_received(struct qnetd_client
*client
,
94 uint32_t msg_seq_num
, int config_version_set
, uint64_t config_version
,
95 const struct node_list
*nodes
, int initial
, enum tlv_vote
*result_vote
)
97 struct node_list_entry
*node_info
;
98 struct qnetd_algo_2nodelms_info
*info
= client
->algorithm_data
;
101 /* Check this is a 2 node cluster */
102 TAILQ_FOREACH(node_info
, nodes
, entries
) {
105 info
->num_config_nodes
= node_count
;
106 log(LOG_DEBUG
, "algo-2nodelms: cluster %s config_list has %d nodes", client
->cluster_name
, node_count
);
108 if (node_count
!= 2) {
109 log(LOG_INFO
, "algo-2nodelms: cluster %s does not have 2 configured nodes, it has %d", client
->cluster_name
, node_count
);
111 *result_vote
= TLV_VOTE_NACK
;
112 return (TLV_REPLY_ERROR_CODE_UNSUPPORTED_DECISION_ALGORITHM
);
115 *result_vote
= TLV_VOTE_NO_CHANGE
;
117 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
121 * Called after client sent membership node list.
122 * All client fields are already set. Nodes is actual node list.
123 * msg_seq_num is 32-bit number set by client. If client sent config file version,
124 * config_version_set is set to 1 and config_version contains valid config file version.
125 * ring_id and quorate are copied from client votequorum callback.
127 * Function has to return result_vote. This can be one of ack/nack, ask_later (client
128 * should ask later for a vote) or wait_for_reply (client should wait for reply).
130 * Return TLV_REPLY_ERROR_CODE_NO_ERROR on success, different TLV_REPLY_ERROR_CODE_*
131 * on failure (error is sent back to client)
134 enum tlv_reply_error_code
135 qnetd_algo_2nodelms_membership_node_list_received(struct qnetd_client
*client
,
136 uint32_t msg_seq_num
, const struct tlv_ring_id
*ring_id
,
137 const struct node_list
*nodes
, enum tlv_heuristics heuristics
,
138 enum tlv_vote
*result_vote
)
140 struct node_list_entry
*node_info
;
141 struct qnetd_client
*other_client
;
142 struct qnetd_algo_2nodelms_info
*info
= client
->algorithm_data
;
144 uint32_t low_node_id
= UINT32_MAX
;
145 uint32_t high_node_id
= 0;
146 enum tlv_heuristics other_node_heuristics
;
148 /* If we're a newcomer and there is another active partition, then we must NACK
149 * to avoid quorum moving to us from already active nodes.
151 if (info
->last_result
== 0) {
152 TAILQ_FOREACH(other_client
, &client
->cluster
->client_list
, cluster_entries
) {
153 struct qnetd_algo_2nodelms_info
*other_info
= other_client
->algorithm_data
;
154 if (!tlv_ring_id_eq(ring_id
, &other_client
->last_ring_id
) &&
155 other_info
->last_result
== TLV_VOTE_ACK
) {
157 /* Don't save NACK, we need to know subsequently if we haven't been voting */
158 *result_vote
= TLV_VOTE_NACK
;
159 log(LOG_DEBUG
, "algo-2nodelms: we are a new partition and another active partition exists. NACK");
160 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
165 /* If both nodes are present, then we're OK. return a vote */
166 TAILQ_FOREACH(node_info
, nodes
, entries
) {
170 log(LOG_DEBUG
, "algo-2nodelms: cluster %s (client %p nodeid "UTILS_PRI_NODE_ID
") membership list has %d member nodes (ring ID "UTILS_PRI_RING_ID
")", client
->cluster_name
, client
, client
->node_id
, node_count
, ring_id
->node_id
, ring_id
->seq
);
172 if (node_count
== 2) {
173 log(LOG_DEBUG
, "algo-2nodelms: cluster %s running normally. Both nodes active", client
->cluster_name
);
174 *result_vote
= info
->last_result
= TLV_VOTE_ACK
;
175 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
178 /* Now look for other clients connected from this cluster that can't see us any more */
180 other_node_heuristics
= TLV_HEURISTICS_UNDEFINED
;
181 TAILQ_FOREACH(other_client
, &client
->cluster
->client_list
, cluster_entries
) {
184 log(LOG_DEBUG
, "algo-2nodelms: seen nodeid "UTILS_PRI_NODE_ID
" on client %p (ring ID "UTILS_PRI_RING_ID
")", other_client
->node_id
, other_client
, other_client
->last_ring_id
.node_id
, other_client
->last_ring_id
.seq
);
185 if (other_client
->node_id
< low_node_id
) {
186 low_node_id
= other_client
->node_id
;
188 if (other_client
->node_id
> high_node_id
) {
189 high_node_id
= other_client
->node_id
;
191 if (other_client
!= client
) {
192 other_node_heuristics
= other_client
->last_heuristics
;
195 log(LOG_DEBUG
, "algo-2nodelms: cluster %s %d nodes running independently", client
->cluster_name
, node_count
);
197 /* Only 1 node alive .. allow it to continue */
198 if (node_count
== 1) {
199 log(LOG_DEBUG
, "algo-2nodelms: cluster %s running on 'last-man'", client
->cluster_name
);
200 *result_vote
= info
->last_result
= TLV_VOTE_ACK
;
201 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
205 * Both nodes are alive.
206 * Check their heuristics.
208 if (tlv_heuristics_cmp(heuristics
, other_node_heuristics
) > 0) {
209 *result_vote
= info
->last_result
= TLV_VOTE_ACK
;
211 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
212 } else if (tlv_heuristics_cmp(heuristics
, other_node_heuristics
) < 0) {
213 *result_vote
= info
->last_result
= TLV_VOTE_NACK
;
215 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
218 /* Heuristics are equal -> Only give a vote to the nominated tie-breaker node */
219 switch (client
->tie_breaker
.mode
) {
221 case TLV_TIE_BREAKER_MODE_LOWEST
:
222 if (client
->node_id
== low_node_id
) {
223 log(LOG_DEBUG
, "algo-2nodelms: cluster %s running on low node-id %d", client
->cluster_name
, low_node_id
);
224 *result_vote
= info
->last_result
= TLV_VOTE_ACK
;
227 log(LOG_DEBUG
, "algo-2nodelms: cluster %s node-id %d denied vote because low nodeid %d is active", client
->cluster_name
, client
->node_id
, low_node_id
);
228 *result_vote
= info
->last_result
= TLV_VOTE_NACK
;
231 case TLV_TIE_BREAKER_MODE_HIGHEST
:
232 if (client
->node_id
== high_node_id
) {
233 log(LOG_DEBUG
, "algo-2nodelms: cluster %s running on high node-id %d", client
->cluster_name
, high_node_id
);
234 *result_vote
= info
->last_result
= TLV_VOTE_ACK
;
237 log(LOG_DEBUG
, "algo-2nodelms: cluster %s node-id %d denied vote because high nodeid %d is active", client
->cluster_name
, client
->node_id
, high_node_id
);
238 *result_vote
= info
->last_result
= TLV_VOTE_NACK
;
241 case TLV_TIE_BREAKER_MODE_NODE_ID
:
242 if (client
->node_id
== client
->tie_breaker
.node_id
) {
243 log(LOG_DEBUG
, "algo-2nodelms: cluster %s running on nominated tie-breaker node %d", client
->cluster_name
, client
->tie_breaker
.node_id
);
244 *result_vote
= info
->last_result
= TLV_VOTE_ACK
;
247 log(LOG_DEBUG
, "algo-2nodelms: cluster %s node-id %d denied vote because nominated tie-breaker nodeid %d is active", client
->cluster_name
, client
->node_id
, client
->tie_breaker
.node_id
);
248 *result_vote
= info
->last_result
= TLV_VOTE_NACK
;
252 log(LOG_DEBUG
, "algo-2nodelms: cluster %s node-id %d denied vote because tie-breaker option is invalid: %d", client
->cluster_name
, client
->node_id
, client
->tie_breaker
.mode
);
253 *result_vote
= info
->last_result
= TLV_VOTE_NACK
;
256 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
259 enum tlv_reply_error_code
260 qnetd_algo_2nodelms_quorum_node_list_received(struct qnetd_client
*client
,
261 uint32_t msg_seq_num
, enum tlv_quorate quorate
, const struct node_list
*nodes
,
262 enum tlv_vote
*result_vote
)
265 *result_vote
= TLV_VOTE_NO_CHANGE
;
267 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
271 * Called after client disconnect. Client structure is still existing (and it's part
272 * of a client->cluster), but it is destroyed (and removed from cluster) right after
273 * this callback finishes. Callback is used mainly for destroing client->algorithm_data.
276 qnetd_algo_2nodelms_client_disconnect(struct qnetd_client
*client
, int server_going_down
)
278 log(LOG_INFO
, "algo-2nodelms: Client %p (cluster %s, node_id "UTILS_PRI_NODE_ID
") "
279 "disconnect", client
, client
->cluster_name
, client
->node_id
);
281 log(LOG_INFO
, "algo-2nodelms: server going down %u", server_going_down
);
283 free(client
->algorithm_data
);
287 * Called after client sent ask for vote message. This is usually happening after server
288 * replied TLV_VOTE_ASK_LATER.
290 enum tlv_reply_error_code
291 qnetd_algo_2nodelms_ask_for_vote_received(struct qnetd_client
*client
, uint32_t msg_seq_num
,
292 enum tlv_vote
*result_vote
)
294 struct qnetd_algo_2nodelms_info
*info
= client
->algorithm_data
;
296 log(LOG_INFO
, "algo-2nodelms: Client %p (cluster %s, node_id "UTILS_PRI_NODE_ID
") "
297 "asked for a vote", client
, client
->cluster_name
, client
->node_id
);
299 if (info
->last_result
== 0) {
300 *result_vote
= TLV_VOTE_ASK_LATER
;
303 *result_vote
= info
->last_result
;
306 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
309 enum tlv_reply_error_code
310 qnetd_algo_2nodelms_vote_info_reply_received(struct qnetd_client
*client
, uint32_t msg_seq_num
)
313 log(LOG_INFO
, "algo-2nodelms: Client %p (cluster %s, node_id "UTILS_PRI_NODE_ID
") "
314 "replied back to vote info message", client
, client
->cluster_name
, client
->node_id
);
316 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
319 enum tlv_reply_error_code
320 qnetd_algo_2nodelms_heuristics_change_received(struct qnetd_client
*client
, uint32_t msg_seq_num
,
321 enum tlv_heuristics heuristics
, enum tlv_vote
*result_vote
)
324 log(LOG_INFO
, "algo-2nodelms: heuristics change is not supported.");
326 *result_vote
= TLV_VOTE_NO_CHANGE
;
328 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
331 enum tlv_reply_error_code
332 qnetd_algo_2nodelms_timer_callback(struct qnetd_client
*client
, int *reschedule_timer
,
333 int *send_vote
, enum tlv_vote
*result_vote
)
336 return (TLV_REPLY_ERROR_CODE_NO_ERROR
);
339 static struct qnetd_algorithm qnetd_algo_2nodelms
= {
340 .init
= qnetd_algo_2nodelms_client_init
,
341 .config_node_list_received
= qnetd_algo_2nodelms_config_node_list_received
,
342 .membership_node_list_received
= qnetd_algo_2nodelms_membership_node_list_received
,
343 .quorum_node_list_received
= qnetd_algo_2nodelms_quorum_node_list_received
,
344 .client_disconnect
= qnetd_algo_2nodelms_client_disconnect
,
345 .ask_for_vote_received
= qnetd_algo_2nodelms_ask_for_vote_received
,
346 .vote_info_reply_received
= qnetd_algo_2nodelms_vote_info_reply_received
,
347 .heuristics_change_received
= qnetd_algo_2nodelms_heuristics_change_received
,
348 .timer_callback
= qnetd_algo_2nodelms_timer_callback
,
351 enum tlv_reply_error_code
qnetd_algo_2nodelms_register()
353 return qnetd_algorithm_register(TLV_DECISION_ALGORITHM_TYPE_2NODELMS
, &qnetd_algo_2nodelms
);