]> git.proxmox.com Git - mirror_corosync-qdevice.git/blob - qdevices/qdevice-net-heuristics.c
5757faba44fa89f8211e20f23b62b8b03479a619
[mirror_corosync-qdevice.git] / qdevices / qdevice-net-heuristics.c
1 /*
2 * Copyright (c) 2017-2020 Red Hat, Inc.
3 *
4 * All rights reserved.
5 *
6 * Author: Jan Friesse (jfriesse@redhat.com)
7 *
8 * This software licensed under BSD license, the text of which follows:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are met:
12 *
13 * - Redistributions of source code must retain the above copyright notice,
14 * this list of conditions and the following disclaimer.
15 * - Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 * - Neither the name of the Red Hat, Inc. nor the names of its
19 * contributors may be used to endorse or promote products derived from this
20 * software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32 * THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 #include "log.h"
36 #include "qdevice-net-algorithm.h"
37 #include "qdevice-net-cast-vote-timer.h"
38 #include "qdevice-net-heuristics.h"
39 #include "qdevice-net-send.h"
40 #include "qdevice-net-votequorum.h"
41
42 enum tlv_heuristics
43 qdevice_net_heuristics_exec_result_to_tlv(enum qdevice_heuristics_exec_result exec_result)
44 {
45 enum tlv_heuristics res;
46
47 switch (exec_result) {
48 case QDEVICE_HEURISTICS_EXEC_RESULT_DISABLED: res = TLV_HEURISTICS_UNDEFINED; break;
49 case QDEVICE_HEURISTICS_EXEC_RESULT_PASS: res = TLV_HEURISTICS_PASS; break;
50 case QDEVICE_HEURISTICS_EXEC_RESULT_FAIL: res = TLV_HEURISTICS_FAIL; break;
51 default:
52 log(LOG_ERR, "qdevice_net_heuristics_exec_result_to_tlv: Unhandled "
53 "heuristics exec result %s",
54 qdevice_heuristics_exec_result_to_str(exec_result));
55 exit(EXIT_FAILURE);
56 break;
57 }
58
59 return (res);
60 }
61
62 static int
63 qdevice_net_regular_heuristics_exec_result_callback(uint32_t seq_number,
64 enum qdevice_heuristics_exec_result exec_result, void *user_data1, void *user_data2)
65 {
66 struct qdevice_heuristics_instance *heuristics_instance;
67 struct qdevice_instance *instance;
68 struct qdevice_net_instance *net_instance;
69 int send_msg;
70 enum tlv_vote vote;
71 enum tlv_heuristics heuristics;
72
73 instance = (struct qdevice_instance *)user_data1;
74 heuristics_instance = &instance->heuristics_instance;
75 net_instance = instance->model_data;
76
77 if (qdevice_heuristics_result_notifier_list_set_active(&heuristics_instance->exec_result_notifier_list,
78 qdevice_net_regular_heuristics_exec_result_callback, 0) != 0) {
79 log(LOG_ERR, "Can't deactivate net regular heuristics exec callback notifier");
80
81 net_instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ACTIVATE_HEURISTICS_RESULT_NOTIFIER;
82 net_instance->schedule_disconnect = 1;
83
84 return (0);
85 }
86
87 heuristics = qdevice_net_heuristics_exec_result_to_tlv(exec_result);
88
89 if (exec_result == QDEVICE_HEURISTICS_EXEC_RESULT_DISABLED) {
90 /*
91 * Can happen when user disables heuristics during runtime
92 */
93 return (0);
94 }
95
96 if (net_instance->latest_heuristics_result != heuristics) {
97 log(heuristics == TLV_HEURISTICS_PASS ? LOG_NOTICE : LOG_ERR,
98 "Heuristics result changed from %s to %s",
99 tlv_heuristics_to_str(net_instance->latest_heuristics_result),
100 tlv_heuristics_to_str(heuristics));
101
102 if (net_instance->state != QDEVICE_NET_INSTANCE_STATE_WAITING_VOTEQUORUM_CMAP_EVENTS) {
103 /*
104 * Not connected to qnetd
105 */
106 send_msg = 0;
107 } else {
108 send_msg = 1;
109 }
110
111 vote = TLV_VOTE_NO_CHANGE;
112
113 if (qdevice_net_algorithm_heuristics_change(net_instance, &heuristics, &send_msg,
114 &vote) == -1) {
115 log(LOG_ERR, "Algorithm returned error. Disconnecting.");
116
117 net_instance->disconnect_reason =
118 QDEVICE_NET_DISCONNECT_REASON_ALGO_HEURISTICS_CHANGE_ERR;
119 net_instance->schedule_disconnect = 1;
120
121 return (0);
122 } else {
123 log(LOG_DEBUG, "Algorithm decided to %s message with heuristics result "
124 "%s and result vote is %s", (send_msg ? "send" : "not send"),
125 tlv_heuristics_to_str(heuristics), tlv_vote_to_str(vote));
126 }
127
128 if (send_msg) {
129 if (heuristics == TLV_HEURISTICS_UNDEFINED) {
130 log(LOG_ERR, "Inconsistent algorithm result. "
131 "It's not possible to send message with undefined heuristics. "
132 "Disconnecting.");
133
134 net_instance->disconnect_reason =
135 QDEVICE_NET_DISCONNECT_REASON_ALGO_HEURISTICS_CHANGE_ERR;
136 net_instance->schedule_disconnect = 1;
137
138 return (0);
139 }
140
141 if (!net_instance->server_supports_heuristics) {
142 log(LOG_ERR, "Server doesn't support heuristics. "
143 "Disconnecting.");
144
145 net_instance->disconnect_reason =
146 QDEVICE_NET_DISCONNECT_REASON_SERVER_DOESNT_SUPPORT_REQUIRED_OPT;
147 net_instance->schedule_disconnect = 1;
148
149 return (0);
150 }
151
152 if (qdevice_net_send_heuristics_change(net_instance, heuristics) != 0) {
153 net_instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER;
154 net_instance->schedule_disconnect = 1;
155
156 return (0);
157 }
158 }
159
160 if (qdevice_net_cast_vote_timer_update(net_instance, vote) != 0) {
161 log(LOG_CRIT, "qdevice_net_heuristics_exec_result_callback "
162 "Can't update cast vote timer");
163
164 net_instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_SCHEDULE_VOTING_TIMER;
165 net_instance->schedule_disconnect = 1;
166
167 return (0);
168 }
169 }
170
171 net_instance->latest_regular_heuristics_result = heuristics;
172 net_instance->latest_heuristics_result = heuristics;
173
174 if (qdevice_net_heuristics_schedule_timer(net_instance) != 0) {
175 return (0);
176 }
177
178 return (0);
179 }
180
181 static int
182 qdevice_net_connect_heuristics_exec_result_callback(uint32_t seq_number,
183 enum qdevice_heuristics_exec_result exec_result, void *user_data1, void *user_data2)
184 {
185 struct qdevice_heuristics_instance *heuristics_instance;
186 struct qdevice_instance *instance;
187 struct qdevice_net_instance *net_instance;
188 enum tlv_vote vote;
189 enum tlv_heuristics heuristics;
190 int send_config_node_list;
191 int send_membership_node_list;
192 int send_quorum_node_list;
193 struct tlv_ring_id tlv_rid;
194 enum tlv_quorate quorate;
195
196 instance = (struct qdevice_instance *)user_data1;
197 heuristics_instance = &instance->heuristics_instance;
198 net_instance = instance->model_data;
199
200 if (qdevice_heuristics_result_notifier_list_set_active(&heuristics_instance->exec_result_notifier_list,
201 qdevice_net_connect_heuristics_exec_result_callback, 0) != 0) {
202 log(LOG_ERR, "Can't deactivate net connect heuristics exec callback notifier");
203
204 net_instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ACTIVATE_HEURISTICS_RESULT_NOTIFIER;
205 net_instance->schedule_disconnect = 1;
206
207 return (0);
208 }
209
210 if (net_instance->state != QDEVICE_NET_INSTANCE_STATE_WAITING_VOTEQUORUM_CMAP_EVENTS) {
211 /*
212 * Not connected to qnetd -> heuristics will be called again on new connect
213 */
214 return (0);
215 }
216
217 heuristics = qdevice_net_heuristics_exec_result_to_tlv(exec_result);
218
219 send_config_node_list = 1;
220 send_membership_node_list = 1;
221 send_quorum_node_list = 1;
222 vote = TLV_VOTE_WAIT_FOR_REPLY;
223
224 if (qdevice_net_algorithm_connected(net_instance, &heuristics, &send_config_node_list,
225 &send_membership_node_list, &send_quorum_node_list, &vote) != 0) {
226 log(LOG_DEBUG, "Algorithm returned error. Disconnecting.");
227 net_instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_ALGO_CONNECTED_ERR;
228 return (0);
229 } else {
230 log(LOG_DEBUG, "Algorithm decided to %s config node list, %s membership "
231 "node list, %s quorum node list, heuristics is %s and result vote is %s",
232 (send_config_node_list ? "send" : "not send"),
233 (send_membership_node_list ? "send" : "not send"),
234 (send_quorum_node_list ? "send" : "not send"),
235 tlv_heuristics_to_str(heuristics),
236 tlv_vote_to_str(vote));
237 }
238
239 /*
240 * Now we can finally really send node list, votequorum node list and update timer
241 */
242 if (send_config_node_list) {
243 if (qdevice_net_send_config_node_list(net_instance,
244 &instance->config_node_list,
245 instance->config_node_list_version_set,
246 instance->config_node_list_version, 1) != 0) {
247 net_instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER;
248 return (0);
249 }
250 }
251
252 if (send_membership_node_list) {
253 qdevice_net_votequorum_ring_id_to_tlv(&tlv_rid,
254 &instance->vq_node_list_ring_id);
255
256 if (qdevice_net_send_membership_node_list(net_instance, &tlv_rid,
257 instance->vq_node_list_entries,
258 instance->vq_node_list,
259 heuristics) != 0) {
260 net_instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER;
261 return (0);
262 }
263 }
264
265 if (send_quorum_node_list) {
266 quorate = (instance->vq_quorum_quorate ?
267 TLV_QUORATE_QUORATE : TLV_QUORATE_INQUORATE);
268
269 if (qdevice_net_send_quorum_node_list(net_instance,
270 quorate,
271 instance->vq_quorum_node_list_entries,
272 instance->vq_quorum_node_list) != 0) {
273 net_instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER;
274 return (0);
275 }
276 }
277
278 if (qdevice_net_cast_vote_timer_update(net_instance, vote) != 0) {
279 log(LOG_CRIT, "qdevice_net_msg_received_set_option_reply fatal error. "
280 " Can't update cast vote timer vote");
281 net_instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_SCHEDULE_VOTING_TIMER;
282 }
283
284 net_instance->state = QDEVICE_NET_INSTANCE_STATE_WAITING_VOTEQUORUM_CMAP_EVENTS;
285 net_instance->connected_since_time = time(NULL);
286
287 net_instance->latest_connect_heuristics_result = heuristics;
288 net_instance->latest_heuristics_result = heuristics;
289
290 return (0);
291 }
292
293 static int
294 qdevice_net_heuristics_timer_callback(void *data1, void *data2)
295 {
296 struct qdevice_net_instance *net_instance;
297 struct qdevice_heuristics_instance *heuristics_instance;
298
299 net_instance = (struct qdevice_net_instance *)data1;
300 heuristics_instance = &net_instance->qdevice_instance_ptr->heuristics_instance;
301
302 if (qdevice_heuristics_waiting_for_result(heuristics_instance)) {
303 log(LOG_DEBUG, "Not executing regular heuristics because other heuristics is already running.");
304
305 return (1);
306 }
307
308 net_instance->regular_heuristics_timer = NULL;
309
310 log(LOG_DEBUG, "Executing regular heuristics.");
311
312 if (qdevice_heuristics_result_notifier_list_set_active(&heuristics_instance->exec_result_notifier_list,
313 qdevice_net_regular_heuristics_exec_result_callback, 1) != 0) {
314 log(LOG_ERR, "Can't activate net regular heuristics exec callback notifier");
315
316 net_instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ACTIVATE_HEURISTICS_RESULT_NOTIFIER;
317 net_instance->schedule_disconnect = 1;
318
319 return (0);
320 }
321
322 if (qdevice_heuristics_exec(heuristics_instance,
323 net_instance->qdevice_instance_ptr->sync_in_progress) != 0) {
324 log(LOG_ERR, "Can't execute regular heuristics.");
325
326 net_instance->schedule_disconnect = 1;
327 net_instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_START_HEURISTICS;
328
329 return (0);
330 }
331
332 /*
333 * Do not schedule this callback again. It's going to be scheduled in the
334 * qdevice_net_heuristics_exec_result_callback
335 */
336 return (0);
337 }
338
339 int
340 qdevice_net_heuristics_stop_timer(struct qdevice_net_instance *net_instance)
341 {
342 struct qdevice_instance *instance;
343 struct qdevice_heuristics_instance *heuristics_instance;
344
345 instance = net_instance->qdevice_instance_ptr;
346 heuristics_instance = &instance->heuristics_instance;
347
348 if (net_instance->regular_heuristics_timer != NULL) {
349 log(LOG_DEBUG, "Regular heuristics timer stopped");
350
351 timer_list_delete(&net_instance->main_timer_list, net_instance->regular_heuristics_timer);
352 net_instance->regular_heuristics_timer = NULL;
353
354 if (qdevice_heuristics_result_notifier_list_set_active(&heuristics_instance->exec_result_notifier_list,
355 qdevice_net_regular_heuristics_exec_result_callback, 0) != 0) {
356 log(LOG_ERR, "Can't deactivate net regular heuristics exec callback notifier");
357
358 net_instance->disconnect_reason =
359 QDEVICE_NET_DISCONNECT_REASON_CANT_ACTIVATE_HEURISTICS_RESULT_NOTIFIER;
360 net_instance->schedule_disconnect = 1;
361 return (-1);
362 }
363 }
364
365 return (0);
366 }
367
368 int
369 qdevice_net_heuristics_schedule_timer(struct qdevice_net_instance *net_instance)
370 {
371 uint32_t interval;
372 struct qdevice_instance *instance;
373 struct qdevice_heuristics_instance *heuristics_instance;
374
375 instance = net_instance->qdevice_instance_ptr;
376 heuristics_instance = &instance->heuristics_instance;
377
378 if (heuristics_instance->mode != QDEVICE_HEURISTICS_MODE_ENABLED) {
379 log(LOG_DEBUG, "Not scheduling heuristics timer because mode is not enabled");
380
381 if (qdevice_net_heuristics_stop_timer(net_instance) != 0) {
382 return (-1);
383 }
384
385 return (0);
386 }
387
388 if (net_instance->regular_heuristics_timer != NULL) {
389 log(LOG_DEBUG, "Not scheduling heuristics timer because it is already scheduled");
390
391 return (0);
392 }
393
394 interval = heuristics_instance->interval;
395
396 log(LOG_DEBUG, "Scheduling next regular heuristics in %"PRIu32"ms", interval);
397
398 net_instance->regular_heuristics_timer = timer_list_add(&net_instance->main_timer_list,
399 interval,
400 qdevice_net_heuristics_timer_callback,
401 (void *)net_instance, NULL);
402
403 if (net_instance->regular_heuristics_timer == NULL) {
404 log(LOG_ERR, "Can't schedule regular heuristics.");
405
406 net_instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_SCHEDULE_HEURISTICS_TIMER;
407 net_instance->schedule_disconnect = 1;
408 return (-1);
409 }
410
411 return (0);
412 }
413
414 int
415 qdevice_net_heuristics_init(struct qdevice_net_instance *net_instance)
416 {
417
418 if (qdevice_heuristics_result_notifier_list_add(
419 &net_instance->qdevice_instance_ptr->heuristics_instance.exec_result_notifier_list,
420 qdevice_net_regular_heuristics_exec_result_callback,
421 net_instance->qdevice_instance_ptr, NULL) == NULL) {
422 log(LOG_ERR, "Can't add net regular heuristics exec callback into notifier");
423
424 return (-1);
425 }
426
427 if (qdevice_heuristics_result_notifier_list_add(
428 &net_instance->qdevice_instance_ptr->heuristics_instance.exec_result_notifier_list,
429 qdevice_net_connect_heuristics_exec_result_callback,
430 net_instance->qdevice_instance_ptr, NULL) == NULL) {
431 log(LOG_ERR, "Can't add net connect heuristics exec callback into notifier");
432
433 return (-1);
434 }
435
436 return (0);
437 }
438
439 int
440 qdevice_net_heuristics_exec_after_connect(struct qdevice_net_instance *net_instance)
441 {
442 struct qdevice_instance *instance;
443 struct qdevice_heuristics_instance *heuristics_instance;
444
445 instance = net_instance->qdevice_instance_ptr;
446 heuristics_instance = &instance->heuristics_instance;
447
448 log(LOG_DEBUG, "Executing after-connect heuristics.");
449
450 if (qdevice_heuristics_result_notifier_list_set_active(&heuristics_instance->exec_result_notifier_list,
451 qdevice_net_connect_heuristics_exec_result_callback, 1) != 0) {
452 log(LOG_ERR, "Can't activate net connect heuristics exec callback notifier");
453
454 net_instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ACTIVATE_HEURISTICS_RESULT_NOTIFIER;
455 net_instance->schedule_disconnect = 1;
456
457 return (-1);
458 }
459
460 if (qdevice_heuristics_exec(heuristics_instance,
461 instance->sync_in_progress) != 0) {
462 log(LOG_ERR, "Can't execute connect heuristics.");
463
464 net_instance->schedule_disconnect = 1;
465 net_instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_START_HEURISTICS;
466
467 return (-1);
468 }
469
470 return (0);
471 }