From 28d49141f8d1852f89847b10d35042ed6a9392bf Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Wed, 18 Nov 2020 17:52:12 +0100 Subject: [PATCH] qnetd: Move client schedule disconnect handling Client disconnect used to be handled per client fd in qnetd_client_net_socket_poll_loop_set_events_cb. The problem is that disconnect calls the algorithm, which may send a message to another client whose fd was already processed in the pr-poll-loop, so POLLOUT is not set until a new loop exec is called (and that usually happens only because the old one times out). To reproduce this problem, use ffsplit and make qnetd disconnect one of the clients - ffsplit needs to send ack/nack votes, but it doesn't send them during the first iteration and instead waits for the dpd timeout. Signed-off-by: Jan Friesse --- qdevices/qnetd-client-net.c | 18 ++++++------- qdevices/qnetd-instance.c | 53 +++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/qdevices/qnetd-client-net.c b/qdevices/qnetd-client-net.c index 391bb87..3d2b546 100644 --- a/qdevices/qnetd-client-net.c +++ b/qdevices/qnetd-client-net.c @@ -61,18 +61,18 @@ static int qnetd_client_net_socket_poll_loop_set_events_cb(PRFileDesc *prfd, short *events, void *user_data1, void *user_data2) { - struct qnetd_instance *instance = (struct qnetd_instance *)user_data1; struct qnetd_client *client = (struct qnetd_client *)user_data2; if (client->schedule_disconnect) { - qnetd_instance_client_disconnect(instance, client, 0); - - if (pr_poll_loop_del_prfd(&instance->main_poll_loop, prfd) == -1) { - log(LOG_ERR, "pr_poll_loop_del_prfd for client socket failed"); - - return (-2); - } - + /* + * Disconnect logic used to be there but it was moved to + * qnetd-instance.c (see qnetd_instance_poll_loop_pre_poll_cb + * function for reasoning). + * + * This condition (= set_events_cb and client scheduled for disconnect) + * shouldn't really happen, but if it happens just don't add client to + * pr loop and wait for next pre_poll_cb. 
+ */ return (-1); } diff --git a/qdevices/qnetd-instance.c b/qdevices/qnetd-instance.c index e8d9d14..d1f846a 100644 --- a/qdevices/qnetd-instance.c +++ b/qdevices/qnetd-instance.c @@ -35,6 +35,7 @@ #include #include +#include "log.h" #include "qnetd-instance.h" #include "qnetd-client.h" #include "qnetd-client-dpd-timer.h" @@ -42,6 +43,45 @@ #include "qnetd-log-debug.h" #include "qnetd-client-algo-timer.h" +static int +qnetd_instance_poll_loop_pre_poll_cb(void *user_data1, void *user_data2) +{ + struct qnetd_instance *instance = (struct qnetd_instance *)user_data1; + struct qnetd_client *client; + struct qnetd_client *client_next; + + /* + * This functionality used to be per client fd in + * the qnetd_client_net_socket_poll_loop_set_events_cb. Problem is, that + * disconnect calls algorithm which may send message to other client + * with fd which was already processed in the pr-poll-loop so POLLOUT is + * not set till new loop exec is called (and that usually happens + * because old one timeouts). To reproduce this problem use + * ffsplit and make qnetd disconnect one of the clients - ffsplit needs to + * send ack/nack votes, but it doesn't send them during first iteration + * and waits for dpd timeout. 
+ */ + client = TAILQ_FIRST(&instance->clients); + while (client != NULL) { + client_next = TAILQ_NEXT(client, entries); + + if (client->schedule_disconnect) { + if (pr_poll_loop_del_prfd(&instance->main_poll_loop, + client->socket) == -1) { + log(LOG_ERR, "pr_poll_loop_del_prfd for client socket failed"); + + return (-1); + } + + qnetd_instance_client_disconnect(instance, client, 0); + } + + client = client_next; + } + + return (0); +} + int qnetd_instance_init(struct qnetd_instance *instance, enum tlv_tls_supported tls_supported, int tls_client_cert_required, size_t max_clients, @@ -62,6 +102,14 @@ qnetd_instance_init(struct qnetd_instance *instance, pr_poll_loop_init(&instance->main_poll_loop); + if (pr_poll_loop_add_pre_poll_cb(&instance->main_poll_loop, + qnetd_instance_poll_loop_pre_poll_cb, + instance, NULL) == -1) { + log(LOG_ERR, "Can't add instance pre poll loop cb"); + + return (-1); + } + return (0); } @@ -83,6 +131,11 @@ qnetd_instance_destroy(struct qnetd_instance *instance) qnetd_cluster_list_free(&instance->clusters); qnetd_client_list_free(&instance->clients); + if (pr_poll_loop_del_pre_poll_cb(&instance->main_poll_loop, + qnetd_instance_poll_loop_pre_poll_cb) == -1) { + log(LOG_WARNING, "Can't delete instance pre poll loop cb"); + } + pr_poll_loop_destroy(&instance->main_poll_loop); return (0); -- 2.39.2