From: Han Zhou
Date: Mon, 19 Aug 2019 16:29:59 +0000 (-0700)
Subject: raft.c: Set candidate_retrying if no leader elected since last election.
X-Git-Url: https://git.proxmox.com/?a=commitdiff_plain;h=923f01cad678228224ae4fe86466e2f61ab2c9d0;p=mirror_ovs.git

raft.c: Set candidate_retrying if no leader elected since last election.

candidate_retrying is used to determine whether the current node is
disconnected from the cluster while the node is in the candidate role.
However, when a majority of the cluster is down, a node can flap between
the candidate and follower roles before a leader is elected, so
is_connected() flaps, too, which confuses clients.

This patch avoids the flapping with the help of a new member, had_leader:
if no leader has been elected since the last election was initiated, we
know we are still retrying and keep reporting the node as disconnected
from the cluster.
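To make the failure mode concrete, the sketch below simulates the role flap
described above.  It is illustrative only, not part of the patch: the struct,
the event sequence, and the helper names are simplified stand-ins for the
real ones in ovsdb/raft.c.

/* Minimal, self-contained sketch of the flapping problem and the fix. */
#include <stdbool.h>
#include <stdio.h>

enum role { FOLLOWER, CANDIDATE };

struct node {
    enum role role;
    bool candidate_retrying;
    bool had_leader;            /* Leader elected since last election? */
};

/* Old predicate: "retrying" only counted while in the candidate role. */
static bool
old_is_connected(const struct node *n)
{
    return !(n->role == CANDIDATE && n->candidate_retrying);
}

/* New predicate: "retrying" is remembered across role flaps. */
static bool
new_is_connected(const struct node *n)
{
    return !n->candidate_retrying;
}

static void
start_election(struct node *n)
{
    n->role = CANDIDATE;
    n->candidate_retrying = !n->had_leader;
    n->had_leader = false;
}

int
main(void)
{
    struct node n = { FOLLOWER, false, true }; /* Had a leader before outage. */

    start_election(&n);         /* First election after losing the leader. */
    start_election(&n);         /* It timed out; this one is a retry. */
    n.role = FOLLOWER;          /* A peer's vote request flips us back. */

    printf("old: %s, new: %s\n",
           old_is_connected(&n) ? "connected" : "disconnected",
           new_is_connected(&n) ? "connected" : "disconnected");
    /* Prints "old: connected, new: disconnected": the old predicate starts
     * reporting connected as soon as the role flaps back to follower, even
     * though the cluster still has no leader; the new one keeps reporting
     * disconnected until a leader is actually elected. */
    return 0;
}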
Signed-off-by: Han Zhou
Signed-off-by: Ben Pfaff
---

diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 71c5a7e1a..ca86830f3 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -285,8 +285,13 @@ struct raft {
 
     /* Candidates only.  Reinitialized at start of election. */
     int n_votes;                /* Number of votes for me. */
-    bool candidate_retrying;    /* The first round of election timed-out and it
-                                   is now retrying. */
+
+    /* Followers and candidates only. */
+    bool candidate_retrying;    /* The earlier election timed out and we are
+                                   now retrying. */
+    bool had_leader;            /* A leader has been elected since the last
+                                   election was initiated.  This helps to set
+                                   candidate_retrying. */
 };
 
 /* All Raft structures. */
@@ -344,6 +349,7 @@ static bool raft_handle_write_error(struct raft *,
                                     struct ovsdb_error *);
 
 static void raft_run_reconfigure(struct raft *);
+static void raft_set_leader(struct raft *, const struct uuid *sid);
 
 static struct raft_server *
 raft_find_server(const struct raft *raft, const struct uuid *sid)
 {
@@ -996,11 +1002,13 @@ raft_get_sid(const struct raft *raft)
 bool
 raft_is_connected(const struct raft *raft)
 {
-    return (!(raft->role == RAFT_CANDIDATE && raft->candidate_retrying)
+    bool ret = (!raft->candidate_retrying
             && !raft->joining
             && !raft->leaving
             && !raft->left
             && !raft->failed);
+    VLOG_DBG("raft_is_connected: %s\n", ret? "true": "false");
+    return ret;
 }
 
 /* Returns true if 'raft' is the cluster leader. */
@@ -1615,8 +1623,11 @@ raft_start_election(struct raft *raft, bool leadership_transfer)
     }
 
     ovs_assert(raft->role != RAFT_LEADER);
-    raft->candidate_retrying = (raft->role == RAFT_CANDIDATE);
     raft->role = RAFT_CANDIDATE;
+    /* If no leader was elected since the last election, we know we are
+     * retrying now. */
+    raft->candidate_retrying = !raft->had_leader;
+    raft->had_leader = false;
 
     raft->n_votes = 0;
 
@@ -2485,6 +2496,14 @@ raft_server_init_leader(struct raft *raft, struct raft_server *s)
     s->replied = false;
 }
 
+static void
+raft_set_leader(struct raft *raft, const struct uuid *sid)
+{
+    raft->leader_sid = *sid;
+    raft->had_leader = true;
+    raft->candidate_retrying = false;
+}
+
 static void
 raft_become_leader(struct raft *raft)
 {
@@ -2497,7 +2516,7 @@ raft_become_leader(struct raft *raft)
 
     ovs_assert(raft->role != RAFT_LEADER);
     raft->role = RAFT_LEADER;
-    raft->leader_sid = raft->sid;
+    raft_set_leader(raft, &raft->sid);
     raft_reset_election_timer(raft);
     raft_reset_ping_timer(raft);
 
@@ -2891,7 +2910,7 @@ raft_update_leader(struct raft *raft, const struct uuid *sid)
                   raft_get_nickname(raft, sid, buf, sizeof buf),
                   raft->term);
     }
-    raft->leader_sid = *sid;
+    raft_set_leader(raft, sid);
 
     /* Record the leader to the log.  This is not used by the algorithm
      * (although it could be, for quick restart), but it is used for
diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at
index 0fbb2681f..c8532fa27 100644
--- a/tests/ovsdb-cluster.at
+++ b/tests/ovsdb-cluster.at
@@ -66,23 +66,30 @@ EXECUTION_EXAMPLES
 AT_BANNER([OVSDB - disconnect from cluster])
 
 OVS_START_SHELL_HELPERS
-# ovsdb_test_cluster_disconnect LEADER_OR_FOLLOWER
+# ovsdb_test_cluster_disconnect N_SERVERS LEADER_OR_FOLLOWER [CHECK_FLAPPING]
+# Tests a server that is disconnected from the cluster.
+# N_SERVERS: Number of servers in the cluster.
+# LEADER_OR_FOLLOWER: The role of the server that is disconnected from the
+#                     cluster: "leader" or "follower".
+# CHECK_FLAPPING: Whether to check that is_connected does not flap: "yes", "no".
 ovsdb_test_cluster_disconnect () {
-    leader_or_follower=$1
+    n=$1
+    leader_or_follower=$2
+    check_flapping=$3
     schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
     ordinal_schema > schema
     AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
     cid=`ovsdb-tool db-cid s1.db`
     schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
-    for i in `seq 2 3`; do
+    for i in `seq 2 $n`; do
         AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
     done
 
     on_exit 'kill `cat *.pid`'
-    for i in `seq 3`; do
+    for i in `seq $n`; do
         AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
     done
-    for i in `seq 3`; do
+    for i in `seq $n`; do
         AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
     done
 
@@ -96,14 +103,18 @@ ovsdb_test_cluster_disconnect () {
     # a VIP on a load-balance. So we use single remote to test here.
     if test $leader_or_follower == "leader"; then
         target=1
-        shutdown="2 3"
+        shutdown=`seq $(($n/2 + 1)) $n`
+        cleanup=`seq $(($n/2))`
     else
-        target=3
+        target=$n
 
-        # shutdown follower before the leader so that there is no chance for s3
-        # become leader during the process.
-        shutdown="2 1"
+        # Shut down the followers before the leader (s1) so that there is no
+        # chance for s$n to become leader during the process.
+        shutdown="`seq 2 $(($n/2 + 1))` 1"
+        cleanup=`seq $(($n/2 + 2)) $n`
     fi
+    echo shutdown=$shutdown
+    echo cleanup=$cleanup
 
     # Connect to $target. Use "wait" to trigger a non-op transaction so
     # that test-ovsdb will not quit.
@@ -119,6 +130,11 @@ ovsdb_test_cluster_disconnect () {
 
     OVS_WAIT_UNTIL([grep "000: i=1" test-ovsdb.log])
 
+    # Start collecting raft_is_connected logs for $target before shutting down
+    # any servers.
+    tail -f s$target.log > raft_is_connected.log &
+    echo $! > tail.pid
+
     # Shutdown the other servers so that $target is disconnected from the cluster.
     for i in $shutdown; do
         OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
     done
@@ -127,18 +143,40 @@ ovsdb_test_cluster_disconnect () {
 
     # The test-ovsdb should detect the disconnect and retry.
     OVS_WAIT_UNTIL([grep disconnect test-ovsdb.log])
 
-    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$target], [s$target.pid])
+    # The $target debug log should show "raft_is_connected: false".
+    OVS_WAIT_UNTIL([grep "raft_is_connected: false" raft_is_connected.log])
+
+    # Save the current count of "raft_is_connected: true".
+    count_old=`grep "raft_is_connected: true" raft_is_connected.log | wc -l`
+    echo count_old $count_old
+
+    if test X$check_flapping == X"yes"; then
+        sleep 10
+    fi
+    # Make sure raft_is_connected didn't flap from false to true.
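+    # (The sleep gives any flap time to be logged; if the count below still
+    # matches count_old, no new "raft_is_connected: true" line appeared, so
+    # the state never flapped back while the cluster lacked quorum.)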
+    count_new=`grep "raft_is_connected: true" raft_is_connected.log | wc -l`
+    echo count_new $count_new
+    AT_CHECK([test $count_new == $count_old])
+
+    for i in $cleanup; do
+        OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
+    done
 }
 OVS_END_SHELL_HELPERS
 
 AT_SETUP([OVSDB cluster - follower disconnect from cluster, single remote])
 AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
-ovsdb_test_cluster_disconnect follower
+ovsdb_test_cluster_disconnect 3 follower
 AT_CLEANUP
 
 AT_SETUP([OVSDB cluster - leader disconnect from cluster, single remote])
 AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
-ovsdb_test_cluster_disconnect leader
+ovsdb_test_cluster_disconnect 3 leader
+AT_CLEANUP
+
+AT_SETUP([OVSDB cluster - leader disconnect from cluster, check flapping])
+AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
+ovsdb_test_cluster_disconnect 5 leader yes
 AT_CLEANUP
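For reference, here is how the helper's shutdown/cleanup arithmetic above
works out for the new five-server flapping test.  This is a worked example
only, assuming $n is 5; it is plain shell and independent of the test
harness:

n=5
# Leader case: target=1; stopping a bare majority (s3 s4 s5) removes quorum.
shutdown=`seq $(($n/2 + 1)) $n`; echo shutdown=$shutdown    # shutdown=3 4 5
cleanup=`seq $(($n/2))`;         echo cleanup=$cleanup      # cleanup=1 2
# Follower case: target=s5; the leader (s1) is shut down last.
shutdown="`seq 2 $(($n/2 + 1))` 1"; echo shutdown=$shutdown # shutdown=2 3 1
cleanup=`seq $(($n/2 + 2)) $n`;     echo cleanup=$cleanup   # cleanup=4 5

Either way the target server stays up but can no longer reach a majority,
which is exactly the state in which raft_is_connected() must keep returning
false.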