1 OVS_START_SHELL_HELPERS
2 # ovsdb_check_cluster N_SERVERS SCHEMA_FUNC OUTPUT TRANSACTION...
#
# Creates the clustered database s1.db from the file "schema", joins
# members s2 up to sN_SERVERS to it, starts ovsdb-server on the s$i.db
# files and waits for them to report "connected".  A transaction is run
# with ovsdb-client, the file "output" is passed through uuidfilt and
# compared with OUTPUT, the servers are stopped, and finally
# "ovsdb-tool check-cluster" is run on the database files.
# NOTE(review): SCHEMA_FUNC is stored in schema_func; presumably it is the
# shell function that writes the "schema" file -- confirm.
3 ovsdb_check_cluster () {
4 local n=$1 schema_func=$2 output=$3
8 schema=`ovsdb-tool schema-name schema`
9 AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [stderr])
# grep -v must select nothing (exit status 1): the only stderr lines
# tolerated are those containing "from ephemeral to persistent".
10 AT_CHECK([grep -v 'from ephemeral to persistent' stderr], [1])
11 cid=`ovsdb-tool db-cid s1.db`
12 for i in `seq 2 $n`; do
13 AT_CHECK([ovsdb-tool join-cluster s$i.db $schema unix:s$i.raft unix:s1.raft])
16 on_exit 'kill `cat *.pid`'
18 AT_CHECK([ovsdb-server -vraft -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
21 AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema connected])
# NOTE(review): the remote list below always names s1, s2 and s3, whatever
# N_SERVERS is; confirm this is intended for the 1- and 5-server callers.
26 AT_CHECK([ovsdb-client -vjsonrpc -vconsole:off -vsyslog:off -vvlog:off --log-file transact unix:s1.ovsdb,unix:s2.ovsdb,unix:s3.ovsdb "$txn"], [0], [stdout])
29 AT_CHECK_UNQUOTED([uuidfilt output], [0], [$output])
31 OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
34 AT_CHECK([ovsdb-tool check-cluster s*.db])
38 # Test a 1-server cluster.
# The definition below makes each generated test call ovsdb_check_cluster
# with N_SERVERS set to 1 and tags it with the keyword "cluster1".
39 AT_BANNER([OVSDB - clustered transactions (1 server)])
40 m4_define([OVSDB_CHECK_EXECUTION],
41 [AT_SETUP([$1 - cluster of 1])
42 AT_KEYWORDS([ovsdb server positive unix cluster cluster1 $5])
43 ovsdb_check_cluster 1 "$2" '$4' m4_foreach([txn], [$3], ['txn' ])
47 # Test a 3-server cluster.
# The definition below makes each generated test call ovsdb_check_cluster
# with N_SERVERS set to 3 and tags it with the keyword "cluster3".
48 AT_BANNER([OVSDB - clustered transactions (3 servers)])
49 m4_define([OVSDB_CHECK_EXECUTION],
50 [AT_SETUP([$1 - cluster of 3])
51 AT_KEYWORDS([ovsdb server positive unix cluster cluster3 $5])
52 ovsdb_check_cluster 3 "$2" '$4' m4_foreach([txn], [$3], ['txn' ])
56 # Test a 5-server cluster.
# The definition below makes each generated test call ovsdb_check_cluster
# with N_SERVERS set to 5 and tags it with the keyword "cluster5".
57 AT_BANNER([OVSDB - clustered transactions (5 servers)])
58 m4_define([OVSDB_CHECK_EXECUTION],
59 [AT_SETUP([$1 - cluster of 5])
60 AT_KEYWORDS([ovsdb server positive unix cluster cluster5 $5])
61 ovsdb_check_cluster 5 "$2" '$4' m4_foreach([txn], [$3], ['txn' ])
66 AT_BANNER([OVSDB - disconnect from cluster])
68 OVS_START_SHELL_HELPERS
69 # ovsdb_test_cluster_disconnect N_SERVERS LEADER_OR_FOLLOWER [CHECK_FLAPPING]
70 # Test a server that gets disconnected from the cluster.
71 # N_SERVERS: Number of servers in the cluster.
72 # LEADER_OR_FOLLOWER: The role of the server that is disconnected from the
73 # cluster: "leader" or "follower".
74 # CHECK_FLAPPING: "yes" to also check that raft_is_connected did not flap from false back to true; "no" otherwise.
75 ovsdb_test_cluster_disconnect () {
79 schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
80 ordinal_schema > schema
81 AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
82 cid=`ovsdb-tool db-cid s1.db`
# NOTE(review): schema_name was already set above by the same command.
83 schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
84 for i in `seq 2 $n`; do
85 AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
88 on_exit 'kill `cat *.pid`'
90 AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
93 AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
96 AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
99 "row": {"i": 1}}]]'], [0], [ignore], [ignore])
101 # When a node is disconnected from the cluster, the IDL should disconnect
102 # and retry even if it uses a single remote, because the remote IP can be
103 # a VIP on a load balancer. So we use a single remote to test here.
# NOTE(review): "==" inside test is an extension; POSIX test only
# specifies "=" for string comparison.
104 if test $leader_or_follower == "leader"; then
106 shutdown=`seq $(($n/2 + 1)) $n`
107 cleanup=`seq $(($n/2))`
111 # Shut down followers before the leader (s1) so that there is no chance for
112 # s$n to become leader during the process.
113 shutdown="`seq 2 $(($n/2 + 1))` 1"
114 cleanup=`seq $(($n/2 + 2)) $n`
116 echo shutdown=$shutdown
117 echo cleanup=$cleanup
119 # Connect to $target. Use "wait" to trigger a no-op transaction so
120 # that test-ovsdb will not quit.
122 test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -v -t10 idl unix:s$target.ovsdb '[["idltest",
125 "where": [["i", "==", 1]],
128 "rows": [{"i": 1}]}]]' > test-ovsdb.log 2>&1 &
129 echo $! > test-ovsdb.pid
131 OVS_WAIT_UNTIL([grep "000: i=1" test-ovsdb.log])
133 # Start collecting raft_is_connected logs for $target before shutting down
135 tail -f s$target.log > raft_is_connected.log &
138 # Shut down the other servers so that $target is disconnected from the cluster.
139 for i in $shutdown; do
140 OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
143 # The test-ovsdb process should detect the disconnect and retry.
144 OVS_WAIT_UNTIL([grep disconnect test-ovsdb.log])
146 # The $target debug log should show raft_is_connected: false.
147 OVS_WAIT_UNTIL([grep "raft_is_connected: false" raft_is_connected.log])
149 # Save the current count of "raft_is_connected: true" log lines.
150 count_old=`grep "raft_is_connected: true" raft_is_connected.log | wc -l`
151 echo count_old $count_old
153 if test X$check_flapping == X"yes"; then
156 # Make sure raft_is_connected didn't flap from false to true: the count of "true" lines must be unchanged.
157 count_new=`grep "raft_is_connected: true" raft_is_connected.log | wc -l`
158 echo count_new $count_new
159 AT_CHECK([test $count_new == $count_old])
161 for i in $cleanup; do
162 OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
165 OVS_END_SHELL_HELPERS
167 AT_SETUP([OVSDB cluster - follower disconnect from cluster, single remote])
168 AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
# 3-server cluster; the disconnected server is a follower.
169 ovsdb_test_cluster_disconnect 3 follower
172 AT_SETUP([OVSDB cluster - leader disconnect from cluster, single remote])
173 AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
# 3-server cluster; the disconnected server is the leader.
174 ovsdb_test_cluster_disconnect 3 leader
177 AT_SETUP([OVSDB cluster - leader disconnect from cluster, check flapping])
178 AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
# 5-server cluster; the disconnected server is the leader and the
# flapping check is enabled.
179 ovsdb_test_cluster_disconnect 5 leader yes
182 AT_SETUP([OVSDB cluster - initial status should be disconnected])
183 AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
186 schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
187 ordinal_schema > schema
188 AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
189 cid=`ovsdb-tool db-cid s1.db`
# NOTE(review): schema_name was already set above by the same command.
190 schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
191 for i in `seq 2 $n`; do
192 AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
195 on_exit 'kill `cat *.pid`'
196 for i in `seq $n`; do
197 AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
199 for i in `seq $n`; do
200 AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
203 # Stop all servers, then start only s1, to test the initial connection status
204 # when there is no leader yet.
205 for i in `seq 1 $n`; do
206 OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
209 AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
211 # The initial status should be disconnected, so the wait (limited to 1 second) should fail; exit status 142 is expected.
212 AT_CHECK([ovsdb_client_wait --timeout=1 unix:s$i.ovsdb $schema_name connected], [142], [ignore], [ignore])
213 OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
219 AT_BANNER([OVSDB cluster election timer change])
221 AT_SETUP([OVSDB cluster - election timer change])
222 AT_KEYWORDS([ovsdb server positive unix cluster timer])
225 schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
226 ordinal_schema > schema
227 AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
228 cid=`ovsdb-tool db-cid s1.db`
# NOTE(review): schema_name was already set above by the same command.
229 schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
230 for i in `seq 2 $n`; do
231 AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
234 on_exit 'kill `cat *.pid`'
235 for i in `seq $n`; do
236 AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
238 for i in `seq $n`; do
239 AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
242 # A change requested through a follower (s2) is not allowed.
243 AT_CHECK([ovs-appctl -t "`pwd`"/s2 cluster/change-election-timer $schema_name 2000], [2], [], [ignore])
245 # Timer cannot be changed to bigger than 2x the original value (the same request succeeds below once the timer is 2000).
246 AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/change-election-timer $schema_name 4000], [2], [], [ignore])
248 AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/change-election-timer $schema_name 2000], [0], [dnl
249 change of election timer initiated.
251 OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Election timer: 2000"])
252 OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s2 cluster/status $schema_name | grep "Election timer: 2000"])
254 AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/change-election-timer $schema_name 4000], [0], [dnl
255 change of election timer initiated.
257 OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Election timer: 4000"])
258 OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s2 cluster/status $schema_name | grep "Election timer: 4000"])
260 # The latest timer value should be used after a restart.
261 for i in `seq $n`; do
262 printf "\ns$i: stopping\n"
263 OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
265 for i in `seq $n`; do
266 AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
268 OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Election timer: 4000"])
269 OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s2 cluster/status $schema_name | grep "Election timer: 4000"])
271 # Wait until the cluster is ready: no member reports "Leader: unknown".
272 for i in `seq $n`; do
273 OVS_WAIT_WHILE([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "Leader: unknown"])
276 # The latest timer value should be restored after DB compaction and restart.
277 # This is to test the install_snapshot RPC.
280 for i in `seq $n`; do
281 AT_CHECK([ovs-appctl -t "`pwd`"/s$i ovsdb-server/compact])
284 for i in `seq $n`; do
285 printf "\ns$i: stopping\n"
286 OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
288 for i in `seq $n`; do
289 AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
291 for i in `seq $n`; do
292 OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "Election timer: 4000"])
295 # Wait until the cluster is ready: no member reports "Leader: unknown".
296 for i in `seq $n`; do
297 OVS_WAIT_WHILE([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "Leader: unknown"])
300 # A newly joined member (s4) should use the latest timer value.
301 AT_CHECK([ovsdb-tool join-cluster s4.db $schema_name unix:s4.raft unix:s1.raft])
302 AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s4.log --pidfile=s4.pid --unixctl=s4 --remote=punix:s4.ovsdb s4.db])
303 OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s4 cluster/status $schema_name | grep "Election timer: 4000"])
304 # for i in `seq 10`; do
305 # ovs-appctl -t "`pwd`"/s4 cluster/status $schema_name
312 AT_BANNER([OVSDB cluster install snapshot RPC])
314 AT_SETUP([OVSDB cluster - install snapshot RPC])
315 AT_KEYWORDS([ovsdb server positive unix cluster snapshot])
318 schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
319 ordinal_schema > schema
320 AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
321 cid=`ovsdb-tool db-cid s1.db`
# NOTE(review): schema_name was already set above by the same command.
322 schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
323 for i in `seq 2 $n`; do
324 AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
327 on_exit 'kill `cat *.pid`'
328 for i in `seq $n`; do
329 AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
331 for i in `seq $n`; do
332 AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
335 AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
338 "row": {"i": 0}}]]'], [0], [ignore], [ignore])
340 # Stop one follower (s2) and write some data to the cluster, so that the follower falls behind.
341 printf "\ns2: stopping\n"
342 OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s2], [s2.pid])
344 # Delete "i":0 and re-add it to get a different UUID for it.
345 AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
348 "where": [["i", "==", 0]]}]]'], [0], [ignore], [ignore])
350 AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
353 "row": {"i": 0}}]]'], [0], [ignore], [ignore])
355 AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
358 "row": {"i": 1}}]]'], [0], [ignore], [ignore])
360 # Compact the leader (s1) online to generate a snapshot.
361 AT_CHECK([ovs-appctl -t "`pwd`"/s1 ovsdb-server/compact])
363 # Start the follower s2 again.
364 AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s2.log --pidfile=s2.pid --unixctl=s2 --remote=punix:s2.ovsdb s2.db])
365 AT_CHECK([ovsdb_client_wait unix:s2.ovsdb $schema_name connected])
367 # A client transaction through s2. During this transaction, there will be an
368 # install_snapshot RPC because s2 detects it is behind and s1 doesn't have the
369 # pre_log_index requested by s2 because it is already compacted.
370 # After the install_snapshot RPC process, the transaction through s2 should
372 AT_CHECK([ovsdb-client transact unix:s2.ovsdb '[["idltest",
375 "row": {"i": 2}}]]'], [0], [ignore], [ignore])
377 # The snapshot should overwrite the in-memory contents of the DB on s2
378 # without generating any constraint violations. All three records (0, 1, 2)
379 # should be in the DB at this point.
380 AT_CHECK([ovsdb-client --no-headings dump unix:s2.ovsdb idltest indexed | uuidfilt | sort -k 2], [0], [dnl
387 for i in `seq $n`; do
388 OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
395 OVS_START_SHELL_HELPERS
396 # ovsdb_cluster_failure_test REMOTE_1 REMOTE_2 CRASH_NODE CRASH_COMMAND [NEW_LEADER]
#
# REMOTE_1, REMOTE_2: numbers of the servers whose sockets are given to the
#   ovs-vsctl client as its database remotes.
# CRASH_NODE: number of the server that receives the failure-test command.
# CRASH_COMMAND: argument passed to "cluster/failure-test" on CRASH_NODE.
# NEW_LEADER: optional.  When set, the other follower (s3 if NEW_LEADER is
#   2, otherwise s2) is told to delay its election.
# NOTE(review): the argument order is inferred from the callers and from
# the variables read below; confirm against the positional assignments.
397 ovsdb_cluster_failure_test () {
398 # Initial state: s1 is leader, s2 and s3 are followers
# NOTE(review): "==" inside test is an extension; POSIX test only
# specifies "=" for string comparison.
403 if test "$crash_node" == "1"; then
407 cp $top_srcdir/vswitchd/vswitch.ovsschema schema
408 schema=`ovsdb-tool schema-name schema`
409 AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl
410 ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns
417 for j in `seq 1 $n`; do
418 if test $i != $j; then
419 others="$others unix:s$j.raft"
422 AT_CHECK([ovsdb-tool join-cluster s$i.db $schema unix:s$i.raft $others])
426 printf "\ns$i: starting\n"
427 AT_CHECK([ovsdb-server -vjsonrpc -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
431 printf "\ns$i: waiting to connect to storage\n"
432 AT_CHECK([ovsdb_client_wait --log-file=connect$i.log unix:s$i.ovsdb $schema connected])
434 cid=`ovsdb-tool db-cid s1.db`
435 for i in `seq 2 $n`; do join_cluster $i; done
437 on_exit 'kill `cat *.pid`'
438 for i in `seq $n`; do start_server $i; done
439 for i in `seq $n`; do connect_server $i; done
441 db=unix:s$remote_1.ovsdb,unix:s$remote_2.ovsdb
443 # To ensure that the $new_leader node becomes the new leader, we delay the
444 # election timer for the other follower.
445 if test -n "$new_leader"; then
446 if test "$new_leader" == "2"; then
447 delay_election_node=3
449 delay_election_node=2
451 AT_CHECK([ovs-appctl -t "`pwd`"/s$delay_election_node cluster/failure-test delay-election], [0], [ignore])
453 AT_CHECK([ovs-appctl -t "`pwd`"/s$crash_node cluster/failure-test $crash_command], [0], [ignore])
454 AT_CHECK([ovs-vsctl -v --db="$db" --no-leader-only --no-shuffle-remotes --no-wait create QoS type=x], [0], [ignore], [ignore])
456 # Make sure that the node really crashed: its socket file must be gone.
457 AT_CHECK([ls s$crash_node.ovsdb], [2], [ignore], [ignore])
458 # XXX: Client will fail if the remotes contain a unix socket that doesn't exist (killed).
459 if test "$remote_1" = "$crash_node"; then
460 db=unix:s$remote_2.ovsdb
462 AT_CHECK([ovs-vsctl --db="$db" --no-leader-only --no-wait --columns=type --bare list QoS], [0], [x
465 OVS_END_SHELL_HELPERS
466 AT_BANNER([OVSDB - cluster failure with pending transaction])
# Each test below passes ovsdb_cluster_failure_test, in order: the two
# server numbers used as client remotes, the number of the server told to
# crash, the failure-test command sent to it, and optionally the follower
# (2 or 3) that is meant to become the new leader.
468 AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending appendReq, follower-2 becomes leader])
469 AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
470 ovsdb_cluster_failure_test 2 3 1 crash-before-sending-append-request 2
473 AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending appendReq, follower-3 becomes leader])
474 AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
475 ovsdb_cluster_failure_test 2 3 1 crash-before-sending-append-request 3
478 AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending execRep, follower-2 becomes leader])
479 AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
480 ovsdb_cluster_failure_test 2 3 1 crash-before-sending-execute-command-reply 2
483 AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending execRep, follower-3 becomes leader])
484 AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
485 ovsdb_cluster_failure_test 2 3 1 crash-before-sending-execute-command-reply 3
488 AT_SETUP([OVSDB cluster - txn on follower-2, leader crash after sending execRep, follower-2 becomes leader])
489 AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
490 ovsdb_cluster_failure_test 2 3 1 crash-after-sending-execute-command-reply 2
493 AT_SETUP([OVSDB cluster - txn on follower-2, leader crash after sending execRep, follower-3 becomes leader])
494 AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
495 ovsdb_cluster_failure_test 2 3 1 crash-after-sending-execute-command-reply 3
498 AT_SETUP([OVSDB cluster - txn on leader, leader crash before sending appendReq, follower-2 becomes leader])
499 AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
500 ovsdb_cluster_failure_test 1 2 1 crash-before-sending-append-request 2
503 AT_SETUP([OVSDB cluster - txn on leader, leader crash before sending appendReq, follower-3 becomes leader])
504 AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
505 ovsdb_cluster_failure_test 1 2 1 crash-before-sending-append-request 3
508 AT_SETUP([OVSDB cluster - txn on leader, leader crash after sending appendReq, follower-2 becomes leader])
509 AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
510 # XXX: Detect and skip repeated transaction before enabling this test
512 ovsdb_cluster_failure_test 1 2 1 crash-after-sending-append-request 2
515 AT_SETUP([OVSDB cluster - txn on leader, leader crash after sending appendReq, follower-3 becomes leader])
516 AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
517 # XXX: Detect and skip repeated transaction before enabling this test
519 ovsdb_cluster_failure_test 1 2 1 crash-after-sending-append-request 3
522 AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash before sending execReq, reconnect to follower-3])
523 AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
524 ovsdb_cluster_failure_test 2 3 2 crash-before-sending-execute-command-request
527 AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash before sending execReq, reconnect to leader])
528 AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
529 ovsdb_cluster_failure_test 2 1 2 crash-before-sending-execute-command-request
532 AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash after sending execReq, reconnect to follower-3])
533 AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
534 # XXX: Detect and skip repeated transaction before enabling this test
536 ovsdb_cluster_failure_test 2 3 2 crash-after-sending-execute-command-request
539 AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash after sending execReq, reconnect to leader])
540 AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
541 # XXX: Detect and skip repeated transaction before enabling this test
543 ovsdb_cluster_failure_test 2 1 2 crash-after-sending-execute-command-request
546 AT_SETUP([OVSDB cluster - txn on leader, follower-2 crash after receiving appendReq for the update])
547 AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
548 ovsdb_cluster_failure_test 1 1 2 crash-after-receiving-append-request-update
551 AT_SETUP([OVSDB cluster - txn on follower-2, follower-3 crash after receiving appendReq for the update])
552 AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
553 ovsdb_cluster_failure_test 2 2 3 crash-after-receiving-append-request-update
557 AT_SETUP([OVSDB cluster - competing candidates])
558 AT_KEYWORDS([ovsdb server negative unix cluster competing-candidates])
561 schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
562 ordinal_schema > schema
563 AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
564 cid=`ovsdb-tool db-cid s1.db`
# NOTE(review): schema_name was already set above by the same command.
565 schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
566 for i in `seq 2 $n`; do
567 AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
570 on_exit 'kill `cat *.pid`'
571 for i in `seq $n`; do
572 AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
574 for i in `seq $n`; do
575 AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
578 # We need to simulate the situation when 2 candidates start an election with the same
581 # Before triggering leader election, tell follower s2 not to send vote requests (simulating
582 # a vote-request lost or not handled in time), and tell follower s3 to delay its
583 # election timer to make sure s3 doesn't send a vote-request before s2 enters
585 AT_CHECK([ovs-appctl -t "`pwd`"/s2 cluster/failure-test dont-send-vote-request], [0], [ignore])
586 AT_CHECK([ovs-appctl -t "`pwd`"/s3 cluster/failure-test delay-election], [0], [ignore])
588 # Restart the leader, which will become a follower, and both old followers will start
589 # an election as candidates. The new follower (old leader) will vote for one of them,
590 # and the other candidate should step back to follower again.
592 AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s1.log --pidfile=s1.pid --unixctl=s1 --remote=punix:s1.ovsdb s1.db])
594 # Tell s1 to delay its election timer so that it won't start an election before s3
596 AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/failure-test delay-election], [0], [ignore])
598 OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Term: 2"])
600 for i in `seq $n`; do
601 OVS_WAIT_WHILE([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "candidate"])
602 AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
605 for i in `seq $n`; do
606 OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
612 AT_BANNER([OVSDB - cluster tests])
615 OVS_START_SHELL_HELPERS
# ovsdb_torture_test N VICTIM VARIANT
#
# Builds an N-member cluster from vswitch.ovsschema and starts n1*n2
# ovs-vsctl processes that add keys to the Open_vSwitch external_ids
# column.  While waiting for them, member VICTIM is handled according to
# VARIANT (see the per-argument notes below).  Afterwards the external-ids
# column is compared with the expected keys (expout), and
# "ovsdb-tool check-cluster" is run on the database files.
616 ovsdb_torture_test () {
617 local n=$1 # Number of cluster members
618 local victim=$2 # Cluster member to kill or remove
619 local variant=$3 # 'kill' and restart or 'remove' and add
620 cp $top_srcdir/vswitchd/vswitch.ovsschema schema
621 schema=`ovsdb-tool schema-name schema`
622 AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl
623 ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns
629 for j in `seq 1 $n`; do
630 if test $i != $j; then
631 others="$others unix:s$j.raft"
634 AT_CHECK([ovsdb-tool join-cluster s$i.db $schema unix:s$i.raft $others])
639 printf "\ns$i: starting\n"
640 AT_CHECK([ovsdb-server -vjsonrpc -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
644 printf "\ns$i: stopping\n"
645 OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
649 printf "\ns$i: waiting to connect to storage\n"
650 AT_CHECK([ovsdb_client_wait --log-file=connect$i.log unix:s$i.ovsdb $schema connected])
654 printf "\ns$i: removing from cluster\n"
655 AT_CHECK([ovs-appctl -t "`pwd`"/s$i cluster/leave Open_vSwitch])
656 printf "\ns$i: waiting for removal to complete\n"
657 AT_CHECK([ovsdb_client_wait --log-file=remove$i.log unix:s$i.ovsdb $schema removed])
668 cid=`ovsdb-tool db-cid s1.db`
669 for i in `seq 2 $n`; do join_cluster $i; done
671 on_exit 'kill `cat *.pid`'
672 for i in `seq $n`; do start_server $i; done
673 for i in `seq $n`; do connect_server $i; done
676 for i in `seq 2 $n`; do
677 db=$db,unix:s$i.ovsdb
681 echo "starting $n1*$n2 ovs-vsctl processes..."
682 for i in $(seq 0 $(expr $n1 - 1) ); do
683 (for j in $(seq $n2); do
685 txn="add Open_vSwitch . external_ids $i-$j=$i-$j"
686 for k in $(seq $n3); do
687 txn="$txn -- add Open_vSwitch . external_ids $i-$j-$k=$i-$j-$k"
689 run_as "ovs-vsctl($i-$j)" ovs-vsctl "-vPATTERN:console:ovs-vsctl($i-$j)|%D{%H:%M:%S}|%05N|%c|%p|%m" --log-file=$i-$j.log -vfile -vsyslog:off -vtimeval:off --timeout=120 --db="$db" --no-leader-only --no-wait $txn
691 if test $status != 0; then
692 echo "$i-$j exited with status $status" > $i-$j:$status
700 echo "waiting for ovs-vsctl processes to exit..."
701 # Use a file instead of a variable because the code inside "while" runs in a subshell.
704 (while :; do echo; sleep 0.1; done) | while read REPLY; do
707 for j in $(seq 0 $(expr $n1 - 1)); do
708 if test -f $j.done; then
710 done=$(expr $done + 1)
714 if test $done = $n1; then
718 case $(cat phase) in # (
720 if test $done -ge $(expr $n1 / 10); then
721 if test $variant = kill; then
724 remove_server $victim
731 if test $i -ge $next; then
732 if test $variant = kill; then
734 connect_server $victim
# Exit status 77 makes Autotest report the test as skipped; that happens
# here when the "phase" file does not contain 2.
746 AT_CHECK([if test $(cat phase) != 2; then exit 77; fi])
748 for i in $(seq 0 $(expr $n1 - 1) ); do
749 for j in `seq $n2`; do
751 for k in `seq $n3`; do
752 echo "$i-$j-$k=$i-$j-$k"
756 AT_CHECK([ovs-vsctl --db="$db" --no-wait --log-file=finalize.log -vtimeval:off -vfile -vsyslog:off --bare get Open_vSwitch . external-ids | tr ',' '\n' | sed 's/[[{}"" ]]//g' | sort], [0], [expout])
758 for i in `seq $n`; do
759 if test $i != $victim || test $(cat phase) != 1; then
764 # We ignore stdout because non-fatal warnings get printed there.
765 AT_CHECK([ovsdb-tool check-cluster s*.db], [0], [ignore])
767 OVS_END_SHELL_HELPERS
# Kill/restart variants.  The arguments to ovsdb_torture_test are the number
# of cluster members, the member to kill, and the variant.
769 AT_SETUP([OVSDB 3-server torture test - kill/restart leader])
770 AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
771 ovsdb_torture_test 3 1 kill
773 AT_SETUP([OVSDB 3-server torture test - kill/restart follower 1])
774 AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
775 ovsdb_torture_test 3 2 kill
777 AT_SETUP([OVSDB 3-server torture test - kill/restart follower 2])
778 AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
779 ovsdb_torture_test 3 3 kill
781 AT_SETUP([OVSDB 5-server torture test - kill/restart leader])
782 AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
783 ovsdb_torture_test 5 1 kill
785 AT_SETUP([OVSDB 5-server torture test - kill/restart follower 1])
786 AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
787 ovsdb_torture_test 5 2 kill
789 AT_SETUP([OVSDB 5-server torture test - kill/restart follower 2])
790 AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
791 ovsdb_torture_test 5 3 kill
793 AT_SETUP([OVSDB 5-server torture test - kill/restart follower 3])
794 AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
795 ovsdb_torture_test 5 4 kill
797 AT_SETUP([OVSDB 5-server torture test - kill/restart follower 4])
798 AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
799 ovsdb_torture_test 5 5 kill
# Remove/re-add variants.  The arguments to ovsdb_torture_test are the
# number of cluster members, the member to remove, and the variant.
802 AT_SETUP([OVSDB 3-server torture test - remove/re-add leader])
803 AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
804 ovsdb_torture_test 3 1 remove
806 AT_SETUP([OVSDB 3-server torture test - remove/re-add follower 1])
807 AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
808 ovsdb_torture_test 3 2 remove
810 AT_SETUP([OVSDB 3-server torture test - remove/re-add follower 2])
811 AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
812 ovsdb_torture_test 3 3 remove
814 AT_SETUP([OVSDB 5-server torture test - remove/re-add leader])
815 AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
816 ovsdb_torture_test 5 1 remove
818 AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 1])
819 AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
820 ovsdb_torture_test 5 2 remove
822 AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 2])
823 AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
824 ovsdb_torture_test 5 3 remove
826 AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 3])
827 AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
828 ovsdb_torture_test 5 4 remove
830 AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 4])
831 AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
832 ovsdb_torture_test 5 5 remove